diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
39 files changed, 963 insertions, 393 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 1796d2bdcaaa..d7a1e5a9331c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -24,17 +24,22 @@ obj-y += match.o obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o +obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o +ifdef CONFIG_CPU_SUP_INTEL +obj-y += intel.o intel_pconfig.o +obj-$(CONFIG_PM) += intel_epb.o +endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o +obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ @@ -44,6 +49,7 @@ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o +obj-$(CONFIG_ACRN_GUEST) += acrn.o ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ @@ -51,8 +57,7 @@ quiet_cmd_mkcapflags = MKCAP $@ cpufeature = $(src)/../../include/asm/cpufeatures.h -targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) endif -clean-files += capflags.c +targets += capflags.c diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c new file mode 100644 index 000000000000..676022e71791 --- /dev/null +++ b/arch/x86/kernel/cpu/acrn.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACRN detection support + * + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * + * Jason Chen CJ <jason.cj.chen@intel.com> + * Zhao Yakui <yakui.zhao@intel.com> + * + */ + +#include <linux/interrupt.h> +#include <asm/acrn.h> +#include <asm/apic.h> +#include <asm/desc.h> +#include <asm/hypervisor.h> +#include <asm/irq_regs.h> + +static uint32_t __init acrn_detect(void) +{ + return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0); +} + +static void __init acrn_init_platform(void) +{ + /* Setup the IDT for ACRN hypervisor callback */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector); +} + +static bool acrn_x2apic_available(void) +{ + /* + * x2apic is not supported for now. Future enablement will have to check + * X86_FEATURE_X2APIC to determine whether x2apic is supported in the + * guest. + */ + return false; +} + +static void (*acrn_intr_handler)(void); + +__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * The hypervisor requires that the APIC EOI should be acked. + * If the APIC EOI is not acked, the APIC ISR bit for the + * HYPERVISOR_CALLBACK_VECTOR will not be cleared and then it + * will block the interrupt whose vector is lower than + * HYPERVISOR_CALLBACK_VECTOR. + */ + entering_ack_irq(); + inc_irq_stat(irq_hv_callback_count); + + if (acrn_intr_handler) + acrn_intr_handler(); + + exiting_irq(); + set_irq_regs(old_regs); +} + +const __initconst struct hypervisor_x86 x86_hyper_acrn = { + .name = "ACRN", + .detect = acrn_detect, + .type = X86_HYPER_ACRN, + .init.init_platform = acrn_init_platform, + .init.x2apic_available = acrn_x2apic_available, +}; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index fb6a64bd765f..8d4e50428b68 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/bitops.h> #include <linux/elf.h> @@ -823,8 +824,11 @@ static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); - /* Fix erratum 1076: CPB feature bit not being set in CPUID. */ - if (!cpu_has(c, X86_FEATURE_CPB)) + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. + * Always set it, except when running under a hypervisor. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); } diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 64d5aec24203..e2f319dc992d 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * x86 APERF/MPERF KHz calculation for * /sys/.../cpufreq/scaling_cur_freq * * Copyright (C) 2017 Intel Corp. * Author: Len Brown <len.brown@intel.com> - * - * This file is licensed under GPLv2. */ #include <linux/delay.h> @@ -14,6 +13,7 @@ #include <linux/percpu.h> #include <linux/cpufreq.h> #include <linux/smp.h> +#include <linux/sched/isolation.h> #include "cpu.h" @@ -86,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + aperfmperf_snapshot_cpu(cpu, ktime_get(), true); return per_cpu(samples.khz, cpu); } @@ -102,9 +105,12 @@ void arch_freq_prepare_all(void) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return; - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + continue; if (!aperfmperf_snapshot_cpu(cpu, now, false)) wait = true; + } if (wait) msleep(APERFMPERF_REFRESH_DELAY_MS); @@ -118,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) return per_cpu(samples.khz, cpu); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03b4cc0ec3a7..66ca906aa790 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) } /* + * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper + * bit in the mask to allow guests to use the mitigation even in the + * case where the host does not enable it. + */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + } + + /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass @@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } } diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 395d46f78582..c7503be92f35 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) if (c->x86 < 0x17) { /* LLC is at the node level. */ per_cpu(cpu_llc_id, cpu) = node_id; - } else if (c->x86 == 0x17 && - c->x86_model >= 0 && c->x86_model <= 0x1F) { + } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. * Core complex ID is ApicId[3] for these processors. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d7f55ad2dfb1..11472178e17f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 @@ -365,6 +366,77 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +static unsigned long cr4_pinned_bits __ro_after_init; + +void native_write_cr0(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } +} +EXPORT_SYMBOL(native_write_cr0); + +void native_write_cr4(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { + bits_missing = ~val & cr4_pinned_bits; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", + bits_missing); + } +} +EXPORT_SYMBOL(native_write_cr4); + +void cr4_init(void) +{ + unsigned long cr4 = __read_cr4(); + + if (boot_cpu_has(X86_FEATURE_PCID)) + cr4 |= X86_CR4_PCIDE; + if (static_branch_likely(&cr_pinning)) + cr4 |= cr4_pinned_bits; + + __write_cr4(cr4); + + /* Initialize cr4 shadow for this CPU. */ + this_cpu_write(cpu_tlbstate.cr4, cr4); +} + +/* + * Once CPU feature detection is finished (and boot params have been + * parsed), record any of the sensitive CR bits that are set, and + * enable CR pinning. + */ +static void __init setup_cr_pinning(void) +{ + unsigned long mask; + + mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + static_key_enable(&cr_pinning.key); +} + /* * Protection Keys are not available in 32-bit mode. */ @@ -800,6 +872,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c) } } +static void init_cqm(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + return; + } + + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = cpuid_ebx(0xf); + + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); + + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -822,6 +918,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; c->x86_capability[CPUID_7_EDX] = edx; + + /* Check valid sub-leaf index before accessing it */ + if (eax >= 1) { + cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx); + c->x86_capability[CPUID_7_1_EAX] = eax; + } } /* Extended state features: level 0x0000000d */ @@ -831,33 +933,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_D_1_EAX] = eax; } - /* Additional Intel-defined flags: level 0x0000000F */ - if (c->cpuid_level >= 0x0000000F) { - - /* QoS sub-leaf, EAX=0Fh, ECX=0 */ - cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_0_EDX] = edx; - - if (cpu_has(c, X86_FEATURE_CQM_LLC)) { - /* will be overridden if occupancy monitoring exists */ - c->x86_cache_max_rmid = ebx; - - /* QoS sub-leaf, EAX=0Fh, ECX=1 */ - cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_1_EDX] = edx; - - if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || - ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || - (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) { - c->x86_cache_max_rmid = ecx; - c->x86_cache_occ_scale = ebx; - } - } else { - c->x86_cache_max_rmid = -1; - c->x86_cache_occ_scale = -1; - } - } - /* AMD-defined flags: level 0x80000001 */ eax = cpuid_eax(0x80000000); c->extended_cpuid_level = eax; @@ -888,6 +963,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); + init_cqm(c); /* * Clear/Set all flags overridden by options, after probe. @@ -1298,6 +1374,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c) cpu, apicid, c->initial_apicid); } BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); + BUG_ON(topology_update_die_map(c->cpu_die_id, cpu)); #else c->logical_proc_id = 0; #endif @@ -1463,6 +1540,7 @@ void __init identify_boot_cpu(void) enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); + setup_cr_pinning(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -1697,12 +1775,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - if (cpu) load_ucode_ap(); @@ -1797,12 +1869,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - show_ucode_info_early(); pr_info("Initializing CPU#%d\n", cpu); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 2c0bd38a44ab..b5353244749b 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -20,6 +20,7 @@ struct cpuid_dep { * but it's difficult to tell that to the init reference checker. */ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_FXSR, X86_FEATURE_FPU }, { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, @@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, + { X86_FEATURE_CMOV, X86_FEATURE_FXSR }, + { X86_FEATURE_MMX, X86_FEATURE_FXSR }, + { X86_FEATURE_MMXEXT, X86_FEATURE_MMX }, { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, + { X86_FEATURE_XSAVE, X86_FEATURE_FXSR }, { X86_FEATURE_XMM, X86_FEATURE_FXSR }, { X86_FEATURE_XMM2, X86_FEATURE_XMM }, { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, @@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, {} }; diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 479ca4728de0..553bfbfc3a1b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -26,13 +26,6 @@ #include <asm/processor.h> #include <asm/hypervisor.h> -extern const struct hypervisor_x86 x86_hyper_vmware; -extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_pv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; -extern const struct hypervisor_x86 x86_hyper_kvm; -extern const struct hypervisor_x86 x86_hyper_jailhouse; - static const __initconst struct hypervisor_x86 * const hypervisors[] = { #ifdef CONFIG_XEN_PV @@ -49,11 +42,22 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = #ifdef CONFIG_JAILHOUSE_GUEST &x86_hyper_jailhouse, #endif +#ifdef CONFIG_ACRN_GUEST + &x86_hyper_acrn, +#endif }; enum x86_hypervisor_type x86_hyper_type; EXPORT_SYMBOL(x86_hyper_type); +bool __initdata nopv; +static __init int parse_nopv(char *arg) +{ + nopv = true; + return 0; +} +early_param("nopv", parse_nopv); + static inline const struct hypervisor_x86 * __init detect_hypervisor_vendor(void) { @@ -61,6 +65,9 @@ detect_hypervisor_vendor(void) uint32_t pri, max_pri = 0; for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { + if (unlikely(nopv) && !(*p)->ignore_nopv) + continue; + pri = (*p)->detect(); if (pri > max_pri) { max_pri = pri; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f17c1a714779..8d6d92ebeb54 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +/* + * Processors which have self-snooping capability can handle conflicting + * memory type across CPUs by snooping its own cache. However, there exists + * CPU models in which having conflicting memory types still leads to + * unpredictable behavior, machine check errors, or hangs. Clear this + * feature to prevent its use on machines with known erratas. + */ +static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) +{ + switch (c->x86_model) { + case INTEL_FAM6_CORE_YONAH: + case INTEL_FAM6_CORE2_MEROM: + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_G: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_SANDYBRIDGE: + setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); + } +} + static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) @@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) } check_mpx_erratum(c); + check_memory_type_self_snoop_errata(c); /* * Get the number of SMT siblings early from the extended topology diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index ebb14a26f117..f4dd73396f28 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -97,7 +97,6 @@ static void intel_epb_restore(void) wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); } -#ifdef CONFIG_PM static struct syscore_ops intel_epb_syscore_ops = { .suspend = intel_epb_save, .resume = intel_epb_restore, @@ -194,25 +193,6 @@ static int intel_epb_offline(unsigned int cpu) return 0; } -static inline void register_intel_ebp_syscore_ops(void) -{ - register_syscore_ops(&intel_epb_syscore_ops); -} -#else /* !CONFIG_PM */ -static int intel_epb_online(unsigned int cpu) -{ - intel_epb_restore(); - return 0; -} - -static int intel_epb_offline(unsigned int cpu) -{ - return intel_epb_save(); -} - -static inline void register_intel_ebp_syscore_ops(void) {} -#endif - static __init int intel_epb_init(void) { int ret; @@ -226,7 +206,7 @@ static __init int intel_epb_init(void) if (ret < 0) goto err_out_online; - register_intel_ebp_syscore_ops(); + register_syscore_ops(&intel_epb_syscore_ops); return 0; err_out_online: diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index d904aafe6409..6ea7fdc82f3c 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1,8 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * (c) 2005-2016 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html * * Written by Jacob Shin - AMD, Inc. * Maintained by: Borislav Petkov <bp@alien8.de> @@ -101,11 +99,6 @@ static struct smca_bank_name smca_names[] = { [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, }; -static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = -{ - [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } -}; - static const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) @@ -199,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN]; static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ +/* Map of banks that have more than MCA_MISC0 available. */ +static DEFINE_PER_CPU(u32, smca_misc_banks_map); + static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -208,6 +204,28 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; +static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) +{ + u32 low, high; + + /* + * For SMCA enabled processors, BLKPTR field of the first MISC register + * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). + */ + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + return; + + if (!(low & MCI_CONFIG_MCAX)) + return; + + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) + return; + + if (low & MASK_BLKPTR_LO) + per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); + +} + static void smca_configure(unsigned int bank, unsigned int cpu) { unsigned int i, hwid_mcatype; @@ -245,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu) wrmsr(smca_config, low, high); } + smca_set_misc_banks_map(bank, cpu); + /* Return early if this bank was already initialized. */ if (smca_banks[bank].hwid) return; @@ -455,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 smca_get_block_address(unsigned int bank, unsigned int block) +static u32 smca_get_block_address(unsigned int bank, unsigned int block, + unsigned int cpu) { - u32 low, high; - u32 addr = 0; - - if (smca_get_bank_type(bank) == SMCA_RESERVED) - return addr; - if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); - /* Check our cache first: */ - if (smca_bank_addrs[bank][block] != -1) - return smca_bank_addrs[bank][block]; - - /* - * For SMCA enabled processors, BLKPTR field of the first MISC register - * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). - */ - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - goto out; - - if (!(low & MCI_CONFIG_MCAX)) - goto out; - - if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && - (low & MASK_BLKPTR_LO)) - addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank))) + return 0; -out: - smca_bank_addrs[bank][block] = addr; - return addr; + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); } static u32 get_block_address(u32 current_addr, u32 low, u32 high, - unsigned int bank, unsigned int block) + unsigned int bank, unsigned int block, + unsigned int cpu) { u32 addr = 0, offset = 0; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return addr; if (mce_flags.smca) - return smca_get_block_address(bank, block); + return smca_get_block_address(bank, block, cpu); /* Fall back to method we used for older processors: */ switch (block) { @@ -626,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { - u32 low = 0, high = 0, address = 0; unsigned int bank, block, cpu = smp_processor_id(); + u32 low = 0, high = 0, address = 0; int offset = -1; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); disable_err_thresholding(c, bank); for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(address, low, high, bank, block); + address = get_block_address(address, low, high, bank, block, cpu); if (!address) break; @@ -975,7 +975,7 @@ static void amd_deferred_error_interrupt(void) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) log_error_deferred(bank); } @@ -1016,7 +1016,7 @@ static void amd_threshold_interrupt(void) struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; unsigned int bank, cpu = smp_processor_id(); - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1203,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, u32 low, high; int err; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return 0; if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) @@ -1254,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - address = get_block_address(address, low, high, bank, ++block); + address = get_block_address(address, low, high, bank, ++block, cpu); if (!address) return 0; @@ -1437,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; threshold_remove_bank(cpu, bank); @@ -1458,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu) if (bp) return 0; - bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *), + bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *), GFP_KERNEL); if (!bp) return -ENOMEM; per_cpu(threshold_banks, cpu) = bp; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; err = threshold_create_bank(cpu, bank); diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index c038e5c00a59..af8d37962586 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Bridge between MCE and APEI * @@ -13,19 +14,6 @@ * * Copyright 2010 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/export.h> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 5112a50e6486..743370ee4983 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Machine check handler. * @@ -64,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex); DEFINE_PER_CPU(unsigned, mce_exception_count); -struct mce_bank *mce_banks __read_mostly; +DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); + +struct mce_bank { + u64 ctl; /* subevents to enable */ + bool init; /* initialise bank? */ +}; +static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); + +#define ATTR_LEN 16 +/* One object for each MCE bank, shared by all CPUs */ +struct mce_bank_dev { + struct device_attribute attr; /* device attribute */ + char attrname[ATTR_LEN]; /* attribute name */ + u8 bank; /* bank number */ +}; +static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS]; + struct mce_vendor_flags mce_flags __read_mostly; struct mca_config mca_cfg __read_mostly = { @@ -674,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); */ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); bool error_seen = false; struct mce m; int i; @@ -685,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (flags & MCP_TIMESTAMP) m.tsc = rdtsc(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -787,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, char *tmp; int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { m->status = mce_rdmsrl(msr_ops.status(i)); if (!(m->status & MCI_STATUS_VAL)) continue; @@ -1067,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear) { int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (test_bit(i, toclear)) mce_wrmsrl(msr_ops.status(i), 0); } @@ -1121,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, unsigned long *toclear, unsigned long *valid_banks, int no_way_out, int *worst) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; int severity, i; - for (i = 0; i < cfg->banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) continue; @@ -1329,7 +1348,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) local_irq_enable(); if (kill_it || do_memory_failure(&m)) - force_sig(SIGBUS, current); + force_sig(SIGBUS); local_irq_disable(); ist_end_non_atomic(); } else { @@ -1462,27 +1481,29 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int __mcheck_cpu_mce_banks_init(void) +static void __mcheck_cpu_mce_banks_init(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + u8 n_banks = this_cpu_read(mce_num_banks); int i; - mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); - if (!mce_banks) - return -ENOMEM; - - for (i = 0; i < MAX_NR_BANKS; i++) { + for (i = 0; i < n_banks; i++) { struct mce_bank *b = &mce_banks[i]; + /* + * Init them all, __mcheck_cpu_apply_quirks() is going to apply + * the required vendor quirks before + * __mcheck_cpu_init_clear_banks() does the final bank setup. + */ b->ctl = -1ULL; b->init = 1; } - return 0; } /* * Initialize Machine Checks for a CPU. */ -static int __mcheck_cpu_cap_init(void) +static void __mcheck_cpu_cap_init(void) { u64 cap; u8 b; @@ -1490,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (WARN_ON_ONCE(b > MAX_NR_BANKS)) + + if (b > MAX_NR_BANKS) { + pr_warn("CPU%d: Using only %u machine check banks out of %u\n", + smp_processor_id(), MAX_NR_BANKS, b); b = MAX_NR_BANKS; + } - mca_cfg.banks = max(mca_cfg.banks, b); + this_cpu_write(mce_num_banks, b); - if (!mce_banks) { - int err = __mcheck_cpu_mce_banks_init(); - if (err) - return err; - } + __mcheck_cpu_mce_banks_init(); /* Use accurate RIP reporting if available. */ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) @@ -1507,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void) if (cap & MCG_SER_P) mca_cfg.ser = 1; - - return 0; } static void __mcheck_cpu_init_generic(void) @@ -1535,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void) static void __mcheck_cpu_init_clear_banks(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (!b->init) @@ -1548,6 +1568,33 @@ static void __mcheck_cpu_init_clear_banks(void) } /* + * Do a final check to see if there are any unused/RAZ banks. + * + * This must be done after the banks have been initialized and any quirks have + * been applied. + * + * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs. + * Otherwise, a user who disables a bank will not be able to re-enable it + * without a system reboot. + */ +static void __mcheck_cpu_check_banks(void) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + u64 msrval; + int i; + + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { + struct mce_bank *b = &mce_banks[i]; + + if (!b->init) + continue; + + rdmsrl(msr_ops.ctl(i), msrval); + b->init = !!msrval; + } +} + +/* * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM * Vol 3B Table 15-20). But this confuses both the code that determines @@ -1578,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) /* Add per CPU specific workarounds here */ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; if (c->x86_vendor == X86_VENDOR_UNKNOWN) { @@ -1587,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && cfg->banks > 4) { + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { /* * disable GART TBL walk error reporting, which * trips off incorrectly with the IOMMU & 3ware @@ -1606,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * Various K7s with broken bank 0 around. Always disable * by default. */ - if (c->x86 == 6 && cfg->banks > 0) + if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) mce_banks[0].ctl = 0; /* @@ -1628,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * valid event later, merely don't write CTL0. */ - if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0) + if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) mce_banks[0].init = 0; /* @@ -1814,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) if (!mce_available(c)) return; - if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { + __mcheck_cpu_cap_init(); + + if (__mcheck_cpu_apply_quirks(c) < 0) { mca_cfg.disabled = 1; return; } @@ -1831,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_clear_banks(); + __mcheck_cpu_check_banks(); __mcheck_cpu_setup_timer(); } @@ -1862,7 +1913,7 @@ static void __mce_disable_bank(void *arg) void mce_disable_bank(int bank) { - if (bank >= mca_cfg.banks) { + if (bank >= this_cpu_read(mce_num_banks)) { pr_warn(FW_BUG "Ignoring request to disable invalid MCA bank %d.\n", bank); @@ -1948,9 +1999,10 @@ int __init mcheck_init(void) */ static void mce_disable_error_reporting(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2050,26 +2102,47 @@ static struct bus_type mce_subsys = { DEFINE_PER_CPU(struct device *, mce_device); -static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) +static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr) { - return container_of(attr, struct mce_bank, attr); + return container_of(attr, struct mce_bank_dev, attr); } static ssize_t show_bank(struct device *s, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); + u8 bank = attr_to_bank(attr)->bank; + struct mce_bank *b; + + if (bank >= per_cpu(mce_num_banks, s->id)) + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; + + if (!b->init) + return -ENODEV; + + return sprintf(buf, "%llx\n", b->ctl); } static ssize_t set_bank(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { + u8 bank = attr_to_bank(attr)->bank; + struct mce_bank *b; u64 new; if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; - attr_to_bank(attr)->ctl = new; + if (bank >= per_cpu(mce_num_banks, s->id)) + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; + + if (!b->init) + return -ENODEV; + + b->ctl = new; mce_restart(); return size; @@ -2184,7 +2257,7 @@ static void mce_device_release(struct device *dev) kfree(dev); } -/* Per cpu device init. All of the cpus still share the same ctrl bank: */ +/* Per CPU device init. All of the CPUs still share the same bank device: */ static int mce_device_create(unsigned int cpu) { struct device *dev; @@ -2216,8 +2289,8 @@ static int mce_device_create(unsigned int cpu) if (err) goto error; } - for (j = 0; j < mca_cfg.banks; j++) { - err = device_create_file(dev, &mce_banks[j].attr); + for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) { + err = device_create_file(dev, &mce_bank_devs[j].attr); if (err) goto error2; } @@ -2227,7 +2300,7 @@ static int mce_device_create(unsigned int cpu) return 0; error2: while (--j >= 0) - device_remove_file(dev, &mce_banks[j].attr); + device_remove_file(dev, &mce_bank_devs[j].attr); error: while (--i >= 0) device_remove_file(dev, mce_device_attrs[i]); @@ -2248,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu) for (i = 0; mce_device_attrs[i]; i++) device_remove_file(dev, mce_device_attrs[i]); - for (i = 0; i < mca_cfg.banks; i++) - device_remove_file(dev, &mce_banks[i].attr); + for (i = 0; i < per_cpu(mce_num_banks, cpu); i++) + device_remove_file(dev, &mce_bank_devs[i].attr); device_unregister(dev); cpumask_clear_cpu(cpu, mce_device_initialized); @@ -2270,6 +2343,7 @@ static void mce_disable_cpu(void) static void mce_reenable_cpu(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; if (!mce_available(raw_cpu_ptr(&cpu_info))) @@ -2277,7 +2351,7 @@ static void mce_reenable_cpu(void) if (!cpuhp_tasks_frozen) cmci_reenable(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2327,10 +2401,12 @@ static __init void mce_init_banks(void) { int i; - for (i = 0; i < mca_cfg.banks; i++) { - struct mce_bank *b = &mce_banks[i]; + for (i = 0; i < MAX_NR_BANKS; i++) { + struct mce_bank_dev *b = &mce_bank_devs[i]; struct device_attribute *a = &b->attr; + b->bank = i; + sysfs_attr_init(&a->attr); a->attr.name = b->attrname; snprintf(b->attrname, ATTR_LEN, "bank%d", i); @@ -2440,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mcheck_debugfs_init(void) +static void __init mcheck_debugfs_init(void) { - struct dentry *dmce, *ffake_panic; + struct dentry *dmce; dmce = mce_get_debugfs_dir(); - if (!dmce) - return -ENOMEM; - ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce, - NULL, &fake_panic_fops); - if (!ffake_panic) - return -ENOMEM; - - return 0; + debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL, + &fake_panic_fops); } #else -static int __init mcheck_debugfs_init(void) { return -EINVAL; } +static void __init mcheck_debugfs_init(void) { } #endif DEFINE_STATIC_KEY_FALSE(mcsafe_key); @@ -2463,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { - pr_info("Using %d MCE banks\n", mca_cfg.banks); - if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index 9690ec5c8051..7c8958dee103 100644 --- a/arch/x86/kernel/cpu/mce/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * /dev/mcelog driver * diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index 64d1d5a00f39..fbe8b61c3413 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * MCE event pool management in MCE context * * Copyright (C) 2015 Intel Corp. * Author: Chen, Gong <gong.chen@linux.intel.com> - * - * This file is licensed under GPLv2. */ #include <linux/smp.h> #include <linux/mm.h> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index a6026170af92..1f30117b24ba 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Machine check injection support. * Copyright 2008 Intel Corporation. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * * Authors: * Andi Kleen * Ying Huang @@ -16,9 +12,6 @@ * built as module so that it can be loaded on production kernels for * testing purposes. * - * This file may be distributed under the terms of the GNU General Public - * License version 2. - * * Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de> * Advanced Micro Devices Inc. */ @@ -652,7 +645,6 @@ static const struct file_operations readme_fops = { static struct dfs_node { char *name; - struct dentry *d; const struct file_operations *fops; umode_t perm; } dfs_fls[] = { @@ -666,49 +658,23 @@ static struct dfs_node { { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH }, }; -static int __init debugfs_init(void) +static void __init debugfs_init(void) { unsigned int i; dfs_inj = debugfs_create_dir("mce-inject", NULL); - if (!dfs_inj) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { - dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name, - dfs_fls[i].perm, - dfs_inj, - &i_mce, - dfs_fls[i].fops); - - if (!dfs_fls[i].d) - goto err_dfs_add; - } - - return 0; -err_dfs_add: - while (i-- > 0) - debugfs_remove(dfs_fls[i].d); - - debugfs_remove(dfs_inj); - dfs_inj = NULL; - - return -ENODEV; + for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) + debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj, + &i_mce, dfs_fls[i].fops); } static int __init inject_init(void) { - int err; - if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) return -ENOMEM; - err = debugfs_init(); - if (err) { - free_cpumask_var(mce_inject_cpumask); - return err; - } + debugfs_init(); register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify"); mce_register_injector_chain(&inject_nb); diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index a34b55baa7aa..43031db429d2 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -22,17 +22,8 @@ enum severity_level { extern struct blocking_notifier_head x86_mce_decoder_chain; -#define ATTR_LEN 16 #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ -/* One object for each MCE bank, shared by all CPUs */ -struct mce_bank { - u64 ctl; /* subevents to enable */ - unsigned char init; /* initialise bank? */ - struct device_attribute attr; /* device attribute */ - char attrname[ATTR_LEN]; /* attribute name */ -}; - struct mce_evt_llist { struct llist_node llnode; struct mce mce; @@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void); extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); struct dentry *mce_get_debugfs_dir(void); -extern struct mce_bank *mce_banks; extern mce_banks_t mce_banks_ce_disabled; #ifdef CONFIG_X86_MCE_INTEL @@ -128,7 +118,6 @@ struct mca_config { bios_cmci_threshold : 1, __reserved : 59; - u8 banks; s8 bootlog; int tolerant; int monarch_timeout; @@ -137,6 +126,7 @@ struct mca_config { }; extern struct mca_config mca_cfg; +DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); struct mce_vendor_flags { /* diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 65201e180fe0..210f1f5db5f7 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * MCE grading rules. * Copyright 2008, 2009 Intel Corporation. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - * * Author: Andi Kleen */ #include <linux/kernel.h> @@ -404,21 +400,13 @@ static const struct file_operations severities_coverage_fops = { static int __init severities_debugfs_init(void) { - struct dentry *dmce, *fsev; + struct dentry *dmce; dmce = mce_get_debugfs_dir(); - if (!dmce) - goto err_out; - - fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL, - &severities_coverage_fops); - if (!fsev) - goto err_out; + debugfs_create_file("severities-coverage", 0444, dmce, NULL, + &severities_coverage_fops); return 0; - -err_out: - return -ENOMEM; } late_initcall(severities_debugfs_init); #endif /* CONFIG_DEBUG_FS */ diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 10a3b0599300..6e2becf547c5 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Thermal throttle event support code (such as syslog messaging and rate * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile index ba12e8aa4a45..34098d48c48f 100644 --- a/arch/x86/kernel/cpu/microcode/Makefile +++ b/arch/x86/kernel/cpu/microcode/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only microcode-y := core.o obj-$(CONFIG_MICROCODE) += microcode.o microcode-$(CONFIG_MICROCODE_INTEL) += intel.o diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index e1f3ba19ba54..a0e52bd00ecc 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * AMD CPU Microcode Update Driver for Linux * @@ -17,9 +18,6 @@ * * Author: Jacob Shin <jacob.shin@amd.com> * Fixes: Borislav Petkov <bp@suse.de> - * - * Licensed under the terms of the GNU General Public - * License version 2. See file COPYING for details. */ #define pr_fmt(fmt) "microcode: " fmt @@ -61,7 +59,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/microcode.txt + * format. See Documentation/x86/microcode.rst */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index c321f4f513f9..cb0fdcaf1415 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * CPU Microcode Update Driver for Linux * @@ -12,11 +13,6 @@ * (C) 2015 Borislav Petkov <bp@alien8.de> * * This driver allows to upgrade microcode on x86 processors. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "microcode: " fmt @@ -793,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int mc_cpu_online(unsigned int cpu) +static int mc_cpu_starting(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); microcode_update_cpu(cpu); pr_debug("CPU%d added\n", cpu); + return 0; +} + +static int mc_cpu_online(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); @@ -876,6 +875,8 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); + cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", + mc_cpu_starting, NULL); cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a44bdbe7c55e..ce799cfe9434 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Intel CPU Microcode Update Driver for Linux * @@ -8,11 +9,6 @@ * * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> * H Peter Anvin" <hpa@zytor.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ /* diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index d0dfb892c72f..aed45b8895d5 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -4,6 +4,8 @@ # Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h # +set -e + IN=$1 OUT=$2 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 3fa238a137d2..062f77279ce3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * HyperV Detection code. * * Copyright (C) 2010, Novell, Inc. * Author : K. Y. Srinivasan <ksrinivasan@novell.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * */ #include <linux/types.h> @@ -21,6 +17,7 @@ #include <linux/irq.h> #include <linux/kexec.h> #include <linux/i8253.h> +#include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -84,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); ack_APIC_irq(); exiting_irq(); @@ -93,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) { *vector = HYPERV_STIMER0_VECTOR; - *irq = 0; /* Unused on x86/x64 */ + *irq = -1; /* Unused on x86/x64 */ hv_stimer0_handler = handler; return 0; } @@ -270,9 +268,9 @@ static void __init ms_hyperv_init_platform(void) rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); - lapic_timer_frequency = hv_lapic_frequency; + lapic_timer_period = hv_lapic_frequency; pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + lapic_timer_period); } register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index 2ad9107ee980..cc4f9f1cb94c 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-y := mtrr.o if.o generic.o cleanup.o obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 86e277f8daf4..aa5c064a6a22 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong * because MTRRs can span up to 40 bits (36bits on most modern x86) @@ -742,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); - wbinvd(); + + /* + * Cache flushing is the most time-consuming step when programming + * the MTRRs. Fortunately, as per the Intel Software Development + * Manual, we can skip it if the processor supports cache self- + * snooping. + */ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (boot_cpu_has(X86_FEATURE_PGE)) { @@ -759,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); - wbinvd(); + + /* Again, only flush caches if we have to. */ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); } static void post_set(void) __releases(set_atomicity_lock) diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index cfa97ff67bda..5c900f9527ff 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of the Linux kernel. * * Copyright (c) 2011, Intel Corporation * Authors: Fenghua Yu <fenghua.yu@intel.com>, * H. Peter Anvin <hpa@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * */ #include <asm/processor.h> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index c3a9dc63edf2..03eb90d00af0 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Resource Director Technology(RDT) * - Cache Allocation code. @@ -9,15 +10,6 @@ * Tony Luck <tony.luck@intel.com> * Vikas Shivappa <vikas.shivappa@intel.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * * More information about RDT be found in the Intel (R) x86 Architecture * Software Developer Manual June 2016, volume 3, section 17.17. */ diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 89320c0396b1..efbd54cc4e69 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Resource Director Technology(RDT) * - Cache Allocation code. @@ -8,15 +9,6 @@ * Fenghua Yu <fenghua.yu@intel.com> * Tony Luck <tony.luck@intel.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * * More information about RDT be found in the Intel (R) x86 Architecture * Software Developer Manual June 2016, volume 3, section 17.17. */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 1573a0a6b525..397206f23d14 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Resource Director Technology(RDT) * - Monitoring code @@ -10,15 +11,6 @@ * This replaces the cqm.c based on perf but we reuse a lot of * code and datastructures originally from Peter Zijlstra and Matt Fleming. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * * More information about RDT be found in the Intel (R) x86 Architecture * Software Developer Manual June 2016, volume 3, section 17.17. */ @@ -368,6 +360,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) struct list_head *head; struct rdtgroup *entry; + if (!is_mbm_local_enabled()) + return; + r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; closid = rgrp->closid; rmid = rgrp->mon.rmid; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 604c0e3bcc83..d7623e1b927d 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -431,11 +431,7 @@ static int pseudo_lock_fn(void *_rdtgrp) #else register unsigned int line_size asm("esi"); register unsigned int size asm("edi"); -#ifdef CONFIG_X86_64 - register void *mem_r asm("rbx"); -#else - register void *mem_r asm("ebx"); -#endif /* CONFIG_X86_64 */ + register void *mem_r asm(_ASM_BX); #endif /* CONFIG_KASAN */ /* @@ -1503,7 +1499,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * may be scheduled elsewhere and invalidate entries in the * pseudo-locked region. */ - if (!cpumask_subset(¤t->cpus_allowed, &plr->d->cpu_mask)) { + if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 333c177a2471..a46dee8e78db 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * User interface for Resource Alloction in Resource Director Technology(RDT) * @@ -5,15 +6,6 @@ * * Author: Fenghua Yu <fenghua.yu@intel.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * * More information about RDT be found in the Intel (R) x86 Architecture * Software Developer Manual. */ @@ -804,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; - u32 sw_shareable = 0, hw_shareable = 0; - u32 exclusive = 0, pseudo_locked = 0; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; @@ -850,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, (unsigned long *)&hw_shareable); - swb = test_bit(i, (unsigned long *)&sw_shareable); - excl = test_bit(i, (unsigned long *)&exclusive); - psl = test_bit(i, (unsigned long *)&pseudo_locked); + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); if (hwb && swb) seq_putc(seq, 'X'); else if (hwb && !swb) @@ -2108,8 +2104,7 @@ static int rdt_init_fs_context(struct fs_context *fc) ctx->kfc.magic = RDTGROUP_SUPER_MAGIC; fc->fs_private = &ctx->kfc; fc->ops = &rdt_fs_context_ops; - if (fc->user_ns) - put_user_ns(fc->user_ns); + put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(&init_user_ns); fc->global = true; return 0; @@ -2492,28 +2487,21 @@ out_destroy: * modification to the CBM if the default does not satisfy the * requirements. */ -static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) +static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) { - /* - * Convert the u32 _val to an unsigned long required by all the bit - * operations within this function. No more than 32 bits of this - * converted value can be accessed because all bit operations are - * additionally provided with cbm_len that is initialized during - * hardware enumeration using five bits from the EAX register and - * thus never can exceed 32 bits. - */ - unsigned long *val = (unsigned long *)_val; unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; + unsigned long val = _val; - if (*val == 0) - return; + if (!val) + return 0; - first_bit = find_first_bit(val, cbm_len); - zero_bit = find_next_zero_bit(val, cbm_len, first_bit); + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(val, zero_bit, cbm_len - zero_bit); + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + return (u32)val; } /* @@ -2542,7 +2530,12 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) - break; + /* + * ctrl values for locksetup aren't relevant + * until the schemata is written, and the mode + * becomes RDT_MODE_PSEUDO_LOCKED. + */ + continue; /* * If CDP is active include peer domain's * usage to ensure there is no overlap @@ -2566,7 +2559,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * Force the initial CBM to be valid, user can * modify the CBM based on system availability. */ - cbm_ensure_valid(&d->new_ctrl, r); + d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r); /* * Assign the u32 CBM to an unsigned long to ensure that * bitmap_weight() does not access out-of-bound memory. diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 94aa1c72ca98..adf9b71386ef 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,10 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 8f6c784141d1..ee48c3fc8a65 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -15,33 +15,66 @@ /* leaf 0xb SMT level */ #define SMT_LEVEL 0 -/* leaf 0xb sub-leaf types */ +/* extended topology sub-leaf types */ #define INVALID_TYPE 0 #define SMT_TYPE 1 #define CORE_TYPE 2 +#define DIE_TYPE 5 #define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -int detect_extended_topology_early(struct cpuinfo_x86 *c) -{ #ifdef CONFIG_SMP +unsigned int __max_die_per_package __read_mostly = 1; +EXPORT_SYMBOL(__max_die_per_package); + +/* + * Check if given CPUID extended toplogy "leaf" is implemented + */ +static int check_extended_topology_leaf(int leaf) +{ unsigned int eax, ebx, ecx, edx; - if (c->cpuid_level < 0xb) + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) return -1; - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + return 0; +} +/* + * Return best CPUID Extended Toplogy Leaf supported + */ +static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) +{ + if (c->cpuid_level >= 0x1f) { + if (check_extended_topology_leaf(0x1f) == 0) + return 0x1f; + } - /* - * check if the cpuid leaf 0xb is actually implemented. - */ - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + if (c->cpuid_level >= 0xb) { + if (check_extended_topology_leaf(0xb) == 0) + return 0xb; + } + + return -1; +} +#endif + +int detect_extended_topology_early(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int eax, ebx, ecx, edx; + int leaf; + + leaf = detect_extended_topology_leaf(c); + if (leaf < 0) return -1; set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); /* * initial apic id, which also represents 32-bit extended x2apic id. */ @@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c) } /* - * Check for extended topology enumeration cpuid leaf 0xb and if it + * Check for extended topology enumeration cpuid leaf, and if it * exists, use it for populating initial_apicid and cpu topology * detection. */ @@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width; + unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; unsigned int core_select_mask, core_level_siblings; + unsigned int die_select_mask, die_level_siblings; + int leaf; - if (detect_extended_topology_early(c) < 0) + leaf = detect_extended_topology_leaf(c); + if (leaf < 0) return -1; /* * Populate HT related information from sub-leaf level 0. */ - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + c->initial_apicid = edx; core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); sub_index = 1; do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); /* * Check for the Core type in the implemented sub leaves. @@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c) if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - break; + die_level_siblings = core_level_siblings; + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + } + if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } sub_index++; } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - - c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) - & core_select_mask; - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); + die_select_mask = (~(-1 << die_plus_mask_width)) >> + core_plus_mask_width; + + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, + ht_mask_width) & core_select_mask; + c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, + core_plus_mask_width) & die_select_mask; + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, + die_plus_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. */ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); + __max_die_per_package = (die_level_siblings / core_level_siblings); #endif return 0; } diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c new file mode 100644 index 000000000000..6a204e7336c1 --- /dev/null +++ b/arch/x86/kernel/cpu/umwait.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/syscore_ops.h> +#include <linux/suspend.h> +#include <linux/cpu.h> + +#include <asm/msr.h> + +#define UMWAIT_C02_ENABLE 0 + +#define UMWAIT_CTRL_VAL(max_time, c02_disable) \ + (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \ + ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE)) + +/* + * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, + * umwait max time is 100000 in TSC-quanta and C0.2 is enabled + */ +static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); + +/* + * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in + * the sysfs write functions. + */ +static DEFINE_MUTEX(umwait_lock); + +static void umwait_update_control_msr(void * unused) +{ + lockdep_assert_irqs_disabled(); + wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0); +} + +/* + * The CPU hotplug callback sets the control MSR to the global control + * value. + * + * Disable interrupts so the read of umwait_control_cached and the WRMSR + * are protected against a concurrent sysfs write. Otherwise the sysfs + * write could update the cached value after it had been read on this CPU + * and issue the IPI before the old value had been written. The IPI would + * interrupt, write the new value and after return from IPI the previous + * value would be written by this CPU. + * + * With interrupts disabled the upcoming CPU either sees the new control + * value or the IPI is updating this CPU to the new control value after + * interrupts have been reenabled. + */ +static int umwait_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + umwait_update_control_msr(NULL); + local_irq_enable(); + return 0; +} + +/* + * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which + * is the only active CPU at this time. The MSR is set up on the APs via the + * CPU hotplug callback. + * + * This function is invoked on resume from suspend and hibernation. On + * resume from suspend the restore should be not required, but we neither + * trust the firmware nor does it matter if the same value is written + * again. + */ +static void umwait_syscore_resume(void) +{ + umwait_update_control_msr(NULL); +} + +static struct syscore_ops umwait_syscore_ops = { + .resume = umwait_syscore_resume, +}; + +/* sysfs interface */ + +/* + * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled. + * Otherwise, C0.2 is enabled. + */ +static inline bool umwait_ctrl_c02_enabled(u32 ctrl) +{ + return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE); +} + +static inline u32 umwait_ctrl_max_time(u32 ctrl) +{ + return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; +} + +static inline void umwait_update_control(u32 maxtime, bool c02_enable) +{ + u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; + + if (!c02_enable) + ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE; + + WRITE_ONCE(umwait_control_cached, ctrl); + /* Propagate to all CPUs */ + on_each_cpu(umwait_update_control_msr, NULL, 1); +} + +static ssize_t +enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(umwait_control_cached); + + return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl)); +} + +static ssize_t enable_c02_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool c02_enable; + u32 ctrl; + int ret; + + ret = kstrtobool(buf, &c02_enable); + if (ret) + return ret; + + mutex_lock(&umwait_lock); + + ctrl = READ_ONCE(umwait_control_cached); + if (c02_enable != umwait_ctrl_c02_enabled(ctrl)) + umwait_update_control(ctrl, c02_enable); + + mutex_unlock(&umwait_lock); + + return count; +} +static DEVICE_ATTR_RW(enable_c02); + +static ssize_t +max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(umwait_control_cached); + + return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl)); +} + +static ssize_t max_time_store(struct device *kobj, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 max_time, ctrl; + int ret; + + ret = kstrtou32(buf, 0, &max_time); + if (ret) + return ret; + + /* bits[1:0] must be zero */ + if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK) + return -EINVAL; + + mutex_lock(&umwait_lock); + + ctrl = READ_ONCE(umwait_control_cached); + if (max_time != umwait_ctrl_max_time(ctrl)) + umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl)); + + mutex_unlock(&umwait_lock); + + return count; +} +static DEVICE_ATTR_RW(max_time); + +static struct attribute *umwait_attrs[] = { + &dev_attr_enable_c02.attr, + &dev_attr_max_time.attr, + NULL +}; + +static struct attribute_group umwait_attr_group = { + .attrs = umwait_attrs, + .name = "umwait_control", +}; + +static int __init umwait_init(void) +{ + struct device *dev; + int ret; + + if (!boot_cpu_has(X86_FEATURE_WAITPKG)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online", + umwait_cpu_online, NULL); + + register_syscore_ops(&umwait_syscore_ops); + + /* + * Add umwait control interface. Ignore failure, so at least the + * default values are set up in case the machine manages to boot. + */ + dev = cpu_subsys.dev_root; + return sysfs_create_group(&dev->kobj, &umwait_attr_group); +} +device_initcall(umwait_init); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 0eda91f8eeac..3c648476d4fb 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void) #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ - lapic_timer_frequency = ecx / HZ; + lapic_timer_period = ecx / HZ; pr_info("Host bus clock speed read from hypervisor : %u Hz\n", ecx); #endif diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c new file mode 100644 index 000000000000..8e6f2f4b4afe --- /dev/null +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/sched.h> +#include <linux/sched/clock.h> + +#include <asm/cpufeature.h> + +#include "cpu.h" + +#define MSR_ZHAOXIN_FCR57 0x00001257 + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */ + +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + +static void init_zhaoxin_cap(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + + /* Test for Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* Enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable ACE unit */ + lo |= ACE_FCR; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled ACE h/w crypto\n"); + } + + /* Enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable RNG unit */ + lo |= RNG_ENABLE; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled h/w RNG\n"); + } + + /* + * Store Extended Feature Flags as word 5 of the CPU + * capability bit array + */ + c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001); + } + + if (c->x86 >= 0x6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + cpu_detect_cache_sizes(c); +} + +static void early_init_zhaoxin(struct cpuinfo_x86 *c) +{ + if (c->x86 >= 0x6) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#endif + if (c->x86_power & (1 << 8)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } + + if (c->cpuid_level >= 0x00000001) { + u32 eax, ebx, ecx, edx; + + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + /* + * If HTT (EDX[28]) is set EBX[16:23] contain the number of + * apicids which are reserved per package. Store the resulting + * shift value for the package management code. + */ + if (edx & (1U << 28)) + c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); + } + +} + +static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + +static void init_zhaoxin(struct cpuinfo_x86 *c) +{ + early_init_zhaoxin(c); + init_intel_cacheinfo(c); + detect_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + + if (c->cpuid_level > 9) { + unsigned int eax = cpuid_eax(10); + + /* + * Check for version and the number of counters + * Version(eax[7:0]) can't be 0; + * Counters(eax[15:8]) should be greater than 1; + */ + if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (c->x86 >= 0x6) + init_zhaoxin_cap(c); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#endif + + if (cpu_has(c, X86_FEATURE_VMX)) + zhaoxin_detect_vmx_virtcap(c); +} + +#ifdef CONFIG_X86_32 +static unsigned int +zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + return size; +} +#endif + +static const struct cpu_dev zhaoxin_cpu_dev = { + .c_vendor = "zhaoxin", + .c_ident = { " Shanghai " }, + .c_early_init = early_init_zhaoxin, + .c_init = init_zhaoxin, +#ifdef CONFIG_X86_32 + .legacy_cache_size = zhaoxin_size_cache, +#endif + .c_x86_vendor = X86_VENDOR_ZHAOXIN, +}; + +cpu_dev_register(zhaoxin_cpu_dev); |