diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2023-02-15 20:33:28 +0300 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-02-15 20:33:28 +0300 |
commit | 33436335e93a1788a58443fc99c5ab320ce4b9d9 (patch) | |
tree | d92f88768c8dbd00f8b65164f47a8a091768b95a /arch/x86 | |
parent | 27b025ebb0f6092d5c0a88de2ab73545bc1c496e (diff) | |
parent | c39cea6f38eefe356d64d0bc1e1f2267e282cdd3 (diff) | |
download | linux-33436335e93a1788a58443fc99c5ab320ce4b9d9.tar.xz |
Merge tag 'kvm-riscv-6.3-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv changes for 6.3
- Fix wrong usage of PGDIR_SIZE to check page sizes
- Fix privilege mode setting in kvm_riscv_vcpu_trap_redirect()
- Redirect illegal instruction traps to guest
- SBI PMU support for guest
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/boot/bioscall.S | 4 | ||||
-rw-r--r-- | arch/x86/boot/compressed/ident_map_64.c | 6 | ||||
-rw-r--r-- | arch/x86/boot/compressed/misc.h | 2 | ||||
-rw-r--r-- | arch/x86/boot/compressed/sev.c | 70 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 22 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 1 | ||||
-rw-r--r-- | arch/x86/events/msr.c | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/acpi.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/debugreg.h | 26 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 20 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/svm.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/aperfmperf.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/monitor.c | 49 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/i8259.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/irqinit.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 21 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pat/memtype.c | 3 | ||||
-rw-r--r-- | arch/x86/pci/mmconfig-shared.c | 44 | ||||
-rw-r--r-- | arch/x86/pci/xen.c | 2 | ||||
-rw-r--r-- | arch/x86/um/elfcore.c | 4 | ||||
-rw-r--r-- | arch/x86/xen/p2m.c | 5 |
25 files changed, 271 insertions, 58 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9cf07322875a..73ed982d4100 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -14,13 +14,13 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) -RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) endif ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline endif +RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) ifdef CONFIG_RETHUNK RETHUNK_CFLAGS := -mfunction-return=thunk-extern diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S index 5521ea12f44e..aa9b96457584 100644 --- a/arch/x86/boot/bioscall.S +++ b/arch/x86/boot/bioscall.S @@ -32,7 +32,7 @@ intcall: movw %dx, %si movw %sp, %di movw $11, %cx - rep; movsd + rep; movsl /* Pop full state from the stack */ popal @@ -67,7 +67,7 @@ intcall: jz 4f movw %sp, %si movw $11, %cx - rep; movsd + rep; movsl 4: addw $44, %sp /* Restore state and return */ diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index d4a314cc50d6..321a5011042d 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -180,6 +180,12 @@ void initialize_identity_maps(void *rmode) /* Load the new page-table. */ write_cr3(top_level_pgt); + + /* + * Now that the required page table mappings are established and a + * GHCB can be used, check for SNP guest/HV feature compatibility. + */ + snp_check_features(); } static pte_t *split_large_pmd(struct x86_mapping_info *info, diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 62208ec04ca4..20118fb7c53b 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -126,6 +126,7 @@ static inline void console_init(void) #ifdef CONFIG_AMD_MEM_ENCRYPT void sev_enable(struct boot_params *bp); +void snp_check_features(void); void sev_es_shutdown_ghcb(void); extern bool sev_es_check_ghcb_fault(unsigned long address); void snp_set_page_private(unsigned long paddr); @@ -143,6 +144,7 @@ static inline void sev_enable(struct boot_params *bp) if (bp) bp->cc_blob_address = 0; } +static inline void snp_check_features(void) { } static inline void sev_es_shutdown_ghcb(void) { } static inline bool sev_es_check_ghcb_fault(unsigned long address) { diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index c93930d5ccbd..d63ad8f99f83 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -208,6 +208,23 @@ void sev_es_shutdown_ghcb(void) error("Can't unmap GHCB page"); } +static void __noreturn sev_es_ghcb_terminate(struct ghcb *ghcb, unsigned int set, + unsigned int reason, u64 exit_info_2) +{ + u64 exit_info_1 = SVM_VMGEXIT_TERM_REASON(set, reason); + + vc_ghcb_invalidate(ghcb); + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_TERM_REQUEST); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + while (true) + asm volatile("hlt\n" : : : "memory"); +} + bool sev_es_check_ghcb_fault(unsigned long address) { /* Check whether the fault was on the GHCB page */ @@ -270,6 +287,59 @@ static void enforce_vmpl0(void) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); } +/* + * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need + * guest side implementation for proper functioning of the guest. If any + * of these features are enabled in the hypervisor but are lacking guest + * side implementation, the behavior of the guest will be undefined. The + * guest could fail in non-obvious way making it difficult to debug. + * + * As the behavior of reserved feature bits is unknown to be on the + * safe side add them to the required features mask. + */ +#define SNP_FEATURES_IMPL_REQ (MSR_AMD64_SNP_VTOM | \ + MSR_AMD64_SNP_REFLECT_VC | \ + MSR_AMD64_SNP_RESTRICTED_INJ | \ + MSR_AMD64_SNP_ALT_INJ | \ + MSR_AMD64_SNP_DEBUG_SWAP | \ + MSR_AMD64_SNP_VMPL_SSS | \ + MSR_AMD64_SNP_SECURE_TSC | \ + MSR_AMD64_SNP_VMGEXIT_PARAM | \ + MSR_AMD64_SNP_VMSA_REG_PROTECTION | \ + MSR_AMD64_SNP_RESERVED_BIT13 | \ + MSR_AMD64_SNP_RESERVED_BIT15 | \ + MSR_AMD64_SNP_RESERVED_MASK) + +/* + * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented + * by the guest kernel. As and when a new feature is implemented in the + * guest kernel, a corresponding bit should be added to the mask. + */ +#define SNP_FEATURES_PRESENT (0) + +void snp_check_features(void) +{ + u64 unsupported; + + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* + * Terminate the boot if hypervisor has enabled any feature lacking + * guest side implementation. Pass on the unsupported features mask through + * EXIT_INFO_2 of the GHCB protocol so that those features can be reported + * as part of the guest boot failure. + */ + unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + if (unsupported) { + if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + sev_es_ghcb_terminate(boot_ghcb, SEV_TERM_SET_GEN, + GHCB_SNP_UNSUPPORTED, unsupported); + } +} + void sev_enable(struct boot_params *bp) { unsigned int eax, ebx, ecx, edx; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index aa53d042b943..73bd0d70817e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6339,6 +6339,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: pmem = true; x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index a2834bc93149..551741e79e03 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,6 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL + * MTL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,50 +52,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL + * ICL,TGL,RKL,ADL,RPL,MTL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR + * RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL,RPL + * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL + * TNT,RKL,ADL,RPL,MTL * Scope: Package (physical package) * */ @@ -676,6 +677,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &icx_cstates), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates), @@ -686,6 +688,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6f1ccc57a692..459b1aafd4d4 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1833,6 +1833,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ecced3a52668..c65d8906cbcf 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_D: @@ -107,6 +108,8 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 65064d9f7fa6..8eb74cf386db 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -14,6 +14,7 @@ #include <asm/mmu.h> #include <asm/mpspec.h> #include <asm/x86_init.h> +#include <asm/cpufeature.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -63,6 +64,13 @@ extern int (*acpi_suspend_lowlevel)(void); /* Physical address to resume after wakeup */ unsigned long acpi_get_wakeup_address(void); +static inline bool acpi_skip_set_wakeup_address(void) +{ + return cpu_feature_enabled(X86_FEATURE_XENPV); +} + +#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address + /* * Check if the CPU can handle C2 and deeper */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index b049d950612f..ca97442e8d49 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -39,7 +39,20 @@ static __always_inline unsigned long native_get_debugreg(int regno) asm("mov %%db6, %0" :"=r" (val)); break; case 7: - asm("mov %%db7, %0" :"=r" (val)); + /* + * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them + * with other code. + * + * This is needed because a DR7 access can cause a #VC exception + * when running under SEV-ES. Taking a #VC exception is not a + * safe thing to do just anywhere in the entry code and + * re-ordering might place the access into an unsafe location. + * + * This happened in the NMI handler, where the DR7 read was + * re-ordered to happen before the call to sev_es_ist_enter(), + * causing stack recursion. + */ + asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER); break; default: BUG(); @@ -66,7 +79,16 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value) asm("mov %0, %%db6" ::"r" (value)); break; case 7: - asm("mov %0, %%db7" ::"r" (value)); + /* + * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them + * with other code. + * + * While is didn't happen with a DR7 write (see the DR7 read + * comment above which explains where it happened), add the + * __FORCE_ORDER here too to avoid similar problems in the + * future. + */ + asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER); break; default: BUG(); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 37ff47552bcb..d3fe82c5d6b6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -566,6 +566,26 @@ #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) +/* SNP feature bits enabled by the hypervisor */ +#define MSR_AMD64_SNP_VTOM BIT_ULL(3) +#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4) +#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5) +#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6) +#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7) +#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8) +#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9) +#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10) +#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11) +#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12) +#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14) +#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16) +#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17) + +/* SNP feature bits reserved for future use. */ +#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13) +#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15) +#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18) + #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f /* AMD Collaborative Processor Performance Control MSRs */ diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index f69c168391aa..80e1df482337 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -116,6 +116,12 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd +#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe +#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ + /* SW_EXITINFO1[3:0] */ \ + (((((u64)reason_set) & 0xf)) | \ + /* SW_EXITINFO1[11:4] */ \ + ((((u64)reason_code) & 0xff) << 4)) #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff /* Exit code reserved for hypervisor/software use */ diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 1f60a2b27936..fdbb5f07448f 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -330,7 +330,16 @@ static void __init bp_init_freq_invariance(void) static void disable_freq_invariance_workfn(struct work_struct *work) { + int cpu; + static_branch_disable(&arch_scale_freq_key); + + /* + * Set arch_freq_scale to a default value on all cpus + * This negates the effect of scaling + */ + for_each_possible_cpu(cpu) + per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE; } static DECLARE_WORK(disable_freq_invariance_work, diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index efe0c30d3a12..77538abeb72a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid) return entry; } +static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + + /* + * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured + * with a valid event code for supported resource type and the bits + * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, + * IA32_QM_CTR.data (bits 61:0) reports the monitored data. + * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) + * are error bits. + */ + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) @@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); - if (am) + if (am) { memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __rmid_read(rmid, eventid, &am->prev_msr); + } } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; u64 msr_val, chunks; + int ret; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) return -EINVAL; - /* - * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured - * with a valid event code for supported resource type and the bits - * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, - * IA32_QM_CTR.data (bits 61:0) reports the monitored data. - * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) - * are error bits. - */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, msr_val); - - if (msr_val & RMID_VAL_ERROR) - return -EIO; - if (msr_val & RMID_VAL_UNAVAIL) - return -EINVAL; + ret = __rmid_read(rmid, eventid, &msr_val); + if (ret) + return ret; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e5a48f05e787..5993da21d822 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk, /* * Ensure the task's closid and rmid are written before determining if * the task is current that will decide if it will be interrupted. + * This pairs with the full barrier between the rq->curr update and + * resctrl_sched_in() during context switch. */ - barrier(); + smp_mb(); /* * By now, the task's closid and rmid are set. If the task is current @@ -2402,6 +2404,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, WRITE_ONCE(t->rmid, to->mon.rmid); /* + * Order the closid/rmid stores above before the loads + * in task_curr(). This pairs with the full barrier + * between the rq->curr update and resctrl_sched_in() + * during context switch. + */ + smp_mb(); + + /* * If the task is on a CPU, set the CPU in the mask. * The detection is inaccurate as tasks might move or * schedule before the smp function call takes place. diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 3aa5304200c5..4d8aff05a509 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq) disable_irq_nosync(irq); io_apic_irqs &= ~(1<<irq); irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); + irq_set_status_flags(irq, IRQ_LEVEL); enable_irq(irq); lapic_assign_legacy_vector(irq, true); } diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index beb1bada1b0a..c683666876f1 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -65,8 +65,10 @@ void __init init_ISA_irqs(void) legacy_pic->init(0); - for (i = 0; i < nr_legacy_irqs(); i++) + for (i = 0; i < nr_legacy_irqs(); i++) { irq_set_chip_and_handler(i, chip, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } } void __init init_IRQ(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0f82c247e1fd..7896acf05117 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3539,18 +3539,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; - if (var->unusable || !var->present) - ar = 1 << 16; - else { - ar = var->type & 15; - ar |= (var->s & 1) << 4; - ar |= (var->dpl & 3) << 5; - ar |= (var->present & 1) << 7; - ar |= (var->avl & 1) << 12; - ar |= (var->l & 1) << 13; - ar |= (var->db & 1) << 14; - ar |= (var->g & 1) << 15; - } + ar = var->type & 15; + ar |= (var->s & 1) << 4; + ar |= (var->dpl & 3) << 5; + ar |= (var->present & 1) << 7; + ar |= (var->avl & 1) << 12; + ar |= (var->l & 1) << 13; + ar |= (var->db & 1) << 14; + ar |= (var->g & 1) << 15; + ar |= (var->unusable || !var->present) << 16; return ar; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d3987359d441..cb258f58fdc8 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -26,6 +26,7 @@ #include <asm/pti.h> #include <asm/text-patching.h> #include <asm/memtype.h> +#include <asm/paravirt.h> /* * We need to define the tracepoints somewhere, and tlb.c @@ -804,6 +805,9 @@ void __init poking_init(void) poking_mm = mm_alloc(); BUG_ON(!poking_mm); + /* Xen PV guests need the PGD to be pinned. */ + paravirt_arch_dup_mmap(NULL, poking_mm); + /* * Randomize the poking address, but make sure that the following page * will be mapped at the same PMD. We need 2 pages, so find space for 3, diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 46de9cf5c91d..fb4b1b5e0dea 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -387,7 +387,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, u8 mtrr_type, uniform; mtrr_type = mtrr_type_lookup(start, end, &uniform); - if (mtrr_type != MTRR_TYPE_WRBACK) + if (mtrr_type != MTRR_TYPE_WRBACK && + mtrr_type != MTRR_TYPE_INVALID) return _PAGE_CACHE_MODE_UC_MINUS; return _PAGE_CACHE_MODE_WB; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 758cbfe55daa..4b3efaa82ab7 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -12,6 +12,7 @@ */ #include <linux/acpi.h> +#include <linux/efi.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/bitmap.h> @@ -442,17 +443,42 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used) return mcfg_res.flags; } +static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used) +{ +#ifdef CONFIG_EFI + efi_memory_desc_t *md; + u64 size, mmio_start, mmio_end; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + mmio_start = md->phys_addr; + mmio_end = mmio_start + size; + + /* + * N.B. Caller supplies (start, start + size), + * so to match, mmio_end is the first address + * *past* the EFI_MEMORY_MAPPED_IO area. + */ + if (mmio_start <= start && end <= mmio_end) + return true; + } + } +#endif + + return false; +} + typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type); static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, struct pci_mmcfg_region *cfg, - struct device *dev, int with_e820) + struct device *dev, const char *method) { u64 addr = cfg->res.start; u64 size = resource_size(&cfg->res); u64 old_size = size; int num_buses; - char *method = with_e820 ? "E820" : "ACPI motherboard resources"; while (!is_reserved(addr, addr + size, E820_TYPE_RESERVED)) { size >>= 1; @@ -464,10 +490,10 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, return false; if (dev) - dev_info(dev, "MMCONFIG at %pR reserved in %s\n", + dev_info(dev, "MMCONFIG at %pR reserved as %s\n", &cfg->res, method); else - pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", + pr_info(PREFIX "MMCONFIG at %pR reserved as %s\n", &cfg->res, method); if (old_size != size) { @@ -500,7 +526,8 @@ static bool __ref pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early) { if (!early && !acpi_disabled) { - if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) + if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, + "ACPI motherboard resource")) return true; if (dev) @@ -513,6 +540,10 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e "MMCONFIG at %pR not reserved in " "ACPI motherboard resources\n", &cfg->res); + + if (is_mmconf_reserved(is_efi_mmio, cfg, dev, + "EfiMemoryMappedIO")) + return true; } /* @@ -527,7 +558,8 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1); + return is_mmconf_reserved(e820__mapped_all, cfg, dev, + "E820 entry"); return false; } diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index b94f727251b6..8babce71915f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -392,6 +392,7 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) msi_for_each_desc(msidesc, &dev->dev, MSI_DESC_ASSOCIATED) { for (i = 0; i < msidesc->nvec_used; i++) xen_destroy_irq(msidesc->irq + i); + msidesc->irq = 0; } } @@ -433,6 +434,7 @@ static struct msi_domain_ops xen_pci_msi_domain_ops = { }; static struct msi_domain_info xen_pci_msi_domain_info = { + .flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS | MSI_FLAG_DEV_SYSFS, .ops = &xen_pci_msi_domain_ops, }; diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c index 48a3eb09d951..650cdbbdaf45 100644 --- a/arch/x86/um/elfcore.c +++ b/arch/x86/um/elfcore.c @@ -7,7 +7,7 @@ #include <asm/elf.h> -Elf32_Half elf_core_extra_phdrs(void) +Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { if ( vsyscall_ehdr ) { const struct elfhdr *const ehdrp = diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58db86f7b384..9bdc3b656b2c 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -134,11 +134,6 @@ static inline unsigned p2m_mid_index(unsigned long pfn) return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; } -static inline unsigned p2m_index(unsigned long pfn) -{ - return pfn % P2M_PER_PAGE; -} - static void p2m_top_mfn_init(unsigned long *top) { unsigned i; |