diff options
Diffstat (limited to 'arch')
46 files changed, 527 insertions, 373 deletions
diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index 724c4075df40..dd2dd9f0861f 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -25,19 +25,18 @@ CONFIG_PNP=y CONFIG_ISAPNP=y CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_IDE_GENERIC=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_ALI15X3=y -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_CY82C693=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_SCSI_AIC7XXX=m CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_ATA=y +# CONFIG_SATA_PMP is not set +CONFIG_PATA_ALI=y +CONFIG_PATA_CMD64X=y +CONFIG_PATA_CYPRESS=y +CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y CONFIG_DUMMY=m CONFIG_NET_ETHERNET=y diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig index 2aa3ebeb89d7..7a32de51f0fa 100644 --- a/arch/arm/configs/footbridge_defconfig +++ b/arch/arm/configs/footbridge_defconfig @@ -64,7 +64,6 @@ CONFIG_PARIDE_ON26=m CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_NET_VENDOR_3COM=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 875a3c28a267..363f1b1b08e3 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -215,8 +215,6 @@ CONFIG_IIO=m CONFIG_AD5446=m CONFIG_EEPROM_AT24=m CONFIG_SENSORS_LIS3_SPI=m -CONFIG_IDE=m -CONFIG_BLK_DEV_IDECS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=m CONFIG_CHR_DEV_ST=m diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1a5edf562e85..73ca7797b92f 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -545,9 +545,11 @@ void notrace cpu_init(void) * In Thumb-2, msr with an immediate value is not allowed. */ #ifdef CONFIG_THUMB2_KERNEL -#define PLC "r" +#define PLC_l "l" +#define PLC_r "r" #else -#define PLC "I" +#define PLC_l "I" +#define PLC_r "I" #endif /* @@ -569,15 +571,15 @@ void notrace cpu_init(void) "msr cpsr_c, %9" : : "r" (stk), - PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), "I" (offsetof(struct stack, irq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE), "I" (offsetof(struct stack, abt[0])), - PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE), "I" (offsetof(struct stack, und[0])), - PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), "I" (offsetof(struct stack, fiq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) + PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); #endif } diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 44f9b5216ac9..261a0f57cc9a 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -875,16 +875,8 @@ static const struct resource atari_scsi_tt_rsrc[] __initconst = { #define FALCON_IDE_BASE 0xfff00000 static const struct resource atari_falconide_rsrc[] __initconst = { - { - .flags = IORESOURCE_MEM, - .start = FALCON_IDE_BASE, - .end = FALCON_IDE_BASE + 0x39, - }, - { - .flags = IORESOURCE_IRQ, - .start = IRQ_MFP_FSCSI, - .end = IRQ_MFP_FSCSI, - }, + DEFINE_RES_MEM(FALCON_IDE_BASE, 0x38), + DEFINE_RES_MEM(FALCON_IDE_BASE + 0x38, 2), }; int __init atari_platform_init(void) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 59b727b69357..4fe26d54627e 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -323,11 +323,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GAYLE=y -CONFIG_BLK_DEV_BUDDHA=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -344,6 +339,11 @@ CONFIG_GVP11_SCSI=y CONFIG_SCSI_A4000T=y CONFIG_SCSI_ZORRO7XX=y CONFIG_SCSI_ZORRO_ESP=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_GAYLE=y +CONFIG_PATA_BUDDHA=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 9cc9f1a06516..21b2990fe9af 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -324,10 +324,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_FALCON_IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -339,6 +335,10 @@ CONFIG_SCSI_SAS_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_ATARI_SCSI=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_FALCON=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 4e68b72d9c50..b03300df13fc 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -315,11 +315,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_PLATFORM=y -CONFIG_BLK_DEV_MAC_IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -332,6 +327,10 @@ CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_MAC_SCSI=y CONFIG_SCSI_MAC_ESP=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_PLATFORM=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index d31896293c39..e2c8368e2231 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -344,15 +344,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_PLATFORM=y -CONFIG_BLK_DEV_GAYLE=y -CONFIG_BLK_DEV_BUDDHA=y -CONFIG_BLK_DEV_FALCON_IDE=y -CONFIG_BLK_DEV_MAC_IDE=y -CONFIG_BLK_DEV_Q40IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -376,6 +367,13 @@ CONFIG_MVME147_SCSI=y CONFIG_MVME16x_SCSI=y CONFIG_BVME6000_SCSI=y CONFIG_SUN3X_ESP=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_FALCON=y +CONFIG_PATA_GAYLE=y +CONFIG_PATA_BUDDHA=y +CONFIG_PATA_PLATFORM=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 664025a0f6a4..514e2e8cddbd 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -314,10 +314,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_Q40IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -328,6 +324,10 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_FALCON=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 1cdac959bd91..5d16f9b47aa9 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -933,13 +933,15 @@ static const struct resource mac_scsi_ccl_rsrc[] __initconst = { }, }; -static const struct resource mac_ide_quadra_rsrc[] __initconst = { - DEFINE_RES_MEM(0x50F1A000, 0x104), +static const struct resource mac_pata_quadra_rsrc[] __initconst = { + DEFINE_RES_MEM(0x50F1A000, 0x38), + DEFINE_RES_MEM(0x50F1A038, 0x04), DEFINE_RES_IRQ(IRQ_NUBUS_F), }; -static const struct resource mac_ide_pb_rsrc[] __initconst = { - DEFINE_RES_MEM(0x50F1A000, 0x104), +static const struct resource mac_pata_pb_rsrc[] __initconst = { + DEFINE_RES_MEM(0x50F1A000, 0x38), + DEFINE_RES_MEM(0x50F1A038, 0x04), DEFINE_RES_IRQ(IRQ_NUBUS_C), }; @@ -949,7 +951,7 @@ static const struct resource mac_pata_baboon_rsrc[] __initconst = { DEFINE_RES_IRQ(IRQ_BABOON_1), }; -static const struct pata_platform_info mac_pata_baboon_data __initconst = { +static const struct pata_platform_info mac_pata_data __initconst = { .ioport_shift = 2, }; @@ -1067,17 +1069,19 @@ int __init mac_platform_init(void) switch (macintosh_config->ide_type) { case MAC_IDE_QUADRA: - platform_device_register_simple("mac_ide", -1, - mac_ide_quadra_rsrc, ARRAY_SIZE(mac_ide_quadra_rsrc)); + platform_device_register_resndata(NULL, "pata_platform", -1, + mac_pata_quadra_rsrc, ARRAY_SIZE(mac_pata_quadra_rsrc), + &mac_pata_data, sizeof(mac_pata_data)); break; case MAC_IDE_PB: - platform_device_register_simple("mac_ide", -1, - mac_ide_pb_rsrc, ARRAY_SIZE(mac_ide_pb_rsrc)); + platform_device_register_resndata(NULL, "pata_platform", -1, + mac_pata_pb_rsrc, ARRAY_SIZE(mac_pata_pb_rsrc), + &mac_pata_data, sizeof(mac_pata_data)); break; case MAC_IDE_BABOON: platform_device_register_resndata(NULL, "pata_platform", -1, mac_pata_baboon_rsrc, ARRAY_SIZE(mac_pata_baboon_rsrc), - &mac_pata_baboon_data, sizeof(mac_pata_baboon_data)); + &mac_pata_data, sizeof(mac_pata_data)); break; } diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index d6a423875231..5caf1e5be1c2 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -286,14 +286,39 @@ static int q40_set_rtc_pll(struct rtc_pll_info *pll) return -EINVAL; } -static __init int q40_add_kbd_device(void) -{ - struct platform_device *pdev; +#define PCIDE_BASE1 0x1f0 +#define PCIDE_BASE2 0x170 +#define PCIDE_CTL 0x206 + +static const struct resource q40_pata_rsrc_0[] __initconst = { + DEFINE_RES_MEM(q40_isa_io_base + PCIDE_BASE1 * 4, 0x38), + DEFINE_RES_MEM(q40_isa_io_base + (PCIDE_BASE1 + PCIDE_CTL) * 4, 2), + DEFINE_RES_IO(PCIDE_BASE1, 8), + DEFINE_RES_IO(PCIDE_BASE1 + PCIDE_CTL, 1), + DEFINE_RES_IRQ(14), +}; +static const struct resource q40_pata_rsrc_1[] __initconst = { + DEFINE_RES_MEM(q40_isa_io_base + PCIDE_BASE2 * 4, 0x38), + DEFINE_RES_MEM(q40_isa_io_base + (PCIDE_BASE2 + PCIDE_CTL) * 4, 2), + DEFINE_RES_IO(PCIDE_BASE2, 8), + DEFINE_RES_IO(PCIDE_BASE2 + PCIDE_CTL, 1), + DEFINE_RES_IRQ(15), +}; + +static __init int q40_platform_init(void) +{ if (!MACH_IS_Q40) return -ENODEV; - pdev = platform_device_register_simple("q40kbd", -1, NULL, 0); - return PTR_ERR_OR_ZERO(pdev); + platform_device_register_simple("q40kbd", -1, NULL, 0); + + platform_device_register_simple("atari-falcon-ide", 0, q40_pata_rsrc_0, + ARRAY_SIZE(q40_pata_rsrc_0)); + + platform_device_register_simple("atari-falcon-ide", 1, q40_pata_rsrc_1, + ARRAY_SIZE(q40_pata_rsrc_1)); + + return 0; } -arch_initcall(q40_add_kbd_device); +arch_initcall(q40_platform_init); diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 2b543163d90a..76c6034428be 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -91,12 +91,16 @@ struct stack_frame { CALL_ARGS_4(arg1, arg2, arg3, arg4); \ register unsigned long r4 asm("6") = (unsigned long)(arg5) -#define CALL_FMT_0 "=&d" (r2) : -#define CALL_FMT_1 "+&d" (r2) : -#define CALL_FMT_2 CALL_FMT_1 "d" (r3), -#define CALL_FMT_3 CALL_FMT_2 "d" (r4), -#define CALL_FMT_4 CALL_FMT_3 "d" (r5), -#define CALL_FMT_5 CALL_FMT_4 "d" (r6), +/* + * To keep this simple mark register 2-6 as being changed (volatile) + * by the called function, even though register 6 is saved/nonvolatile. + */ +#define CALL_FMT_0 "=&d" (r2) +#define CALL_FMT_1 "+&d" (r2) +#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) +#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) +#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) +#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6) #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -118,7 +122,7 @@ struct stack_frame { " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "R" (stack), \ + : [_stack] "R" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ [_frame] "d" (frame), \ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 9cc71ca9a88f..e84f495e7eb2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -418,6 +418,7 @@ ENTRY(\name) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) tm %r8,0x0001 # coming from user space? diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 90163e6184f5..080e7aed181f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) /* No handlers present - check for system call restart */ clear_pt_regs_flag(regs, PIF_SYSCALL); - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (current->thread.system_call) { regs->int_code = current->thread.system_call; switch (regs->gprs[2]) { diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bfcc327acc6b..26aa2614ee35 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c { static cpumask_t mask; - cpumask_copy(&mask, cpumask_of(cpu)); + cpumask_clear(&mask); + if (!cpu_online(cpu)) + goto out; + cpumask_set_cpu(cpu, &mask); switch (topology_mode) { case TOPOLOGY_MODE_HW: while (info) { @@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c default: fallthrough; case TOPOLOGY_MODE_SINGLE: - cpumask_copy(&mask, cpumask_of(cpu)); break; } cpumask_and(&mask, &mask, cpu_online_mask); +out: cpumask_copy(dst, &mask); } @@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) static cpumask_t mask; int i; - cpumask_copy(&mask, cpumask_of(cpu)); + cpumask_clear(&mask); + if (!cpu_online(cpu)) + goto out; + cpumask_set_cpu(cpu, &mask); if (topology_mode != TOPOLOGY_MODE_HW) goto out; cpu -= cpu % (smp_cpu_mtid + 1); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 813b6e93dc83..c8841f476e91 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -140,7 +140,12 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) /* Allocate variable storage */ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE); vlen += uv_info.guest_virt_base_stor_len; - kvm->arch.pv.stor_var = vzalloc(vlen); + /* + * The Create Secure Configuration Ultravisor Call does not support + * using large pages for the virtual memory area. + * This is a hardware limitation. + */ + kvm->arch.pv.stor_var = vmalloc_no_huge(vlen); if (!kvm->arch.pv.stor_var) goto out_err; return 0; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a16a5294d55f..1886aaf19914 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym) movq %rsp, %rdi /* pt_regs pointer */ - call \cfunc + call kernel_\cfunc /* * No need to switch back to the IST stack. The current stack is either @@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym) /* Switch to the regular task stack */ .Lfrom_usermode_switch_stack_\@: - idtentry_body safe_stack_\cfunc, has_error_code=1 + idtentry_body user_\cfunc, has_error_code=1 _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e28892270c58..3a77f66730d0 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6015,7 +6015,13 @@ __init int intel_pmu_init(void) tsx_attr = hsw_tsx_events_attrs; intel_pmu_pebs_data_source_skl(pmem); - if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + /* + * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default. + * TSX force abort hooks are not required on these systems. Only deploy + * workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT. + */ + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) && + !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) { x86_pmu.flags |= PMU_FL_TFA; x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 4409d2cccfda..e8453de7a964 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -731,7 +731,8 @@ void reserve_lbr_buffers(void) if (!kmem_cache || cpuc->lbr_xsave) continue; - cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL, + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, + GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); } } diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 84a1042c3b01..85feafacc445 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -764,13 +764,14 @@ static struct rapl_model model_spr = { .rapl_msrs = intel_rapl_spr_msrs, }; -static struct rapl_model model_amd_fam17h = { +static struct rapl_model model_amd_hygon = { .events = BIT(PERF_RAPL_PKG), .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, .rapl_msrs = amd_rapl_msrs, }; static const struct x86_cpu_id rapl_model_match[] __initconst = { + X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon), X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb), X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep), X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb), @@ -803,9 +804,6 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), - X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h), - X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h), - X86_MATCH_VENDOR_FAM(AMD, 0x19, &model_amd_fam17h), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ac37830ae941..d0ce5cfd3ac1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -108,7 +108,7 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -/* free ( 3*32+29) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ @@ -378,6 +378,7 @@ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ #define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index fdee23ea4e17..16bf4d4a8159 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); } +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -272,28 +280,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) { u64 mask = -1; diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 73d45b0dfff2..cd9f3e304944 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs) */ #define DECLARE_IDTENTRY_VC(vector, func) \ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \ - __visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \ - __visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code) + __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \ + __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code) /** * DEFINE_IDTENTRY_IST - Emit code for IST entry points @@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs) DEFINE_IDTENTRY_RAW_ERRORCODE(func) /** - * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler - which runs on a safe stack. + * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler + when raised from kernel mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE */ -#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func) +#define DEFINE_IDTENTRY_VC_KERNEL(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func) /** - * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler - which runs on the VC fall-back stack + * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler + when raised from user mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE */ -#define DEFINE_IDTENTRY_VC_IST(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func) - -/** - * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler - * @func: Function name of the entry point - * - * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE - */ -#define DEFINE_IDTENTRY_VC(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(func) +#define DEFINE_IDTENTRY_VC_USER(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func) #else /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 955b06d6325a..27158436f322 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -102,7 +102,8 @@ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ #define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ -#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Willow Cove */ + +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 610a05374c02..0449b125d27f 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -4,8 +4,6 @@ #define HAVE_JUMP_LABEL_BATCH -#define JUMP_LABEL_NOP_SIZE 5 - #include <asm/asm.h> #include <asm/nops.h> @@ -14,15 +12,35 @@ #include <linux/stringify.h> #include <linux/types.h> +#define JUMP_TABLE_ENTRY \ + ".pushsection __jump_table, \"aw\" \n\t" \ + _ASM_ALIGN "\n\t" \ + ".long 1b - . \n\t" \ + ".long %l[l_yes] - . \n\t" \ + _ASM_PTR "%c0 + %c1 - .\n\t" \ + ".popsection \n\t" + +#ifdef CONFIG_STACK_VALIDATION + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + "jmp %l[l_yes] # objtool NOPs this \n\t" + JUMP_TABLE_ENTRY + : : "i" (key), "i" (2 | branch) : : l_yes); + + return false; +l_yes: + return true; +} + +#else + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; @@ -30,16 +48,13 @@ l_yes: return true; } +#endif /* STACK_VALIDATION */ + static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" - ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" - "2:\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + "jmp %l[l_yes]\n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; @@ -47,41 +62,7 @@ l_yes: return true; } -#else /* __ASSEMBLY__ */ - -.macro STATIC_JUMP_IF_TRUE target, key, def -.Lstatic_jump_\@: - .if \def - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .else - .byte BYTES_NOP5 - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key - . - .popsection -.endm - -.macro STATIC_JUMP_IF_FALSE target, key, def -.Lstatic_jump_\@: - .if \def - .byte BYTES_NOP5 - .else - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key + 1 - . - .popsection -.endm +extern int arch_jump_entry_size(struct jump_entry *entry); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ddfb3cad8dff..0607ec4f5091 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ - SMCA_LS_V2, /* Load Store */ + SMCA_LS_V2, SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ @@ -314,17 +314,22 @@ enum smca_bank_types { SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ SMCA_CS, /* Coherent Slave */ - SMCA_CS_V2, /* Coherent Slave */ + SMCA_CS_V2, SMCA_PIE, /* Power, Interrupts, etc. */ SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ - SMCA_PSP_V2, /* Platform Security Processor */ + SMCA_PSP_V2, SMCA_SMU, /* System Management Unit */ - SMCA_SMU_V2, /* System Management Unit */ + SMCA_SMU_V2, SMCA_MP5, /* Microprocessor 5 Unit */ SMCA_NBIO, /* Northbridge IO Unit */ SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ N_SMCA_BANK_TYPES }; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 211ba3375ee9..a7c413432b33 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -772,6 +772,10 @@ #define MSR_TFA_RTM_FORCE_ABORT_BIT 0 #define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) +#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1 +#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT) +#define MSR_TFA_SDV_ENABLE_RTM_BIT 2 +#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT) /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 629c3df243f0..2cef6c5a52c2 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -9,8 +9,13 @@ #define __ASM_X86_SEV_COMMON_H #define GHCB_MSR_INFO_POS 0 -#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) +#define GHCB_DATA_LOW 12 +#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1) +#define GHCB_DATA(v) \ + (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW) + +/* SEV Information Request/Response */ #define GHCB_MSR_SEV_INFO_RESP 0x001 #define GHCB_MSR_SEV_INFO_REQ 0x002 #define GHCB_MSR_VER_MAX_POS 48 @@ -28,6 +33,7 @@ #define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) #define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) +/* CPUID Request/Response */ #define GHCB_MSR_CPUID_REQ 0x004 #define GHCB_MSR_CPUID_RESP 0x005 #define GHCB_MSR_CPUID_FUNC_POS 32 @@ -45,6 +51,14 @@ (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) +/* AP Reset Hold */ +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 + +/* GHCB Hypervisor Feature Request/Response */ +#define GHCB_MSR_HV_FT_REQ 0x080 +#define GHCB_MSR_HV_FT_RESP 0x081 + #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 49ae4e1ac9cd..7de599eba7f0 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -197,7 +197,8 @@ static int __init ffh_cstate_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_INTEL && - c->x86_vendor != X86_VENDOR_AMD) + c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) return -1; cpu_cstate_entry = alloc_percpu(struct cstate_entry); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 09083094eb57..23dda362dc0f 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -25,6 +25,7 @@ #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 #define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -57,6 +58,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, {} }; @@ -72,6 +74,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c06ac56eae4d..b7c003013d41 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -646,6 +646,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & BIT(12)) set_cpu_cap(c, X86_FEATURE_ACC_POWER); + /* Bit 14 indicates the Runtime Average Power Limit interface. */ + if (c->x86_power & BIT(14)) + set_cpu_cap(c, X86_FEATURE_RAPL); + #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); #else diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 67944128876d..95521302630d 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -48,6 +48,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], enum tsx_ctrl_states { TSX_CTRL_ENABLE, TSX_CTRL_DISABLE, + TSX_CTRL_RTM_ALWAYS_ABORT, TSX_CTRL_NOT_SUPPORTED, }; @@ -56,6 +57,7 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; extern void __init tsx_init(void); extern void tsx_enable(void); extern void tsx_disable(void); +extern void tsx_clear_cpuid(void); #else static inline void tsx_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 0bd6c74e3ba1..6d50136f7ab9 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -260,6 +260,10 @@ static void early_init_hygon(struct cpuinfo_x86 *c) if (c->x86_power & BIT(12)) set_cpu_cap(c, X86_FEATURE_ACC_POWER); + /* Bit 14 indicates the Runtime Average Power Limit interface. */ + if (c->x86_power & BIT(14)) + set_cpu_cap(c, X86_FEATURE_RAPL); + #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8adffc17fa8b..861e919eba9a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -717,8 +717,10 @@ static void init_intel(struct cpuinfo_x86 *c) if (tsx_ctrl_state == TSX_CTRL_ENABLE) tsx_enable(); - if (tsx_ctrl_state == TSX_CTRL_DISABLE) + else if (tsx_ctrl_state == TSX_CTRL_DISABLE) tsx_disable(); + else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT) + tsx_clear_cpuid(); split_lock_init(); bus_lock_init(); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index e486f96b3cb3..08831acc1d03 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -77,27 +77,29 @@ struct smca_bank_name { }; static struct smca_bank_name smca_names[] = { - [SMCA_LS] = { "load_store", "Load Store Unit" }, - [SMCA_LS_V2] = { "load_store", "Load Store Unit" }, - [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, - [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, - [SMCA_DE] = { "decode_unit", "Decode Unit" }, - [SMCA_RESERVED] = { "reserved", "Reserved" }, - [SMCA_EX] = { "execution_unit", "Execution Unit" }, - [SMCA_FP] = { "floating_point", "Floating Point Unit" }, - [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, - [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, - [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, - [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, - [SMCA_UMC] = { "umc", "Unified Memory Controller" }, - [SMCA_PB] = { "param_block", "Parameter Block" }, - [SMCA_PSP] = { "psp", "Platform Security Processor" }, - [SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, - [SMCA_SMU] = { "smu", "System Management Unit" }, - [SMCA_SMU_V2] = { "smu", "System Management Unit" }, - [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, - [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, - [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, + [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" }, + [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, + [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, + [SMCA_DE] = { "decode_unit", "Decode Unit" }, + [SMCA_RESERVED] = { "reserved", "Reserved" }, + [SMCA_EX] = { "execution_unit", "Execution Unit" }, + [SMCA_FP] = { "floating_point", "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, + [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, + [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, + + /* UMC v2 is separate because both of them can exist in a single system. */ + [SMCA_UMC] = { "umc", "Unified Memory Controller" }, + [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" }, + [SMCA_PB] = { "param_block", "Parameter Block" }, + [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, + [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" }, + [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, + [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, + [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" }, + [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" }, + [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" }, + [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" }, }; static const char *smca_get_name(enum smca_bank_types t) @@ -155,6 +157,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* Unified Memory Controller MCA type */ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) }, + { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) }, /* Parameter Block MCA type */ { SMCA_PB, HWID_MCATYPE(0x05, 0x0) }, @@ -175,6 +178,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* PCI Express Unit MCA type */ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) }, + { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) }, + + /* xGMI PCS MCA type */ + { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) }, + + /* xGMI PHY MCA type */ + { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, + + /* WAFL PHY MCA type */ + { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, }; struct smca_bank smca_banks[MAX_NR_BANKS]; diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index b58b85380ddb..0e3ae64d3b76 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -36,7 +36,8 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) mce_setup(&m); m.bank = -1; /* Fake a memory read error with unknown channel */ - m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; + m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f; + m.misc = (MCI_MISC_ADDR_PHYS << 6) | PAGE_SHIFT; if (severity >= GHES_SEV_RECOVERABLE) m.status |= MCI_STATUS_UC; diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index e2ad30e474f8..9c7a5f049292 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -2,7 +2,7 @@ /* * Intel Transactional Synchronization Extensions (TSX) control. * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * Author: * Pawan Gupta <pawan.kumar.gupta@linux.intel.com> @@ -84,13 +84,46 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) return TSX_CTRL_ENABLE; } +void tsx_clear_cpuid(void) +{ + u64 msr; + + /* + * MSR_TFA_TSX_CPUID_CLEAR bit is only present when both CPUID + * bits RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are present. + */ + if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) && + boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + rdmsrl(MSR_TSX_FORCE_ABORT, msr); + msr |= MSR_TFA_TSX_CPUID_CLEAR; + wrmsrl(MSR_TSX_FORCE_ABORT, msr); + } +} + void __init tsx_init(void) { char arg[5] = {}; int ret; - if (!tsx_ctrl_is_supported()) + /* + * Hardware will always abort a TSX transaction if both CPUID bits + * RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are set. In this case, it is + * better not to enumerate CPUID.RTM and CPUID.HLE bits. Clear them + * here. + */ + if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) && + boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT; + tsx_clear_cpuid(); + setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); return; + } + + if (!tsx_ctrl_is_supported()) { + tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; + return; + } ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); if (ret >= 0) { diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index ec3ae3054792..b7b92cdf3add 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state, if (use_xsave()) { /* - * Note: we don't need to zero the reserved bits in the - * xstate_header here because we either didn't copy them at all, - * or we checked earlier that they aren't set. + * Clear all feature bits which are not set in + * user_xfeatures and clear all extended features + * for fx_only mode. */ + u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; /* - * 'user_xfeatures' might have bits clear which are - * set in header->xfeatures. This represents features that - * were in init state prior to a signal delivery, and need - * to be reset back to the init state. Clear any user - * feature bits which are set in the kernel buffer to get - * them back to the init state. - * - * Supervisor state is unchanged by input from userspace. - * Ensure supervisor state bits stay set and supervisor - * state is not modified. + * Supervisor state has to be preserved. The sigframe + * restore can only modify user features, i.e. @mask + * cannot contain them. */ - if (fx_only) - header->xfeatures = XFEATURE_MASK_FPSSE; - else - header->xfeatures &= user_xfeatures | - xfeatures_mask_supervisor(); + header->xfeatures &= mask | xfeatures_mask_supervisor(); } if (use_fxsr()) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d0eef963aad1..1cadb2faf740 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -441,12 +441,35 @@ static void __init print_xstate_offset_size(void) } /* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ + (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_PASID) + +/* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu __initdata = 1; + BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED) != + XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. + * + * XSAVE could be used, but that would require to reshuffle the + * data when XSAVES is available because XSAVES uses xstate + * compaction. But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). Those can be saved with FXSAVE into the + * legacy area. Adding new features requires to ensure that init + * state is all zeroes or if not to add the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 6a2eb62c85e6..674906fad43b 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -15,50 +15,75 @@ #include <asm/kprobes.h> #include <asm/alternative.h> #include <asm/text-patching.h> +#include <asm/insn.h> -static void bug_at(const void *ip, int line) +int arch_jump_entry_size(struct jump_entry *entry) { - /* - * The location is not an op that we were expecting. - * Something went wrong. Crash the box, as something could be - * corrupting the kernel. - */ - pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line); - BUG(); + struct insn insn = {}; + + insn_decode_kernel(&insn, (void *)jump_entry_code(entry)); + BUG_ON(insn.length != 2 && insn.length != 5); + + return insn.length; } -static const void * -__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type) +struct jump_label_patch { + const void *code; + int size; +}; + +static struct jump_label_patch +__jump_label_patch(struct jump_entry *entry, enum jump_label_type type) { - const void *expect, *code; + const void *expect, *code, *nop; const void *addr, *dest; - int line; + int size; addr = (void *)jump_entry_code(entry); dest = (void *)jump_entry_target(entry); - code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); + size = arch_jump_entry_size(entry); + switch (size) { + case JMP8_INSN_SIZE: + code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest); + nop = x86_nops[size]; + break; - if (type == JUMP_LABEL_JMP) { - expect = x86_nops[5]; line = __LINE__; - } else { - expect = code; line = __LINE__; + case JMP32_INSN_SIZE: + code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); + nop = x86_nops[size]; + break; + + default: BUG(); } - if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) - bug_at(addr, line); + if (type == JUMP_LABEL_JMP) + expect = nop; + else + expect = code; + + if (memcmp(addr, expect, size)) { + /* + * The location is not an op that we were expecting. + * Something went wrong. Crash the box, as something could be + * corrupting the kernel. + */ + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) size:%d type:%d\n", + addr, addr, addr, expect, size, type); + BUG(); + } if (type == JUMP_LABEL_NOP) - code = x86_nops[5]; + code = nop; - return code; + return (struct jump_label_patch){.code = code, .size = size}; } static inline void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) { - const void *opcode = __jump_label_set_jump_code(entry, type); + const struct jump_label_patch jlp = __jump_label_patch(entry, type); /* * As long as only a single processor is running and the code is still @@ -72,12 +97,11 @@ static inline void __jump_label_transform(struct jump_entry *entry, * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { - text_poke_early((void *)jump_entry_code(entry), opcode, - JUMP_LABEL_NOP_SIZE); + text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size); return; } - text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); + text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); } static void __ref jump_label_transform(struct jump_entry *entry, @@ -98,7 +122,7 @@ void arch_jump_label_transform(struct jump_entry *entry, bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { - const void *opcode; + struct jump_label_patch jlp; if (system_state == SYSTEM_BOOTING) { /* @@ -109,9 +133,8 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, } mutex_lock(&text_mutex); - opcode = __jump_label_set_jump_code(entry, type); - text_poke_queue((void *)jump_entry_code(entry), - opcode, JUMP_LABEL_NOP_SIZE, NULL); + jlp = __jump_label_patch(entry, type); + text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); mutex_unlock(&text_mutex); return true; } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 651b81cd648e..a6895e440bc3 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -7,12 +7,11 @@ * Author: Joerg Roedel <jroedel@suse.de> */ -#define pr_fmt(fmt) "SEV-ES: " fmt +#define pr_fmt(fmt) "SEV: " fmt #include <linux/sched/debug.h> /* For show_regs() */ #include <linux/percpu-defs.h> #include <linux/mem_encrypt.h> -#include <linux/lockdep.h> #include <linux/printk.h> #include <linux/mm_types.h> #include <linux/set_memory.h> @@ -192,11 +191,19 @@ void noinstr __sev_es_ist_exit(void) this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); } -static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) +/* + * Nothing shall interrupt this code path while holding the per-CPU + * GHCB. The backup GHCB is only for NMIs interrupting this path. + * + * Callers must disable local interrupts around it. + */ +static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) { struct sev_es_runtime_data *data; struct ghcb *ghcb; + WARN_ON(!irqs_disabled()); + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; @@ -213,7 +220,9 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) data->ghcb_active = false; data->backup_ghcb_active = false; + instrumentation_begin(); panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); + instrumentation_end(); } /* Mark backup_ghcb active before writing to it */ @@ -258,17 +267,24 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) { char buffer[MAX_INSN_SIZE]; - int res; + int insn_bytes; - res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); - if (!res) { + insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); + if (insn_bytes == 0) { + /* Nothing could be copied */ ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; ctxt->fi.cr2 = ctxt->regs->ip; return ES_EXCEPTION; + } else if (insn_bytes == -EINVAL) { + /* Effective RIP could not be calculated */ + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + ctxt->fi.cr2 = 0; + return ES_EXCEPTION; } - if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res)) + if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) return ES_DECODE_FAILED; if (ctxt->insn.immediate.got) @@ -479,11 +495,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" -static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) +static noinstr void __sev_put_ghcb(struct ghcb_state *state) { struct sev_es_runtime_data *data; struct ghcb *ghcb; + WARN_ON(!irqs_disabled()); + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; @@ -507,7 +525,7 @@ void noinstr __sev_es_nmi_complete(void) struct ghcb_state state; struct ghcb *ghcb; - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); @@ -517,7 +535,7 @@ void noinstr __sev_es_nmi_complete(void) sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); VMGEXIT(); - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); } static u64 get_jump_table_addr(void) @@ -529,7 +547,7 @@ static u64 get_jump_table_addr(void) local_irq_save(flags); - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); @@ -543,7 +561,7 @@ static u64 get_jump_table_addr(void) ghcb_sw_exit_info_2_is_valid(ghcb)) ret = ghcb->save.sw_exit_info_2; - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); local_irq_restore(flags); @@ -668,7 +686,7 @@ static void sev_es_ap_hlt_loop(void) struct ghcb_state state; struct ghcb *ghcb; - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); while (true) { vc_ghcb_invalidate(ghcb); @@ -685,7 +703,7 @@ static void sev_es_ap_hlt_loop(void) break; } - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); } /* @@ -775,7 +793,7 @@ void __init sev_es_init_vc_handling(void) sev_es_setup_play_dead(); /* Secondary CPUs use the runtime #VC handler */ - initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication; + initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; } static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) @@ -1213,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, return ES_EXCEPTION; } -static __always_inline void vc_handle_trap_db(struct pt_regs *regs) -{ - if (user_mode(regs)) - noist_exc_debug(regs); - else - exc_debug(regs); -} - static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, struct ghcb *ghcb, unsigned long exit_code) @@ -1316,44 +1326,15 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); } -/* - * Main #VC exception handler. It is called when the entry code was able to - * switch off the IST to a safe kernel stack. - * - * With the current implementation it is always possible to switch to a safe - * stack because #VC exceptions only happen at known places, like intercepted - * instructions or accesses to MMIO areas/IO ports. They can also happen with - * code instrumentation when the hypervisor intercepts #DB, but the critical - * paths are forbidden to be instrumented, so #DB exceptions currently also - * only happen in safe places. - */ -DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) +static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) { - irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; enum es_result result; struct ghcb *ghcb; + bool ret = true; - /* - * Handle #DB before calling into !noinstr code to avoid recursive #DB. - */ - if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) { - vc_handle_trap_db(regs); - return; - } - - irq_state = irqentry_nmi_enter(regs); - lockdep_assert_irqs_disabled(); - instrumentation_begin(); - - /* - * This is invoked through an interrupt gate, so IRQs are disabled. The - * code below might walk page-tables for user or kernel addresses, so - * keep the IRQs disabled to protect us against concurrent TLB flushes. - */ - - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); result = vc_init_em_ctxt(&ctxt, regs, error_code); @@ -1361,7 +1342,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) if (result == ES_OK) result = vc_handle_exitcode(&ctxt, ghcb, error_code); - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); /* Done - now check the result */ switch (result) { @@ -1369,17 +1350,20 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) vc_finish_insn(&ctxt); break; case ES_UNSUPPORTED: - pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", + pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_VMM_ERROR: pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_DECODE_FAILED: pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_EXCEPTION: vc_forward_exception(&ctxt); break; @@ -1395,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) BUG(); } -out: - instrumentation_end(); - irqentry_nmi_exit(regs, irq_state); + return ret; +} - return; +static __always_inline bool vc_is_db(unsigned long error_code) +{ + return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; +} -fail: - if (user_mode(regs)) { - /* - * Do not kill the machine if user-space triggered the - * exception. Send SIGBUS instead and let user-space deal with - * it. - */ - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); - } else { - pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n", - result); +/* + * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode + * and will panic when an error happens. + */ +DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) +{ + irqentry_state_t irq_state; + + /* + * With the current implementation it is always possible to switch to a + * safe stack because #VC exceptions only happen at known places, like + * intercepted instructions or accesses to MMIO areas/IO ports. They can + * also happen with code instrumentation when the hypervisor intercepts + * #DB, but the critical paths are forbidden to be instrumented, so #DB + * exceptions currently also only happen in safe places. + * + * But keep this here in case the noinstr annotations are violated due + * to bug elsewhere. + */ + if (unlikely(on_vc_fallback_stack(regs))) { + instrumentation_begin(); + panic("Can't handle #VC exception from unsupported context\n"); + instrumentation_end(); + } + /* + * Handle #DB before calling into !noinstr code to avoid recursive #DB. + */ + if (vc_is_db(error_code)) { + exc_debug(regs); + return; + } + + irq_state = irqentry_nmi_enter(regs); + + instrumentation_begin(); + + if (!vc_raw_handle_exception(regs, error_code)) { /* Show some debug info */ show_regs(regs); @@ -1423,23 +1435,38 @@ fail: panic("Returned from Terminate-Request to Hypervisor\n"); } - goto out; + instrumentation_end(); + irqentry_nmi_exit(regs, irq_state); } -/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */ -DEFINE_IDTENTRY_VC_IST(exc_vmm_communication) +/* + * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode + * and will kill the current task with SIGBUS when an error happens. + */ +DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) { + /* + * Handle #DB before calling into !noinstr code to avoid recursive #DB. + */ + if (vc_is_db(error_code)) { + noist_exc_debug(regs); + return; + } + + irqentry_enter_from_user_mode(regs); instrumentation_begin(); - panic("Can't handle #VC exception from unsupported context\n"); - instrumentation_end(); -} -DEFINE_IDTENTRY_VC(exc_vmm_communication) -{ - if (likely(!on_vc_fallback_stack(regs))) - safe_stack_exc_vmm_communication(regs, error_code); - else - ist_exc_vmm_communication(regs, error_code); + if (!vc_raw_handle_exception(regs, error_code)) { + /* + * Do not kill the machine if user-space triggered the + * exception. Send SIGBUS instead and let user-space deal with + * it. + */ + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); + } + + instrumentation_end(); + irqentry_exit_to_user_mode(regs); } bool __init handle_vc_boot_ghcb(struct pt_regs *regs) diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 8daa70b0d2da..576b47e7523d 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -346,14 +346,12 @@ bool fixup_umip_exception(struct pt_regs *regs) if (!regs) return false; - nr_copied = insn_fetch_from_user(regs, buf); - /* - * The insn_fetch_from_user above could have failed if user code - * is protected by a memory protection key. Give up on emulation - * in such a case. Should we issue a page fault? + * Give up on emulation if fetching the instruction failed. Should a + * page fault or a #GP be issued? */ - if (!nr_copied) + nr_copied = insn_fetch_from_user(regs, buf); + if (nr_copied <= 0) return false; if (!insn_decode_from_regs(&insn, regs, buf, nr_copied)) diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index a67afd74232c..a1d24fdc07cf 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) } } -static unsigned long insn_get_effective_ip(struct pt_regs *regs) +static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip) { unsigned long seg_base = 0; @@ -1430,10 +1430,12 @@ static unsigned long insn_get_effective_ip(struct pt_regs *regs) if (!user_64bit_mode(regs)) { seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); if (seg_base == -1L) - return 0; + return -EINVAL; } - return seg_base + regs->ip; + *ip = seg_base + regs->ip; + + return 0; } /** @@ -1446,18 +1448,17 @@ static unsigned long insn_get_effective_ip(struct pt_regs *regs) * * Returns: * - * Number of instruction bytes copied. - * - * 0 if nothing was copied. + * - number of instruction bytes copied. + * - 0 if nothing was copied. + * - -EINVAL if the linear address of the instruction could not be calculated */ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; - ip = insn_get_effective_ip(regs); - if (!ip) - return 0; + if (insn_get_effective_ip(regs, &ip)) + return -EINVAL; not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); @@ -1475,18 +1476,17 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) * * Returns: * - * Number of instruction bytes copied. - * - * 0 if nothing was copied. + * - number of instruction bytes copied. + * - 0 if nothing was copied. + * - -EINVAL if the linear address of the instruction could not be calculated. */ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; - ip = insn_get_effective_ip(regs); - if (!ip) - return 0; + if (insn_get_effective_ip(regs, &ip)) + return -EINVAL; not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 8a26e705cb06..147c30a81f15 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -468,7 +468,7 @@ void __init efi_init(void) */ if (!efi_runtime_supported()) - pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); + pr_err("No EFI runtime due to 32/64-bit mismatch with kernel\n"); if (!efi_runtime_supported() || efi_runtime_disabled()) { efi_memmap_unmap(); diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index 6b1f3a4eeb44..a0b491ae2de8 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -10,7 +10,6 @@ # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KCSAN_SANITIZE := n -OBJECT_FILES_NON_STANDARD := y subdir- := rm |