Diffstat (limited to 'arch')
272 files changed, 2205 insertions, 1340 deletions
diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index 724c4075df40..dd2dd9f0861f 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -25,19 +25,18 @@ CONFIG_PNP=y
 CONFIG_ISAPNP=y
 CONFIG_BLK_DEV_FD=y
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_IDE_GENERIC=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_ALI15X3=y
-CONFIG_BLK_DEV_CMD64X=y
-CONFIG_BLK_DEV_CY82C693=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_SCSI_AIC7XXX=m
 CONFIG_AIC7XXX_CMDS_PER_DEVICE=253
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+CONFIG_PATA_ALI=y
+CONFIG_PATA_CMD64X=y
+CONFIG_PATA_CYPRESS=y
+CONFIG_ATA_GENERIC=y
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 CONFIG_NET_ETHERNET=y
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 5622578742fd..3000a2e8ee21 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -482,7 +482,7 @@
 550	common	process_madvise			sys_process_madvise
 551	common	epoll_pwait2			sys_epoll_pwait2
 552	common	mount_setattr			sys_mount_setattr
-553	common	quotactl_path			sys_quotactl_path
+# 553 reserved for quotactl_path
 554	common	landlock_create_ruleset		sys_landlock_create_ruleset
 555	common	landlock_add_rule		sys_landlock_add_rule
 556	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 4392c9c189c4..e47adc97a89b 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -31,7 +31,7 @@ endif
 
 ifdef CONFIG_ARC_CURR_IN_REG
-# For a global register defintion, make sure it gets passed to every file
+# For a global register definition, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
 # any kernel headers, and missing the r25 global register
 # Can't do unconditionally because of recursive include issues
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index d1781bdf6527..d42917e803e1 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 *
 * Technically the lock is also needed for UP (boils down to irq save/restore)
 * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
+ * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
 * Other way around, xchg is one instruction anyways, so can't be interrupted
 * as such
 */
@@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 /*
 * "atomic" variant of xchg()
 * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following defintion
+ * Since xchg() doesn't always do that, it would seem that following definition
 * is incorrect. But here's the rationale:
 * SMP : Even xchg() takes the atomic_ops_lock, so OK.
 * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index ad9b7fe4dba3..4a9d33372fe2 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -7,6 +7,18 @@
 
 #include <uapi/asm/page.h>
 
+#ifdef CONFIG_ARC_HAS_PAE40
+
+#define MAX_POSSIBLE_PHYSMEM_BITS	40
+#define PAGE_MASK_PHYS			(0xff00000000ull | PAGE_MASK)
+
+#else /* CONFIG_ARC_HAS_PAE40 */
+
+#define MAX_POSSIBLE_PHYSMEM_BITS	32
+#define PAGE_MASK_PHYS			PAGE_MASK
+
+#endif /* CONFIG_ARC_HAS_PAE40 */
+
 #ifndef __ASSEMBLY__
 
 #define clear_page(paddr)		memset((paddr), 0, PAGE_SIZE)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 163641726a2b..5878846f00cf 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -107,8 +107,8 @@
 #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
-
+#define _PAGE_CHG_MASK	(PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+							   _PAGE_SPECIAL)
 /* More Abbrevaited helpers */
 #define PAGE_U_NONE     __pgprot(___DEF)
 #define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
@@ -132,13 +132,7 @@
 #define PTE_BITS_IN_PD0	(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX	(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
 
-#ifdef CONFIG_ARC_HAS_PAE40
-#define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 40
-#else
-#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 32
-#endif
+#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK_PHYS | _PAGE_CACHEABLE)
 
 /**************************************************************************
 * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a97e2718a21..2a4ad619abfb 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -33,5 +33,4 @@
 
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-
 #endif /* _UAPI__ASM_ARC_PAGE_H */
diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h
index 95f8a4380e11..7a5449dfcb29 100644
--- a/arch/arc/include/uapi/asm/sigcontext.h
+++ b/arch/arc/include/uapi/asm/sigcontext.h
@@ -18,6 +18,7 @@
 */
 struct sigcontext {
 	struct user_regs_struct regs;
+	struct user_regs_arcv2 v2abi;
 };
 
 #endif /* _ASM_ARC_SIGCONTEXT_H */
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1743506081da..2cb8dfe866b6 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -177,7 +177,7 @@ tracesys:
 
 	; Do the Sys Call as we normally would.
 	; Validate the Sys Call number
-	cmp     r8, NR_syscalls
+	cmp     r8, NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     tracesys_exit
@@ -255,7 +255,7 @@ ENTRY(EV_Trap)
 
 	;============ Normal syscall case
 
 	; syscall num shd not exceed the total system calls avail
-	cmp     r8, NR_syscalls
+	cmp     r8, NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     .Lret_from_system_call
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index ecfbc42d3a40..345a0000554c 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
 		ptr = &remcomInBuffer[1];
 		if (kgdb_hex2long(&ptr, &addr))
 			regs->ret = addr;
+		fallthrough;
 
 	case 'D':
 	case 'k':
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index d838d0d57696..3793876f42d9 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
 	int ret;
 
 	/*
-	 * This is only for old cores lacking LLOCK/SCOND, which by defintion
+	 * This is only for old cores lacking LLOCK/SCOND, which by definition
 	 * can't possibly be SMP. Thus doesn't need to be SMP safe.
 	 * And this also helps reduce the overhead for serializing in
 	 * the UP case
 	 */
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
 
-	/* Z indicates to userspace if operation succeded */
+	/* Z indicates to userspace if operation succeeded */
 	regs->status32 &= ~STATUS_Z_MASK;
 
 	ret = access_ok(uaddr, sizeof(*uaddr));
@@ -107,7 +107,7 @@ fail:
 
 void arch_cpu_idle(void)
 {
-	/* Re-enable interrupts <= default irq priority before commiting SLEEP */
+	/* Re-enable interrupts <= default irq priority before committing SLEEP */
 	const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
 
 	__asm__ __volatile__(
@@ -120,7 +120,7 @@ void arch_cpu_idle(void)
 
 void arch_cpu_idle(void)
 {
-	/* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+	/* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
 	__asm__ __volatile__("sleep 0x3	\n");
 }
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index fdbe06c98895..cb2f88502baf 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -61,6 +61,41 @@ struct rt_sigframe {
 	unsigned int sigret_magic;
 };
 
+static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+	int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+	struct user_regs_arcv2 v2abi;
+
+	v2abi.r30 = regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	v2abi.r58 = regs->r58;
+	v2abi.r59 = regs->r59;
+#else
+	v2abi.r58 = v2abi.r59 = 0;
+#endif
+	err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+#endif
+	return err;
+}
+
+static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+	int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+	struct user_regs_arcv2 v2abi;
+
+	err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi));
+
+	regs->r30 = v2abi.r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	regs->r58 = v2abi.r58;
+	regs->r59 = v2abi.r59;
+#endif
+#endif
+	return err;
+}
+
 static int
 stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
 	       sigset_t *set)
@@ -94,6 +129,10 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
 	err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
 			     sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+	if (is_isa_arcv2())
+		err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
 	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
 	return err ? -EFAULT : 0;
@@ -109,6 +148,10 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
 	err |= __copy_from_user(&uregs.scratch,
 				&(sf->uc.uc_mcontext.regs.scratch),
 				sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+	if (is_isa_arcv2())
+		err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
 	if (err)
 		return -EFAULT;
@@ -259,7 +302,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 		regs->r2 = (unsigned long)&sf->uc;
 
 		/*
-		 * small optim to avoid unconditonally calling do_sigaltstack
+		 * small optim to avoid unconditionally calling do_sigaltstack
 		 * in sigreturn path, now that we only have rt_sigreturn
 		 */
 		magic = MAGIC_SIGALTSTK;
@@ -391,7 +434,7 @@ void do_signal(struct pt_regs *regs)
 void do_notify_resume(struct pt_regs *regs)
 {
 	/*
-	 * ASM glue gaurantees that this is only called when returning to
+	 * ASM glue guarantees that this is only called when returning to
 	 * user mode
 	 */
 	if (test_thread_flag(TIF_NOTIFY_RESUME))
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 33ce59d91461..e2146a8da195 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -57,7 +57,6 @@ SECTIONS
 	.init.ramfs : { INIT_RAM_FS }
 
 	. = ALIGN(PAGE_SIZE);
-	_stext = .;
 
 	HEAD_TEXT_SECTION
 	INIT_TEXT_SECTION(L1_CACHE_BYTES)
@@ -83,6 +82,7 @@ SECTIONS
 
 	.text : {
 		_text = .;
+		_stext = .;
 		TEXT_TEXT
 		SCHED_TEXT
 		CPUIDLE_TEXT
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 33832e36bdb7..e2ed355438c9 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -157,7 +157,16 @@ void __init setup_arch_memory(void)
 	min_high_pfn = PFN_DOWN(high_mem_start);
 	max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
 
-	max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn;
+	/*
+	 * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE.
+	 * For HIGHMEM without PAE max_high_pfn should be less than
+	 * min_low_pfn to guarantee that these two regions don't overlap.
+	 * For PAE case highmem is greater than lowmem, so it is natural
+	 * to use max_high_pfn.
+	 *
+	 * In both cases, holes should be handled by pfn_valid().
+	 */
+	max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
 
 	high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index fac4adc90204..95c649fbc95a 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap);
 void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
 			   unsigned long flags)
 {
+	unsigned int off;
 	unsigned long vaddr;
 	struct vm_struct *area;
-	phys_addr_t off, end;
+	phys_addr_t end;
 	pgprot_t prot = __pgprot(flags);
 
 	/* Don't allow wraparound, zero size */
@@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
 
 	/* Mappings have to be page-aligned */
 	off = paddr & ~PAGE_MASK;
-	paddr &= PAGE_MASK;
+	paddr &= PAGE_MASK_PHYS;
 	size = PAGE_ALIGN(end + 1) - paddr;
 
 	/*
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9bb3c24f3677..9c7c68247289 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 		      pte_t *ptep)
 {
 	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
-	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
+	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
 	struct page *page = pfn_to_page(pte_pfn(*ptep));
 
 	create_tlb(vma, vaddr, ptep);
diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
index 7d2c72562c73..9148a01ed6d9 100644
--- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
+++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
@@ -105,9 +105,13 @@
 	phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
 	phy-reset-duration = <20>;
 	phy-supply = <&sw2_reg>;
-	phy-handle = <&ethphy0>;
 	status = "okay";
 
+	fixed-link {
+		speed = <1000>;
+		full-duplex;
+	};
+
 	mdio {
 		#address-cells = <1>;
 		#size-cells = <0>;
diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
index 236fc205c389..d0768ae429fa 100644
--- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
@@ -406,6 +406,18 @@
 	vin-supply = <&sw1_reg>;
 };
 
+&reg_pu {
+	vin-supply = <&sw1_reg>;
+};
+
+&reg_vdd1p1 {
+	vin-supply = <&sw2_reg>;
+};
+
+&reg_vdd2p5 {
+	vin-supply = <&sw2_reg>;
+};
+
 &uart1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart1>;
diff --git a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
index 828cf3e39784..c4e146f3341b 100644
--- a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
@@ -126,7 +126,7 @@
 		compatible = "nxp,pca8574";
 		reg = <0x3a>;
 		gpio-controller;
-		#gpio-cells = <1>;
+		#gpio-cells = <2>;
 	};
 };
diff --git a/arch/arm/boot/dts/imx7d-meerkat96.dts b/arch/arm/boot/dts/imx7d-meerkat96.dts
index 5339210b63d0..dd8003bd1fc0 100644
--- a/arch/arm/boot/dts/imx7d-meerkat96.dts
+++ b/arch/arm/boot/dts/imx7d-meerkat96.dts
@@ -193,7 +193,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	keep-power-in-suspend;
-	tuning-step = <2>;
+	fsl,tuning-step = <2>;
 	vmmc-supply = <&reg_3p3v>;
 	no-1-8-v;
 	broken-cd;
diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi
index e57da0d32b98..e519897fae08 100644
--- a/arch/arm/boot/dts/imx7d-pico.dtsi
+++ b/arch/arm/boot/dts/imx7d-pico.dtsi
@@ -351,7 +351,7 @@
 	pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
 	cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
 	bus-width = <4>;
-	tuning-step = <2>;
+	fsl,tuning-step = <2>;
 	vmmc-supply = <&reg_3p3v>;
 	wakeup-source;
 	no-1-8-v;
diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig
index 2aa3ebeb89d7..7a32de51f0fa 100644
--- a/arch/arm/configs/footbridge_defconfig
+++ b/arch/arm/configs/footbridge_defconfig
@@ -64,7 +64,6 @@ CONFIG_PARIDE_ON26=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_IDE=y
 CONFIG_NETDEVICES=y
 CONFIG_NET_ETHERNET=y
 CONFIG_NET_VENDOR_3COM=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 875a3c28a267..363f1b1b08e3 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -215,8 +215,6 @@ CONFIG_IIO=m
 CONFIG_AD5446=m
 CONFIG_EEPROM_AT24=m
 CONFIG_SENSORS_LIS3_SPI=m
-CONFIG_IDE=m
-CONFIG_BLK_DEV_IDECS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=m
 CONFIG_CHR_DEV_ST=m
diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index 0d67ed682e07..bc4ffa7ca04c 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -7,9 +7,11 @@
 #ifdef CONFIG_CPU_IDLE
 extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
 				    struct cpuidle_driver *drv, int index);
+#define __cpuidle_method_section __used __section("__cpuidle_method_of_table")
 #else
 static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
 					   struct cpuidle_driver *drv, int index) { return -ENODEV; }
+#define __cpuidle_method_section __maybe_unused /* drop silently */
 #endif
 
 /* Common ARM WFI state */
@@ -42,8 +44,7 @@ struct of_cpuidle_method {
 
 #define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops)			\
 	static const struct of_cpuidle_method __cpuidle_method_of_table_##name \
-	__used __section("__cpuidle_method_of_table")			\
-	= { .method = _method, .ops = _ops }
+	__cpuidle_method_section = { .method = _method, .ops = _ops }
 
 extern int arm_cpuidle_suspend(int index);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1a5edf562e85..73ca7797b92f 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -545,9 +545,11 @@ void notrace cpu_init(void)
 	 * In Thumb-2, msr with an immediate value is not allowed.
 	 */
 #ifdef CONFIG_THUMB2_KERNEL
-#define PLC	"r"
+#define PLC_l	"l"
+#define PLC_r	"r"
 #else
-#define PLC	"I"
+#define PLC_l	"I"
+#define PLC_r	"I"
 #endif
 
 	/*
@@ -569,15 +571,15 @@ void notrace cpu_init(void)
 	"msr	cpsr_c, %9"
 	    :
 	    : "r" (stk),
-	      PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
 	      "I" (offsetof(struct stack, irq[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
 	      "I" (offsetof(struct stack, abt[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE),
 	      "I" (offsetof(struct stack, und[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
 	      "I" (offsetof(struct stack, fiq[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+	      PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
 	    : "r14");
 #endif
 }
diff --git a/arch/arm/mach-imx/pm-imx27.c b/arch/arm/mach-imx/pm-imx27.c
index 020e6deb67c8..237e8aa9fe83 100644
--- a/arch/arm/mach-imx/pm-imx27.c
+++ b/arch/arm/mach-imx/pm-imx27.c
@@ -12,6 +12,7 @@
 #include <linux/suspend.h>
 #include <linux/io.h>
 
+#include "common.h"
 #include "hardware.h"
 
 static int mx27_suspend_enter(suspend_state_t state)
diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig
index 658c8efb4ca1..a71cf1d189ae 100644
--- a/arch/arm/mach-npcm/Kconfig
+++ b/arch/arm/mach-npcm/Kconfig
@@ -10,6 +10,7 @@ config ARCH_WPCM450
 	bool "Support for WPCM450 BMC (Hermon)"
 	depends on ARCH_MULTI_V5
 	select CPU_ARM926T
+	select WPCM450_AIC
 	select NPCM7XX_TIMER
 	help
 	  General support for WPCM450 BMC (Hermon).
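
The arm cpuidle.h hunk above moves the section attribute behind a per-config __cpuidle_method_section macro, so a method declared while CONFIG_CPU_IDLE is disabled is dropped silently instead of populating a table that nothing reads. A minimal sketch of the resulting expansion; the "psci" method name and psci_cpuidle_ops are illustrative placeholders, not taken from this diff:

	/* CPUIDLE_METHOD_OF_DECLARE(psci, "psci", &psci_cpuidle_ops) expands to: */
	static const struct of_cpuidle_method __cpuidle_method_of_table_psci
		__cpuidle_method_section = { .method = "psci", .ops = &psci_cpuidle_ops };
	/*
	 * CONFIG_CPU_IDLE=y: __cpuidle_method_section is
	 *   __used __section("__cpuidle_method_of_table"), so the entry lands in
	 *   the table that the ARM cpuidle setup code scans at boot.
	 * CONFIG_CPU_IDLE=n: it is only __maybe_unused, so the unreferenced
	 *   static const object is discarded by the compiler without a warning.
	 */
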
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 2ee527c00284..1026a816dcc0 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -458,20 +458,6 @@ static struct gpiod_lookup_table leds_gpio_table = {
 
 #ifdef CONFIG_LEDS_TRIGGERS
 DEFINE_LED_TRIGGER(ams_delta_camera_led_trigger);
-
-static int ams_delta_camera_power(struct device *dev, int power)
-{
-	/*
-	 * turn on camera LED
-	 */
-	if (power)
-		led_trigger_event(ams_delta_camera_led_trigger, LED_FULL);
-	else
-		led_trigger_event(ams_delta_camera_led_trigger, LED_OFF);
-	return 0;
-}
-#else
-#define ams_delta_camera_power	NULL
 #endif
 
 static struct platform_device ams_delta_audio_device = {
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index c40cf5ef8607..977b0b744c22 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -320,7 +320,7 @@ static int tps_setup(struct i2c_client *client, void *context)
 {
 	if (!IS_BUILTIN(CONFIG_TPS65010))
 		return -ENOSYS;
-	
+
 	tps65010_config_vregs1(TPS_LDO2_ENABLE | TPS_VLDO2_3_0V |
 				TPS_LDO1_ENABLE | TPS_VLDO1_3_0V);
 
@@ -394,6 +394,8 @@ static void __init h2_init(void)
 	BUG_ON(gpio_request(H2_NAND_RB_GPIO_PIN, "NAND ready") < 0);
 	gpio_direction_input(H2_NAND_RB_GPIO_PIN);
 
+	gpiod_add_lookup_table(&isp1301_gpiod_table);
+
 	omap_cfg_reg(L3_1610_FLASH_CS2B_OE);
 	omap_cfg_reg(M8_1610_FLASH_CS2B_WE);
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 2c1e2b32b9b3..a745d64d4699 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -655,9 +655,13 @@ static int __init omap_pm_init(void)
 		irq = INT_7XX_WAKE_UP_REQ;
 	else if (cpu_is_omap16xx())
 		irq = INT_1610_WAKE_UP_REQ;
-	if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup",
-			NULL))
-		pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
+	else
+		irq = -1;
+
+	if (irq >= 0) {
+		if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", NULL))
+			pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
+	}
 
 	/* Program new power ramp-up time
 	 * (0 for most boards since we don't lower voltage when in deep sleep)
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 418a61ecb827..5e86145db0e2 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -322,6 +322,7 @@ static int n8x0_mmc_get_cover_state(struct device *dev, int slot)
 
 static void n8x0_mmc_callback(void *data, u8 card_mask)
 {
+#ifdef CONFIG_MMC_OMAP
 	int bit, *openp, index;
 
 	if (board_is_n800()) {
@@ -339,7 +340,6 @@ static void n8x0_mmc_callback(void *data, u8 card_mask)
 	else
 		*openp = 0;
 
-#ifdef CONFIG_MMC_OMAP
 	omap_mmc_notify_cover_event(mmc_device, index, *openp);
 #else
 	pr_warn("MMC: notify cover event not available\n");
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
index ec0d9b094744..bddfc7cd5d40 100644
--- a/arch/arm/mach-pxa/pxa_cplds_irqs.c
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev)
 		return fpga->irq;
 
 	base_irq = platform_get_irq(pdev, 1);
-	if (base_irq < 0)
+	if (base_irq < 0) {
 		base_irq = 0;
+	} else {
+		ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0);
+		if (ret < 0)
+			return ret;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	fpga->base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index c7679d7db98b..28e03b5fec00 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -456,7 +456,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index f8f07469d259..a7e54a087b80 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -135,24 +135,18 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 	return;
 }
 
-int xen_swiotlb_detect(void)
-{
-	if (!xen_domain())
-		return 0;
-	if (xen_feature(XENFEAT_direct_mapped))
-		return 1;
-	/* legacy case */
-	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
-		return 1;
-	return 0;
-}
-
 static int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
+	int rc;
+
 	if (!xen_swiotlb_detect())
 		return 0;
-	xen_swiotlb_init();
+
+	rc = xen_swiotlb_init();
+	/* we can work with the default swiotlb */
+	if (rc < 0 && rc != -EEXIST)
+		return rc;
 
 	cflush.op = 0;
 	cflush.a.dev_bus_addr = 0;
diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild
index d6465823b281..7b393cfec071 100644
--- a/arch/arm64/Kbuild
+++ b/arch/arm64/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y			+= kernel/ mm/
-obj-$(CONFIG_NET)	+= net/
+obj-y			+= kernel/ mm/ net/
 obj-$(CONFIG_KVM)	+= kvm/
 obj-$(CONFIG_XEN)	+= xen/
 obj-$(CONFIG_CRYPTO)	+= crypto/
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 6409b47b73e4..7336c1fd0dda 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -165,6 +165,7 @@ config ARCH_MEDIATEK
 
 config ARCH_MESON
 	bool "Amlogic Platforms"
+	select COMMON_CLK
 	select MESON_IRQ_GPIO
 	help
 	  This enables support for the arm64 based Amlogic SoCs
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7ef44478560d..b52481f0605d 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -175,6 +175,9 @@ vdso_install:
 	$(if $(CONFIG_COMPAT_VDSO), \
 		$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@)
 
+archprepare:
+	$(Q)$(MAKE) $(build)=arch/arm64/tools kapi
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
index 6c309b97587d..e8d31279b7a3 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
@@ -46,7 +46,8 @@
 	eee-broken-100tx;
 	qca,clk-out-frequency = <125000000>;
 	qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
-	vddio-supply = <&vddh>;
+	qca,keep-pll-enabled;
+	vddio-supply = <&vddio>;
 
 	vddio: vddio-regulator {
 		regulator-name = "VDDIO";
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
index df212ed5bb94..e65d1c477e2c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
@@ -31,11 +31,10 @@
 	reg = <0x4>;
 	eee-broken-1000t;
 	eee-broken-100tx;
-	qca,clk-out-frequency = <125000000>;
 	qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
-
-	vddio-supply = <&vddh>;
+	qca,keep-pll-enabled;
+	vddio-supply = <&vddio>;
 
 	vddio: vddio-regulator {
 		regulator-name = "VDDIO";
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index eca06a0c3cf8..a30249ebffa8 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -197,8 +197,8 @@
 		ddr: memory-controller@1080000 {
 			compatible = "fsl,qoriq-memory-controller";
 			reg = <0x0 0x1080000 0x0 0x1000>;
-			interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
-			big-endian;
+			interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+			little-endian;
 		};
 
 		dcfg: syscon@1e00000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
index 631e01c1b9fd..be1e7d6f0ecb 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
@@ -88,11 +88,11 @@
 	pinctrl-0 = <&pinctrl_codec2>;
 	reg = <0x18>;
 	#sound-dai-cells = <0>;
-	HPVDD-supply = <&reg_3p3v>;
-	SPRVDD-supply = <&reg_3p3v>;
-	SPLVDD-supply = <&reg_3p3v>;
-	AVDD-supply = <&reg_3p3v>;
-	IOVDD-supply = <&reg_3p3v>;
+	HPVDD-supply = <&reg_gen_3p3>;
+	SPRVDD-supply = <&reg_gen_3p3>;
+	SPLVDD-supply = <&reg_gen_3p3>;
+	AVDD-supply = <&reg_gen_3p3>;
+	IOVDD-supply = <&reg_gen_3p3>;
 	DVDD-supply = <&vgen4_reg>;
 	reset-gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>;
 };
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
index 4dc8383478ee..a08a568c31d9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
@@ -45,8 +45,8 @@
 	reg_12p0_main: regulator-12p0-main {
 		compatible = "regulator-fixed";
 		regulator-name = "12V_MAIN";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
 		regulator-always-on;
 	};
 
@@ -77,15 +77,6 @@
 		regulator-always-on;
 	};
 
-	reg_3p3v: regulator-3p3v {
-		compatible = "regulator-fixed";
-		vin-supply = <&reg_3p3_main>;
-		regulator-name = "GEN_3V3";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-		regulator-always-on;
-	};
-
 	reg_usdhc2_vmmc: regulator-vsd-3v3 {
 		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_reg_usdhc2>;
@@ -415,11 +406,11 @@
 	pinctrl-0 = <&pinctrl_codec1>;
 	reg = <0x18>;
 	#sound-dai-cells = <0>;
-	HPVDD-supply = <&reg_3p3v>;
-	SPRVDD-supply = <&reg_3p3v>;
-	SPLVDD-supply = <&reg_3p3v>;
-	AVDD-supply = <&reg_3p3v>;
-	IOVDD-supply = <&reg_3p3v>;
+	HPVDD-supply = <&reg_gen_3p3>;
+	SPRVDD-supply = <&reg_gen_3p3>;
+	SPLVDD-supply = <&reg_gen_3p3>;
+	AVDD-supply = <&reg_gen_3p3>;
+	IOVDD-supply = <&reg_gen_3p3>;
 	DVDD-supply = <&vgen4_reg>;
 	reset-gpios = <&gpio3 3 GPIO_ACTIVE_LOW>;
 };
diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
index c62ddb9b2ba5..3771144a2ce4 100644
--- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
+++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
@@ -14,7 +14,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi20_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
@@ -29,7 +28,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index d64fb8b1b86c..46f8dbf68904 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -2573,6 +2573,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2628,6 +2632,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
index 5b05474dc272..d16a4be5ef77 100644
--- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
@@ -2419,6 +2419,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2474,6 +2478,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
index e7b4a929bb17..2e3d1981cac4 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
+++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
@@ -33,7 +33,7 @@
 	status = "okay";
 
 	ports {
-		port {
+		port@0 {
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index 20fa3caa050e..1aef34447abd 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -1823,6 +1823,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
index 8eb006cbd9af..1f51237ab0a6 100644
--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
@@ -2709,6 +2709,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2764,6 +2768,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
index 25b87da32eeb..b643d3079db1 100644
--- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
@@ -192,6 +192,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
index 5c39152e4570..85d66d15465a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
@@ -3097,6 +3097,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -3152,6 +3156,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -3191,6 +3199,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
index 25d947a81b29..12476e354d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
@@ -2761,6 +2761,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2816,6 +2820,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
index ab081f14af9a..d9804768425a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
@@ -2499,6 +2499,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2554,6 +2558,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index 657b20d3533b..dcb9df861d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -2575,6 +2575,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2630,6 +2634,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
index 5a5d5649332a..e8f6352c3665 100644
--- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
@@ -1106,6 +1106,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
index 1ffa4a995a7a..7b51d464de0e 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
@@ -1439,6 +1439,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -1478,6 +1482,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
index 295d34f1d216..4715e4a4abe0 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
+++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
@@ -298,8 +298,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
-
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index 5010f23fafcc..0eaea58f4210 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -1970,6 +1970,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index e18747df219f..453ffcef24fa 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -349,7 +349,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi20_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1>;
@@ -364,8 +363,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
-
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2 3 4>;
diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
index b2bcbf23eefd..ca59d1f711f8 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
@@ -42,12 +42,12 @@
 		};
 	};
 
-	dmss: dmss {
+	dmss: bus@48000000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
 		dma-ranges;
-		ranges;
+		ranges = <0x00 0x48000000 0x00 0x48000000 0x00 0x06400000>;
 
 		ti,sci-dev-id = <25>;
 
@@ -134,7 +134,7 @@
 		};
 	};
 
-	dmsc: dmsc@44043000 {
+	dmsc: system-controller@44043000 {
 		compatible = "ti,k2g-sci";
 		ti,host-id = <12>;
 		mbox-names = "rx", "tx";
@@ -148,7 +148,7 @@
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
@@ -373,8 +373,9 @@
 		clocks = <&k3_clks 145 0>;
 	};
 
-	main_gpio_intr: interrupt-controller0 {
+	main_gpio_intr: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x00a00000 0x00 0x800>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
index 99e94dee1bd4..deb19ae5e168 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
@@ -74,8 +74,9 @@
 		clocks = <&k3_clks 148 0>;
 	};
 
-	mcu_gpio_intr: interrupt-controller1 {
+	mcu_gpio_intr: interrupt-controller@4210000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x04210000 0x00 0x200>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index cb340d1b401f..6cd3131eb9ff 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -433,8 +433,9 @@
 		#phy-cells = <0>;
 	};
 
-	intr_main_gpio: interrupt-controller0 {
+	intr_main_gpio: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x0 0x00a00000 0x0 0x400>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
@@ -444,18 +445,19 @@
 		ti,interrupt-ranges = <0 392 32>;
 	};
 
-	main-navss {
+	main_navss: bus@30800000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0xbc00000>;
 		dma-coherent;
 		dma-ranges;
 
 		ti,sci-dev-id = <118>;
 
-		intr_main_navss: interrupt-controller1 {
+		intr_main_navss: interrupt-controller@310e0000 {
 			compatible = "ti,sci-intr";
+			reg = <0x0 0x310e0000 0x0 0x2000>;
 			ti,intr-trigger-type = <4>;
 			interrupt-controller;
 			interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
index 0388c02c2203..f5b8ef2f5f77 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
@@ -116,11 +116,11 @@
 		};
 	};
 
-	mcu-navss {
+	mcu_navss: bus@28380000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>;
 		dma-coherent;
 		dma-ranges;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index ed42f13e7663..7cb864b4d74a 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -6,24 +6,24 @@
 */
 
 &cbass_wakeup {
-	dmsc: dmsc {
+	dmsc: system-controller@44083000 {
 		compatible = "ti,am654-sci";
 		ti,host-id = <12>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
 
 		mbox-names = "rx", "tx";
 
 		mboxes= <&secure_proxy_main 11>,
 			<&secure_proxy_main 13>;
 
+		reg-names = "debug_messages";
+		reg = <0x44083000 0x1000>;
+
 		k3_pds: power-controller {
 			compatible = "ti,sci-pm-domain";
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
@@ -69,8 +69,9 @@
 		power-domains = <&k3_pds 115 TI_SCI_PD_EXCLUSIVE>;
 	};
 
-	intr_wkup_gpio: interrupt-controller2 {
+	intr_wkup_gpio: interrupt-controller@42200000 {
 		compatible = "ti,sci-intr";
+		reg = <0x42200000 0x200>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
index 9e87fb313a54..eddb2ffb93ca 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
@@ -85,12 +85,6 @@
 			gpios = <&wkup_gpio0 27 GPIO_ACTIVE_LOW>;
 		};
 	};
-
-	clk_ov5640_fixed: clock {
-		compatible = "fixed-clock";
-		#clock-cells = <0>;
-		clock-frequency = <24000000>;
-	};
 };
 
 &wkup_pmx0 {
@@ -287,23 +281,6 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&main_i2c1_pins_default>;
 	clock-frequency = <400000>;
-
-	ov5640: camera@3c {
-		compatible = "ovti,ov5640";
-		reg = <0x3c>;
-
-		clocks = <&clk_ov5640_fixed>;
-		clock-names = "xclk";
-
-		port {
-			csi2_cam0: endpoint {
-				remote-endpoint = <&csi2_phy0>;
-				clock-lanes = <0>;
-				data-lanes = <1 2>;
-			};
-		};
-	};
-
 };
 
 &main_i2c2 {
@@ -496,14 +473,6 @@
 	};
 };
 
-&csi2_0 {
-	csi2_phy0: endpoint {
-		remote-endpoint = <&csi2_cam0>;
-		clock-lanes = <0>;
-		data-lanes = <1 2>;
-	};
-};
-
 &mcu_cpsw {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mcu_cpsw_pins_default &mcu_mdio_pins_default>;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
index f86c493a44f1..19fea8adbcff 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
@@ -68,8 +68,9 @@
 		};
 	};
 
-	main_gpio_intr: interrupt-controller0 {
+	main_gpio_intr: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x00a00000 0x00 0x800>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
@@ -85,9 +86,12 @@
 		#size-cells = <2>;
 		ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>;
 		ti,sci-dev-id = <199>;
+		dma-coherent;
+		dma-ranges;
 
-		main_navss_intr: interrupt-controller1 {
+		main_navss_intr: interrupt-controller@310e0000 {
 			compatible = "ti,sci-intr";
+			reg = <0x00 0x310e0000 0x00 0x4000>;
 			ti,intr-trigger-type = <4>;
 			interrupt-controller;
 			interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
index 5e74e43822c3..5663fe3ea466 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
@@ -6,7 +6,7 @@
 */
 
 &cbass_mcu_wakeup {
-	dmsc: dmsc@44083000 {
+	dmsc: system-controller@44083000 {
 		compatible = "ti,k2g-sci";
 		ti,host-id = <12>;
 
@@ -23,7 +23,7 @@
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
@@ -96,8 +96,9 @@
 		clock-names = "fclk";
 	};
 
-	wkup_gpio_intr: interrupt-controller2 {
+	wkup_gpio_intr: interrupt-controller@42200000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x42200000 0x00 0x400>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index c2aa45a3ac79..3bcafe4c1742 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -76,8 +76,9 @@
 		};
 	};
 
-	main_gpio_intr: interrupt-controller0 {
+	main_gpio_intr: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x00a00000 0x00 0x800>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
@@ -87,18 +88,19 @@
 		ti,interrupt-ranges = <8 392 56>;
 	};
 
-	main-navss {
+	main_navss: bus@30000000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>;
 		dma-coherent;
 		dma-ranges;
 
 		ti,sci-dev-id = <199>;
 
-		main_navss_intr: interrupt-controller1 {
+		main_navss_intr: interrupt-controller@310e0000 {
 			compatible = "ti,sci-intr";
+			reg = <0x0 0x310e0000 0x0 0x4000>;
 			ti,intr-trigger-type = <4>;
 			interrupt-controller;
 			interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index d56e3475aee7..5e825e4d0306 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -6,7 +6,7 @@
 */
 
 &cbass_mcu_wakeup {
-	dmsc: dmsc@44083000 {
+	dmsc: system-controller@44083000 {
 		compatible = "ti,k2g-sci";
 		ti,host-id = <12>;
 
@@ -23,7 +23,7 @@
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
@@ -96,8 +96,9 @@
 		clock-names = "fclk";
 	};
 
-	wkup_gpio_intr: interrupt-controller2 {
+	wkup_gpio_intr: interrupt-controller@42200000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x42200000 0x00 0x400>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
@@ -249,11 +250,11 @@
 		};
 	};
 
-	mcu-navss {
+	mcu_navss: bus@28380000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>;
 		dma-coherent;
 		dma-ranges;
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 07ac208edc89..26889dbfe904 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -5,3 +5,5 @@ generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += set_memory.h
 generic-y += user.h
+
+generated-y += cpucaps.h
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 2175ec0004ed..451e11e5fd23 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
 * This insanity brought to you by speculative system register reads,
 * out-of-order memory accesses, sequence locks and Thomas Gleixner.
 *
- * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
+ * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/
 */
 #define arch_counter_enforce_ordering(val) do {				\
	u64 tmp, _val = (val);						\
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
deleted file mode 100644
index b0c5eda0498f..000000000000
--- a/arch/arm64/include/asm/cpucaps.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm64/include/asm/cpucaps.h
- *
- * Copyright (C) 2016 ARM Ltd.
- */
-#ifndef __ASM_CPUCAPS_H
-#define __ASM_CPUCAPS_H
-
-#define ARM64_WORKAROUND_CLEAN_CACHE		0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
-#define ARM64_WORKAROUND_845719			2
-#define ARM64_HAS_SYSREG_GIC_CPUIF		3
-#define ARM64_HAS_PAN				4
-#define ARM64_HAS_LSE_ATOMICS			5
-#define ARM64_WORKAROUND_CAVIUM_23154		6
-#define ARM64_WORKAROUND_834220			7
-#define ARM64_HAS_NO_HW_PREFETCH		8
-#define ARM64_HAS_VIRT_HOST_EXTN		11
-#define ARM64_WORKAROUND_CAVIUM_27456		12
-#define ARM64_HAS_32BIT_EL0			13
-#define ARM64_SPECTRE_V3A			14
-#define ARM64_HAS_CNP				15
-#define ARM64_HAS_NO_FPSIMD			16
-#define ARM64_WORKAROUND_REPEAT_TLBI		17
-#define ARM64_WORKAROUND_QCOM_FALKOR_E1003	18
-#define ARM64_WORKAROUND_858921			19
-#define ARM64_WORKAROUND_CAVIUM_30115		20
-#define ARM64_HAS_DCPOP				21
-#define ARM64_SVE				22
-#define ARM64_UNMAP_KERNEL_AT_EL0		23
-#define ARM64_SPECTRE_V2			24
-#define ARM64_HAS_RAS_EXTN			25
-#define ARM64_WORKAROUND_843419			26
-#define ARM64_HAS_CACHE_IDC			27
-#define ARM64_HAS_CACHE_DIC			28
-#define ARM64_HW_DBM				29
-#define ARM64_SPECTRE_V4			30
-#define ARM64_MISMATCHED_CACHE_TYPE		31
-#define ARM64_HAS_STAGE2_FWB			32
-#define ARM64_HAS_CRC32				33
-#define ARM64_SSBS				34
-#define ARM64_WORKAROUND_1418040		35
-#define ARM64_HAS_SB				36
-#define ARM64_WORKAROUND_SPECULATIVE_AT		37
-#define ARM64_HAS_ADDRESS_AUTH_ARCH		38
-#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF		39
-#define ARM64_HAS_GENERIC_AUTH_ARCH		40
-#define ARM64_HAS_GENERIC_AUTH_IMP_DEF		41
-#define ARM64_HAS_IRQ_PRIO_MASKING		42
-#define ARM64_HAS_DCPODP			43
-#define ARM64_WORKAROUND_1463225		44
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM	45
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM	46
-#define ARM64_WORKAROUND_1542419		47
-#define ARM64_HAS_E0PD				48
-#define ARM64_HAS_RNG				49
-#define ARM64_HAS_AMU_EXTN			50
-#define ARM64_HAS_ADDRESS_AUTH			51
-#define ARM64_HAS_GENERIC_AUTH			52
-#define ARM64_HAS_32BIT_EL1			53
-#define ARM64_BTI				54
-#define ARM64_HAS_ARMv8_4_TTL			55
-#define ARM64_HAS_TLB_RANGE			56
-#define ARM64_MTE				57
-#define ARM64_WORKAROUND_1508412		58
-#define ARM64_HAS_LDAPR				59
-#define ARM64_KVM_PROTECTED_MODE		60
-#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP	61
-#define ARM64_HAS_EPAN				62
-
-#define ARM64_NCAPS				63
-
-#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index cf8df032b9c3..5e9b33cbac51 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -63,6 +63,7 @@
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector		18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize		19
 #define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp			20
+#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			21
 
 #ifndef __ASSEMBLY__
 
@@ -201,6 +202,8 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
+
 extern u64 __vgic_v3_get_gic_config(void);
 extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f612c090f2e4..01b9857757f2 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -463,4 +463,9 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 	vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
 }
 
+static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
+{
+	return test_bit(feature, vcpu->arch.features);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 7859749d6628..5dab69d2c22b 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
 #define __NR_landlock_add_rule 445
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1cb39c0803a4..e720148232a0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -720,11 +720,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 			return ret;
 	}
 
-	if (run->immediate_exit)
-		return -EINTR;
-
 	vcpu_load(vcpu);
 
+	if (run->immediate_exit) {
+		ret = -EINTR;
+		goto out;
+	}
+
 	kvm_sigset_activate(vcpu);
 
 	ret = 1;
@@ -897,6 +899,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	kvm_sigset_deactivate(vcpu);
 
+out:
+	/*
+	 * In the unlikely event that we are returning to userspace
+	 * with pending exceptions or PC adjustment, commit these
+	 * adjustments in order to give userspace a consistent view of
+	 * the vcpu state. Note that this relies on __kvm_adjust_pc()
+	 * being preempt-safe on VHE.
+	 */
+	if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
+					 KVM_ARM64_INCREMENT_PC)))
+		kvm_call_hyp(__kvm_adjust_pc, vcpu);
+
 	vcpu_put(vcpu);
 	return ret;
 }
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 73629094f903..11541b94b328 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -296,7 +296,7 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 	*vcpu_pc(vcpu) = vect_offset;
 }
 
-void kvm_inject_exception(struct kvm_vcpu *vcpu)
+static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_el1_is_32bit(vcpu)) {
 		switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
@@ -329,3 +329,19 @@ void kvm_inject_exception(struct kvm_vcpu *vcpu)
 		}
 	}
 }
+
+/*
+ * Adjust the guest PC (and potentially exception state) depending on
+ * flags provided by the emulation code.
+ */
+void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+		kvm_inject_exception(vcpu);
+		vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+				      KVM_ARM64_EXCEPT_MASK);
+	} else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+		kvm_skip_instr(vcpu);
+		vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+	}
+}
diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
index 61716359035d..4fdfeabefeb4 100644
--- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
+++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
@@ -13,8 +13,6 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
 
-void kvm_inject_exception(struct kvm_vcpu *vcpu);
-
 static inline void kvm_skip_instr(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_mode_is_32bit(vcpu)) {
@@ -44,22 +42,6 @@ static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Adjust the guest PC on entry, depending on flags provided by EL1
- * for the purpose of emulation (MMIO, sysreg) or exception injection.
- */
-static inline void __adjust_pc(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
-		kvm_inject_exception(vcpu);
-		vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
-				      KVM_ARM64_EXCEPT_MASK);
-	} else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
-		kvm_skip_instr(vcpu);
-		vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
-	}
-}
-
-/*
 * Skip an instruction while host sysregs are live.
 * Assumes host is always 64-bit.
 */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f36420a80474..1632f001f4ed 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -28,6 +28,13 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) =  __kvm_vcpu_run(kern_hyp_va(vcpu));
 }
 
+static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+	__kvm_adjust_pc(kern_hyp_va(vcpu));
+}
+
 static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
 {
 	__kvm_flush_vm_context();
@@ -170,6 +177,7 @@ typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__kvm_vcpu_run),
+	HANDLE_FUNC(__kvm_adjust_pc),
 	HANDLE_FUNC(__kvm_flush_vm_context),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index e342f7f4f4fb..4b60c0056c04 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -23,8 +23,8 @@
 extern unsigned long hyp_nr_cpus;
 struct host_kvm host_kvm;
 
-struct hyp_pool host_s2_mem;
-struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_mem;
+static struct hyp_pool host_s2_dev;
 
 /*
 * Copies of the host's CPU features registers holding sanitized values.
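
The KVM/arm64 hunks above consolidate guest-PC fixup: kvm_inject_exception() becomes static, the inline __adjust_pc() helper is replaced by a single __kvm_adjust_pc() in exception.c, and kvm_arch_vcpu_ioctl_run() learns to commit a still-pending adjustment before returning to userspace (via the new __kvm_adjust_pc hypercall on nVHE). A condensed sketch of the flag protocol, using only names that appear in this diff; the surrounding lines are illustrative, not a quote of the source:

	/* Emulation code never moves the PC directly; it only marks the vcpu: */
	vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;	/* e.g. via kvm_incr_pc() */

	/*
	 * The mark is consumed exactly once, either on the next guest entry
	 * (__kvm_vcpu_run -> __kvm_adjust_pc) or, per the arm.c hunk above,
	 * on the way back to userspace:
	 */
	if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
					 KVM_ARM64_INCREMENT_PC)))
		kvm_call_hyp(__kvm_adjust_pc, vcpu);
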
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 7488f53b0aa2..a3d3a275344e 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -17,7 +17,6 @@
 #include <nvhe/trap_handler.h>
 
 struct hyp_pool hpool;
-struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 unsigned long hyp_nr_cpus;
 
 #define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +26,7 @@ static void *vmemmap_base;
 static void *hyp_pgt_base;
 static void *host_s2_mem_pgt_base;
 static void *host_s2_dev_pgt_base;
+static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 
 static int divide_memory_pool(void *virt, unsigned long size)
 {
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index e9f6ea704d07..f7af9688c1f7 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -4,7 +4,6 @@
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */
 
-#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 #include <hyp/sysreg-sr.h>
 
@@ -201,7 +200,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	__debug_save_host_buffers_nvhe(vcpu);
 
-	__adjust_pc(vcpu);
+	__kvm_adjust_pc(vcpu);
 
 	/*
	 * We must restore the 32-bit state before the sysregs, thanks
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 7b8f7db5c1ed..b3229924d243 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -4,7 +4,6 @@
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */
 
-#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 
 #include <linux/arm-smccc.h>
@@ -132,7 +131,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 	__load_guest_stage2(vcpu->arch.hw_mmu);
 	__activate_traps(vcpu);
 
-	__adjust_pc(vcpu);
+	__kvm_adjust_pc(vcpu);
 
 	sysreg_restore_guest_state_vhe(guest_ctxt);
 	__debug_switch_to_guest(vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c5d1f3c87dbd..c10207fed2f3 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1156,13 +1156,13 @@ out_unlock:
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
 			     (range->end - range->start) << PAGE_SHIFT,
 			     range->may_block);
 
-	return 0;
+	return false;
 }
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1170,7 +1170,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	kvm_pfn_t pfn = pte_pfn(range->pte);
 
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	WARN_ON(range->end - range->start != 1);
 
@@ -1190,7 +1190,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 			       PAGE_SIZE, __pfn_to_phys(pfn),
 			       KVM_PGTABLE_PROT_R, NULL);
 
-	return 0;
+	return false;
 }
 
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1200,7 +1200,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	pte_t pte;
 
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
 
@@ -1213,7 +1213,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
 					   range->start << PAGE_SHIFT);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 956cdc240148..d37ebee085cf 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -166,6 +166,25 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *tmp;
+	bool is32bit;
+	int i;
+
+	is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+	if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
+		return false;
+
+	/* Check that the vcpus are either all 32bit or all 64bit */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
+			return false;
+	}
+
+	return true;
+}
+
 /**
 * kvm_reset_vcpu - sets core registers and sys_regs to reset value
 * @vcpu: The VCPU pointer
@@ -217,13 +236,14 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	if (!vcpu_allowed_register_width(vcpu)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	switch (vcpu->arch.target) {
 	default:
 		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
-			if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) {
-				ret = -EINVAL;
-				goto out;
-			}
 			pstate = VCPU_RESET_PSTATE_SVC;
 		} else {
 			pstate = VCPU_RESET_PSTATE_EL1;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 76ea2800c33e..1a7968ad078c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -399,14 +399,14 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+	trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
 
 	return true;
 }
@@ -414,7 +414,7 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
 static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -424,7 +424,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
 
 	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -434,21 +434,21 @@
 static void reset_bvr(struct kvm_vcpu *vcpu,
 		      const struct sys_reg_desc *rd)
 {
-	vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
+	vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
 }
 
 static bool trap_bcr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+	trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
 
 	return true;
 }
@@ -456,7 +456,7 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
 static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -467,7 +467,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
 
 	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -477,22 +477,22 @@
 static void reset_bcr(struct kvm_vcpu *vcpu,
 		      const struct sys_reg_desc *rd)
 {
-	vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
+	vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
 }
 
 static bool trap_wvr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write,
-		vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
+	trace_trap_reg(__func__, rd->CRm, p->is_write,
+		vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
 
 	return true;
 }
@@ -500,7 +500,7 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
 static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -510,7 +510,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
 
 	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -520,21 +520,21 @@
 static void reset_wvr(struct kvm_vcpu *vcpu,
 		      const struct sys_reg_desc *rd)
 {
-	vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
+	vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
 }
 
 static bool trap_wcr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+	trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
 
 	return true;
 }
@@ -542,7 +542,7 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
 static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -552,7 +552,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		   const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
 
 	if (copy_to_user(uaddr, r,
KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -562,7 +562,7 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val; } static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index ac485163a4a7..6d44c028d1c9 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + if (!test_bit(PG_dcache_clean, &page->flags)) { sync_icache_aliases(page_address(page), page_size(page)); + set_bit(PG_dcache_clean, &page->flags); + } } EXPORT_SYMBOL_GPL(__sync_icache_dcache); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 16a2b2b1c54d..e55409caaee3 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -43,6 +43,7 @@ #include <linux/sizes.h> #include <asm/tlb.h> #include <asm/alternative.h> +#include <asm/xen/swiotlb-xen.h> /* * We need to be able to catch inadvertent references to memstart_addr @@ -482,7 +483,7 @@ void __init mem_init(void) if (swiotlb_force == SWIOTLB_FORCE || max_pfn > PFN_DOWN(arm64_dma_phys_limit)) swiotlb_init(1); - else + else if (!xen_swiotlb_detect()) swiotlb_force = SWIOTLB_NO_FORCE; set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6dd9369e3ea0..89b66ef43a0f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) + if (rodata_full || crash_mem_map || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 0a48191534ff..97d7bcd8d4f2 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup) mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) msr_s SYS_GCR_EL1, x10 + /* + * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then + * RGSR_EL1.SEED must be non-zero for IRG to produce + * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we + * must initialize it. 
+ */ + mrs x10, CNTVCT_EL0 + ands x10, x10, #SYS_RGSR_EL1_SEED_MASK + csinc x10, x10, xzr, ne + lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT + msr_s SYS_RGSR_EL1, x10 + /* clear any pending tag check faults in TFSR*_EL1 */ msr_s SYS_TFSR_EL1, xzr msr_s SYS_TFSRE0_EL1, xzr diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile new file mode 100644 index 000000000000..932b4fe5c768 --- /dev/null +++ b/arch/arm64/tools/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +gen := arch/$(ARCH)/include/generated +kapi := $(gen)/asm + +kapi-hdrs-y := $(kapi)/cpucaps.h + +targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) + +PHONY += kapi + +kapi: $(kapi-hdrs-y) $(gen-y) + +# Create output directory if not already present +_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') + +quiet_cmd_gen_cpucaps = GEN $@ + cmd_gen_cpucaps = mkdir -p $(dir $@) && \ + $(AWK) -f $(filter-out $(PHONY),$^) > $@ + +$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE + $(call if_changed,gen_cpucaps) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps new file mode 100644 index 000000000000..21fbdda7086e --- /dev/null +++ b/arch/arm64/tools/cpucaps @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Internal CPU capabilities constants, keep this list sorted + +BTI +HAS_32BIT_EL0 +HAS_32BIT_EL1 +HAS_ADDRESS_AUTH +HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_IMP_DEF +HAS_AMU_EXTN +HAS_ARMv8_4_TTL +HAS_CACHE_DIC +HAS_CACHE_IDC +HAS_CNP +HAS_CRC32 +HAS_DCPODP +HAS_DCPOP +HAS_E0PD +HAS_EPAN +HAS_GENERIC_AUTH +HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_IMP_DEF +HAS_IRQ_PRIO_MASKING +HAS_LDAPR +HAS_LSE_ATOMICS +HAS_NO_FPSIMD +HAS_NO_HW_PREFETCH +HAS_PAN +HAS_RAS_EXTN +HAS_RNG +HAS_SB +HAS_STAGE2_FWB +HAS_SYSREG_GIC_CPUIF +HAS_TLB_RANGE +HAS_VIRT_HOST_EXTN +HW_DBM +KVM_PROTECTED_MODE +MISMATCHED_CACHE_TYPE +MTE +SPECTRE_V2 +SPECTRE_V3A +SPECTRE_V4 +SSBS +SVE +UNMAP_KERNEL_AT_EL0 +WORKAROUND_834220 +WORKAROUND_843419 +WORKAROUND_845719 +WORKAROUND_858921 +WORKAROUND_1418040 +WORKAROUND_1463225 +WORKAROUND_1508412 +WORKAROUND_1542419 +WORKAROUND_CAVIUM_23154 +WORKAROUND_CAVIUM_27456 +WORKAROUND_CAVIUM_30115 +WORKAROUND_CAVIUM_TX2_219_PRFM +WORKAROUND_CAVIUM_TX2_219_TVM +WORKAROUND_CLEAN_CACHE +WORKAROUND_DEVICE_LOAD_ACQUIRE +WORKAROUND_NVIDIA_CARMEL_CNP +WORKAROUND_QCOM_FALKOR_E1003 +WORKAROUND_REPEAT_TLBI +WORKAROUND_SPECULATIVE_AT diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk new file mode 100755 index 000000000000..00c9e72a200a --- /dev/null +++ b/arch/arm64/tools/gen-cpucaps.awk @@ -0,0 +1,40 @@ +#!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# gen-cpucaps.awk: arm64 cpucaps header generator +# +# Usage: awk -f gen-cpucaps.awk cpucaps.txt + +# Log an error and terminate +function fatal(msg) { + print "Error at line " NR ": " msg > "/dev/stderr" + exit 1 +} + +# skip blank lines and comment lines +/^$/ { next } +/^#/ { next } + +BEGIN { + print "#ifndef __ASM_CPUCAPS_H" + print "#define __ASM_CPUCAPS_H" + print "" + print "/* Generated file - do not edit */" + cap_num = 0 + print "" +} + +/^[vA-Z0-9_]+$/ { + printf("#define ARM64_%-30s\t%d\n", $0, cap_num++) + next +} + +END { + printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num) + print "" + print "#endif /* __ASM_CPUCAPS_H */" +} + +# Any lines not handled by previous rules are unexpected +{ + fatal("unhandled statement") +} diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 1ee8e736a48e..bb11fe4c875a 100644 --- 
a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -363,7 +363,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 44f9b5216ac9..261a0f57cc9a 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -875,16 +875,8 @@ static const struct resource atari_scsi_tt_rsrc[] __initconst = { #define FALCON_IDE_BASE 0xfff00000 static const struct resource atari_falconide_rsrc[] __initconst = { - { - .flags = IORESOURCE_MEM, - .start = FALCON_IDE_BASE, - .end = FALCON_IDE_BASE + 0x39, - }, - { - .flags = IORESOURCE_IRQ, - .start = IRQ_MFP_FSCSI, - .end = IRQ_MFP_FSCSI, - }, + DEFINE_RES_MEM(FALCON_IDE_BASE, 0x38), + DEFINE_RES_MEM(FALCON_IDE_BASE + 0x38, 2), }; int __init atari_platform_init(void) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 59b727b69357..4fe26d54627e 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -323,11 +323,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GAYLE=y -CONFIG_BLK_DEV_BUDDHA=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -344,6 +339,11 @@ CONFIG_GVP11_SCSI=y CONFIG_SCSI_A4000T=y CONFIG_SCSI_ZORRO7XX=y CONFIG_SCSI_ZORRO_ESP=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_GAYLE=y +CONFIG_PATA_BUDDHA=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 9cc9f1a06516..21b2990fe9af 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -324,10 +324,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_FALCON_IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -339,6 +335,10 @@ CONFIG_SCSI_SAS_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_ATARI_SCSI=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_FALCON=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 4e68b72d9c50..b03300df13fc 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -315,11 +315,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_PLATFORM=y -CONFIG_BLK_DEV_MAC_IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -332,6 +327,10 @@ CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_MAC_SCSI=y CONFIG_SCSI_MAC_ESP=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_PLATFORM=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index d31896293c39..e2c8368e2231 100644 --- a/arch/m68k/configs/multi_defconfig +++ 
b/arch/m68k/configs/multi_defconfig @@ -344,15 +344,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_PLATFORM=y -CONFIG_BLK_DEV_GAYLE=y -CONFIG_BLK_DEV_BUDDHA=y -CONFIG_BLK_DEV_FALCON_IDE=y -CONFIG_BLK_DEV_MAC_IDE=y -CONFIG_BLK_DEV_Q40IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -376,6 +367,13 @@ CONFIG_MVME147_SCSI=y CONFIG_MVME16x_SCSI=y CONFIG_BVME6000_SCSI=y CONFIG_SUN3X_ESP=y +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_FALCON=y +CONFIG_PATA_GAYLE=y +CONFIG_PATA_BUDDHA=y +CONFIG_PATA_PLATFORM=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 664025a0f6a4..514e2e8cddbd 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -314,10 +314,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m -CONFIG_IDE=y -CONFIG_IDE_GD_ATAPI=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_Q40IDE=y CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -328,6 +324,10 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m +CONFIG_ATA=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_ATA_BMDMA is not set +CONFIG_PATA_FALCON=y CONFIG_MD=y CONFIG_MD_LINEAR=m CONFIG_BLK_DEV_DM=m diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index a4b7ee1df211..8f215e79e70e 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12); /* _sigfault._perf */ - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14); /* _sigpoll */ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c); diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 0dd019dc2136..79c2d24c89dd 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -442,7 +442,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 1cdac959bd91..5d16f9b47aa9 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -933,13 +933,15 @@ static const struct resource mac_scsi_ccl_rsrc[] __initconst = { }, }; -static const struct resource mac_ide_quadra_rsrc[] __initconst = { - DEFINE_RES_MEM(0x50F1A000, 0x104), +static const struct resource mac_pata_quadra_rsrc[] __initconst = { + DEFINE_RES_MEM(0x50F1A000, 0x38), + DEFINE_RES_MEM(0x50F1A038, 0x04), DEFINE_RES_IRQ(IRQ_NUBUS_F), }; -static const struct resource mac_ide_pb_rsrc[] __initconst = { - DEFINE_RES_MEM(0x50F1A000, 0x104), +static const struct resource mac_pata_pb_rsrc[] __initconst = { + DEFINE_RES_MEM(0x50F1A000, 0x38), + DEFINE_RES_MEM(0x50F1A038, 0x04), DEFINE_RES_IRQ(IRQ_NUBUS_C), }; @@ -949,7 +951,7 @@ static const struct resource mac_pata_baboon_rsrc[] __initconst = { DEFINE_RES_IRQ(IRQ_BABOON_1), }; 
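The m68k hunks above and below all converge on the same libata conversion: instead of a board-specific IDE host driver, each board registers a generic "pata_platform" device whose resources spell out the command block, the control register, and the IRQ, with pata_platform_info giving the register stride. A standalone sketch of that pattern, with illustrative addresses and a hypothetical IRQ number — none of these values come from the patch itself:

/*
 * Sketch only: registering a generic pata_platform device the way the
 * surrounding m68k board code does. Addresses and the IRQ are made up.
 */
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/ata_platform.h>

static const struct resource example_pata_rsrc[] __initconst = {
	DEFINE_RES_MEM(0x50f1a000, 0x38),	/* command block registers */
	DEFINE_RES_MEM(0x50f1a038, 0x04),	/* control/altstatus register */
	DEFINE_RES_IRQ(42),			/* hypothetical interrupt */
};

static const struct pata_platform_info example_pata_data __initconst = {
	.ioport_shift = 2,	/* registers laid out on 4-byte boundaries */
};

static int __init example_pata_init(void)
{
	return PTR_ERR_OR_ZERO(platform_device_register_resndata(NULL,
			"pata_platform", -1,
			example_pata_rsrc, ARRAY_SIZE(example_pata_rsrc),
			&example_pata_data, sizeof(example_pata_data)));
}
arch_initcall(example_pata_init);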
-static const struct pata_platform_info mac_pata_baboon_data __initconst = { +static const struct pata_platform_info mac_pata_data __initconst = { .ioport_shift = 2, }; @@ -1067,17 +1069,19 @@ int __init mac_platform_init(void) switch (macintosh_config->ide_type) { case MAC_IDE_QUADRA: - platform_device_register_simple("mac_ide", -1, - mac_ide_quadra_rsrc, ARRAY_SIZE(mac_ide_quadra_rsrc)); + platform_device_register_resndata(NULL, "pata_platform", -1, + mac_pata_quadra_rsrc, ARRAY_SIZE(mac_pata_quadra_rsrc), + &mac_pata_data, sizeof(mac_pata_data)); break; case MAC_IDE_PB: - platform_device_register_simple("mac_ide", -1, - mac_ide_pb_rsrc, ARRAY_SIZE(mac_ide_pb_rsrc)); + platform_device_register_resndata(NULL, "pata_platform", -1, + mac_pata_pb_rsrc, ARRAY_SIZE(mac_pata_pb_rsrc), + &mac_pata_data, sizeof(mac_pata_data)); break; case MAC_IDE_BABOON: platform_device_register_resndata(NULL, "pata_platform", -1, mac_pata_baboon_rsrc, ARRAY_SIZE(mac_pata_baboon_rsrc), - &mac_pata_baboon_data, sizeof(mac_pata_baboon_data)); + &mac_pata_data, sizeof(mac_pata_data)); break; } diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index d6a423875231..5caf1e5be1c2 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -286,14 +286,39 @@ static int q40_set_rtc_pll(struct rtc_pll_info *pll) return -EINVAL; } -static __init int q40_add_kbd_device(void) -{ - struct platform_device *pdev; +#define PCIDE_BASE1 0x1f0 +#define PCIDE_BASE2 0x170 +#define PCIDE_CTL 0x206 + +static const struct resource q40_pata_rsrc_0[] __initconst = { + DEFINE_RES_MEM(q40_isa_io_base + PCIDE_BASE1 * 4, 0x38), + DEFINE_RES_MEM(q40_isa_io_base + (PCIDE_BASE1 + PCIDE_CTL) * 4, 2), + DEFINE_RES_IO(PCIDE_BASE1, 8), + DEFINE_RES_IO(PCIDE_BASE1 + PCIDE_CTL, 1), + DEFINE_RES_IRQ(14), +}; +static const struct resource q40_pata_rsrc_1[] __initconst = { + DEFINE_RES_MEM(q40_isa_io_base + PCIDE_BASE2 * 4, 0x38), + DEFINE_RES_MEM(q40_isa_io_base + (PCIDE_BASE2 + PCIDE_CTL) * 4, 2), + DEFINE_RES_IO(PCIDE_BASE2, 8), + DEFINE_RES_IO(PCIDE_BASE2 + PCIDE_CTL, 1), + DEFINE_RES_IRQ(15), +}; + +static __init int q40_platform_init(void) +{ if (!MACH_IS_Q40) return -ENODEV; - pdev = platform_device_register_simple("q40kbd", -1, NULL, 0); - return PTR_ERR_OR_ZERO(pdev); + platform_device_register_simple("q40kbd", -1, NULL, 0); + + platform_device_register_simple("atari-falcon-ide", 0, q40_pata_rsrc_0, + ARRAY_SIZE(q40_pata_rsrc_0)); + + platform_device_register_simple("atari-falcon-ide", 1, q40_pata_rsrc_1, + ARRAY_SIZE(q40_pata_rsrc_1)); + + return 0; } -arch_initcall(q40_add_kbd_device); +arch_initcall(q40_platform_init); diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 2ac716984ca2..b11395a20c20 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -448,7 +448,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c index b184baa4e56a..f175bce2987f 100644 --- a/arch/mips/alchemy/board-xxs1500.c +++ b/arch/mips/alchemy/board-xxs1500.c @@ -18,6 +18,7 @@ #include <asm/reboot.h> #include <asm/setup.h> #include 
<asm/mach-au1x00/au1000.h> +#include <asm/mach-au1x00/gpio-au1000.h> #include <prom.h> const char *get_system_type(void) diff --git a/arch/mips/include/asm/mips-boards/launch.h b/arch/mips/include/asm/mips-boards/launch.h index f93aa5ee2e2e..3481ed4c117b 100644 --- a/arch/mips/include/asm/mips-boards/launch.h +++ b/arch/mips/include/asm/mips-boards/launch.h @@ -3,6 +3,9 @@ * */ +#ifndef _ASM_MIPS_BOARDS_LAUNCH_H +#define _ASM_MIPS_BOARDS_LAUNCH_H + #ifndef _ASSEMBLER_ struct cpulaunch { @@ -34,3 +37,5 @@ struct cpulaunch { /* Polling period in count cycles for secondary CPU's */ #define LAUNCHPERIOD 10000 + +#endif /* _ASM_MIPS_BOARDS_LAUNCH_H */ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 5e0096657251..9220909526f9 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -381,7 +381,7 @@ 440 n32 process_madvise sys_process_madvise 441 n32 epoll_pwait2 compat_sys_epoll_pwait2 442 n32 mount_setattr sys_mount_setattr -443 n32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n32 landlock_create_ruleset sys_landlock_create_ruleset 445 n32 landlock_add_rule sys_landlock_add_rule 446 n32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 9974f5f8e49b..9cd1c34f31b5 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -357,7 +357,7 @@ 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 442 n64 mount_setattr sys_mount_setattr -443 n64 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 39d6e71e57b6..d560c467a8c6 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -430,7 +430,7 @@ 440 o32 process_madvise sys_process_madvise 441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 o32 mount_setattr sys_mount_setattr -443 o32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 o32 landlock_create_ruleset sys_landlock_create_ruleset 445 o32 landlock_add_rule sys_landlock_add_rule 446 o32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c index de03838b343b..a9b72eacfc0b 100644 --- a/arch/mips/lib/mips-atomic.c +++ b/arch/mips/lib/mips-atomic.c @@ -37,7 +37,7 @@ */ notrace void arch_local_irq_disable(void) { - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -53,7 +53,7 @@ notrace void arch_local_irq_disable(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_disable); @@ -61,7 +61,7 @@ notrace unsigned long arch_local_irq_save(void) { unsigned long flags; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -78,7 +78,7 @@ notrace unsigned long arch_local_irq_save(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); return flags; } @@ -88,7 +88,7 @@ notrace void arch_local_irq_restore(unsigned long flags) { unsigned long __tmp1; - preempt_disable(); + preempt_disable_notrace(); __asm__ 
__volatile__( " .set push \n" @@ -106,7 +106,7 @@ notrace void arch_local_irq_restore(unsigned long flags) : "0" (flags) : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_restore); diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index a7bf0c80371c..830ab91e574f 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -158,31 +158,29 @@ unsigned long _page_cachable_default; EXPORT_SYMBOL(_page_cachable_default); #define PM(p) __pgprot(_page_cachable_default | (p)) -#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p)) static inline void setup_protection_map(void) { protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[1] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[2] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[3] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = PVA(_PAGE_PRESENT); - protection_map[5] = PVA(_PAGE_PRESENT); - protection_map[6] = PVA(_PAGE_PRESENT); - protection_map[7] = PVA(_PAGE_PRESENT); + protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = PM(_PAGE_PRESENT); + protection_map[5] = PM(_PAGE_PRESENT); + protection_map[6] = PM(_PAGE_PRESENT); + protection_map[7] = PM(_PAGE_PRESENT); protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[9] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = PVA(_PAGE_PRESENT); - protection_map[13] = PVA(_PAGE_PRESENT); - protection_map[14] = PVA(_PAGE_PRESENT); - protection_map[15] = PVA(_PAGE_PRESENT); + protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = PM(_PAGE_PRESENT); + protection_map[13] = PM(_PAGE_PRESENT); + protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); } -#undef _PVA #undef PM void cpu_cache_init(void) diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 0c5de07da097..0135376c5de5 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -8,6 +8,7 @@ #include <linux/io.h> #include <linux/clk.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/sizes.h> #include <linux/of_fdt.h> @@ -25,6 +26,7 @@ __iomem void *rt_sysc_membase; __iomem void *rt_memc_membase; +EXPORT_SYMBOL_GPL(rt_sysc_membase); __iomem void *plat_of_remap_node(const char *node) { diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h new file mode 100644 index 000000000000..7538294721be --- /dev/null +++ b/arch/openrisc/include/asm/barrier.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#define mb() asm volatile ("l.msync" ::: "memory") + +#include <asm-generic/barrier.h> + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 2416a9f91533..c6f9e7b9f7cb 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -278,6 +278,8 @@ void calibrate_delay(void) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy 
/ (5000 / HZ)) % 100, loops_per_jiffy); + + of_node_put(cpu); } void __init setup_arch(char **cmdline_p) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index d5641198b90c..cfef61a7b6c2 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -75,7 +75,6 @@ static void __init map_ram(void) /* These mark extents of read-only kernel pages... * ...from vmlinux.lds.S */ - struct memblock_region *region; v = PAGE_OFFSET; @@ -121,7 +120,7 @@ static void __init map_ram(void) } printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - region->base, region->base + region->size); + start, end); } } @@ -129,7 +128,6 @@ void __init paging_init(void) { extern void tlb_init(void); - unsigned long end; int i; printk(KERN_INFO "Setting up paging and PTEs.\n"); @@ -145,8 +143,6 @@ void __init paging_init(void) */ current_pgd[smp_processor_id()] = init_mm.pgd; - end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); - map_ram(); zone_sizes_init(); diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 5ac80b83d745..aabc37f8cae3 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -440,7 +440,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index c2717f31925a..ccda0a91abf0 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -122,7 +122,15 @@ }; /include/ "pq3-i2c-0.dtsi" + i2c@3000 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-i2c-1.dtsi" + i2c@3100 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-duart-0.dtsi" /include/ "pq3-espi-0.dtsi" spi0: spi@7000 { diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 872e4485dc3f..ddc018d42252 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -371,7 +371,23 @@ }; /include/ "qoriq-i2c-0.dtsi" + i2c@118000 { + fsl,i2c-erratum-a004447; + }; + + i2c@118100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-i2c-1.dtsi" + i2c@119000 { + fsl,i2c-erratum-a004447; + }; + + i2c@119100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-duart-0.dtsi" /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-gpio-0.dtsi" diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 443050906018..e3b29eda8074 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -448,6 +448,9 @@ */ long plpar_hcall_norets(unsigned long opcode, ...); +/* Variant which does not do hcall tracing */ +long plpar_hcall_norets_notrace(unsigned long opcode, ...); + /** * plpar_hcall: - Make a pseries hypervisor call * @opcode: The hypervisor call to make. 
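On the device-tree side, the fsl,i2c-erratum-a004447 property added to the P1010 and P2041 i2c nodes above is a plain boolean flag: its presence on a node is the whole signal. A hedged sketch of how a consuming driver would test for it, using the standard OF helper; the probe function and log message are illustrative, not taken from this patch:

/* Sketch only: detecting the boolean erratum flag added in the .dtsi hunks. */
#include <linux/of.h>
#include <linux/platform_device.h>

static int example_i2c_probe(struct platform_device *pdev)
{
	/* Boolean DT properties carry no value; presence alone means "true". */
	if (of_property_read_bool(pdev->dev.of_node,
				  "fsl,i2c-erratum-a004447"))
		dev_info(&pdev->dev, "applying erratum A-004447 workaround\n");

	return 0;
}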
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 44cde2e129b8..59f704408d65 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { - if (user_mode(regs)) - kuep_unlock(); } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) @@ -222,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_soft_mask = IRQS_ALL_DISABLED; local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) && + regs->nip < (unsigned long)__end_interrupts) { + // Kernel code running below __end_interrupts is + // implicitly soft-masked. + regs->softe = IRQS_ALL_DISABLED; + } + /* Don't do any per-CPU operations until interrupt state is fixed */ if (nmi_disables_ftrace(regs)) { diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 2d5c6bec2b4f..93ce3ec25387 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -50,7 +50,7 @@ l_yes: 1098: nop; \ .pushsection __jump_table, "aw"; \ .long 1098b - ., LABEL - .; \ - FTR_ENTRY_LONG KEY; \ + FTR_ENTRY_LONG KEY - .; \ .popsection #endif diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1e83359f286b..7f2e90db2050 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -51,6 +51,7 @@ /* PPC-specific vcpu->requests bit members */ #define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0) #define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1) +#define KVM_REQ_PENDING_TIMER KVM_ARCH_REQ(2) #include <linux/mmu_notifier.h> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index 5d1726bb28e7..bcb7b5f917be 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu) return be32_to_cpu(yield_count); } +/* + * Spinlock code confers and prods, so don't trace the hcalls because the + * tracing code takes spinlocks which can cause recursion deadlocks. + * + * These calls are made while the lock is not held: the lock slowpath yields if + * it can not acquire the lock, and unlock slow path might prod if a waiter has + * yielded). So this may not be a problem for simple spin locks because the + * tracing does not technically recurse on the lock, but we avoid it anyway. + * + * However the queued spin lock contended path is more strictly ordered: the + * H_CONFER hcall is made after the task has queued itself on the lock, so then + * recursing on that lock will cause the task to then queue up again behind the + * first instance (or worse: queued spinlocks use tricks that assume a context + * never waits on more than one spinlock, so such recursion may cause random + * corruption in the lock code). 
+ */ static inline void yield_to_preempted(int cpu, u32 yield_count) { - plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); + plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); } static inline void prod_cpu(int cpu) { - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu)); } static inline void yield_to_any(void) { - plpar_hcall_norets(H_CONFER, -1, 0); + plpar_hcall_norets_notrace(H_CONFER, -1, 0); } #else static inline bool is_shared_processor(void) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index ece84a430701..83e0f701ebc6 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint) static inline long cede_processor(void) { - return plpar_hcall_norets(H_CEDE); + /* + * We cannot call tracepoints inside RCU idle regions which + * means we must not trace H_CEDE. + */ + return plpar_hcall_norets_notrace(H_CEDE); } static inline long extended_cede_processor(unsigned long latency_hint) diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 33fa5dd8ee6a..714a35f0d425 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) pgd_t *pgdir = init_mm.pgd; return __find_linux_pte(pgdir, ea, NULL, hshift); } + +/* + * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a + * physical address, without taking locks. This can be used in real-mode. + */ +static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) +{ + pte_t *ptep; + phys_addr_t pa; + int hugepage_shift; + + /* + * init_mm does not free page tables, and does not do THP. It may + * have huge pages from huge vmalloc / ioremap etc. + */ + ptep = find_init_mm_pte(addr, &hugepage_shift); + if (WARN_ON(!ptep)) + return 0; + + pa = PFN_PHYS(pte_pfn(*ptep)); + + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + + pa |= addr & ((1ul << hugepage_shift) - 1); + + return pa; +} + /* * This is what we should always use. Any other lockless page table lookup needs * careful audit against THP split. 
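The new ppc_find_vmap_phys() helper above exists precisely so that real-mode code can resolve vmalloc/ioremap addresses without taking page-table locks; the eeh, io-workarounds and book3s_hv_rm_mmu hunks later in this patch all reduce to it. A minimal sketch of the intended usage, with a hypothetical wrapper name:

/*
 * Sketch only: resolve a vmalloc'd address to its linear-map alias from a
 * context (e.g. real mode) where taking locks is not an option. A zero
 * return from ppc_find_vmap_phys() means the lookup failed (and WARNed).
 */
#include <asm/pte-walk.h>

static void *example_real_mode_va(void *vmap_addr)
{
	phys_addr_t pa = ppc_find_vmap_phys((unsigned long)vmap_addr);

	return pa ? __va(pa) : NULL;
}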
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 9c9ab2746168..b476a685f066 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PTRACE_H #define _ASM_POWERPC_PTRACE_H +#include <linux/err.h> #include <uapi/asm/ptrace.h> #include <asm/asm-const.h> @@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); long do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#define kernel_stack_pointer(regs) ((regs)->gpr[1]) -static inline int is_syscall_success(struct pt_regs *regs) -{ - return !(regs->ccr & 0x10000000); -} - -static inline long regs_return_value(struct pt_regs *regs) -{ - if (is_syscall_success(regs)) - return regs->gpr[3]; - else - return -regs->gpr[3]; -} - -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->gpr[3] = rc; -} - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else @@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs) regs->trap |= 0x1; } +#define kernel_stack_pointer(regs) ((regs)->gpr[1]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return !IS_ERR_VALUE((unsigned long)regs->gpr[3]); + else + return !(regs->ccr & 0x10000000); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return regs->gpr[3]; + + if (is_syscall_success(regs)) + return regs->gpr[3]; + else + return -regs->gpr[3]; +} + +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gpr[3] = rc; +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index fd1b518eed17..ba0f88f3a30d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - /* - * If the system call failed, - * regs->gpr[3] contains a positive ERRORCODE. - */ - return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + if (trap_is_scv(regs)) { + unsigned long error = regs->gpr[3]; + + return IS_ERR_VALUE(error) ? error : 0; + } else { + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + } } static inline long syscall_get_return_value(struct task_struct *task, @@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - /* - * In the general case it's not obvious that we must deal with CCR - * here, as the syscall exit path will also do that for us. However - * there are some places, eg. the signal code, which check ccr to - * decide if the value in r3 is actually an error. - */ - if (error) { - regs->ccr |= 0x10000000L; - regs->gpr[3] = error; + if (trap_is_scv(regs)) { + regs->gpr[3] = (long) error ?: val; } else { - regs->ccr &= ~0x10000000L; - regs->gpr[3] = val; + /* + * In the general case it's not obvious that we must deal with + * CCR here, as the syscall exit path will also do that for us. + * However there are some places, eg. the signal code, which + * check ccr to decide if the value in r3 is actually an error. 
+ */ + if (error) { + regs->ccr |= 0x10000000L; + regs->gpr[3] = error; + } else { + regs->ccr &= ~0x10000000L; + regs->gpr[3] = val; + } } } diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a09e4240c5b1..22c79ab40006 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -157,7 +157,7 @@ do { \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ EX_TABLE(2b, %l2) \ - : "=r" (x) \ + : "=&r" (x) \ : "m" (*addr) \ : \ : label) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f24cd53ff26e..3bbdcc86d01b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) */ static inline unsigned long eeh_token_to_phys(unsigned long token) { - pte_t *ptep; - unsigned long pa; - int hugepage_shift; - - /* - * We won't find hugepages here(this is iomem). Hence we are not - * worried about _PAGE_SPLITTING/collapse. Also we will not hit - * page table free, because of init_mm. - */ - ptep = find_init_mm_pte(token, &hugepage_shift); - if (!ptep) - return token; - - pa = pte_pfn(*ptep); - - /* On radix we can do hugepage mappings for io, so handle that */ - if (!hugepage_shift) - hugepage_shift = PAGE_SHIFT; - - pa <<= PAGE_SHIFT; - pa |= token & ((1ul << hugepage_shift) - 1); - return pa; + return ppc_find_vmap_phys(token); } /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7c3654b0d0f4..f1ae710274bc 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -340,6 +340,12 @@ ret_from_mc_except: andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ bne masked_interrupt_book3e_##n +/* + * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is + * called, because that does SAVE_NVGPRS which must see the original register + * values, otherwise the scratch values might be restored when exiting the + * interrupt. 
+ */ #define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ std r15,PACA_EXGEN+EX_R15(r13) @@ -535,6 +541,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x300) b storage_fault_common @@ -544,6 +554,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x400) b storage_fault_common @@ -557,6 +571,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x600) b alignment_more /* no room, go out of line */ @@ -565,10 +583,10 @@ __end_interrupts: NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - EXCEPTION_COMMON(0x700) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) + EXCEPTION_COMMON(0x700) + addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) b interrupt_return @@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * normal exception */ mfspr r14,SPRN_DBSR - EXCEPTION_COMMON_CRIT(0xd00) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) + EXCEPTION_COMMON_CRIT(0xd00) + addi r3,r1,STACK_FRAME_OVERHEAD bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -796,11 +814,11 @@ kernel_dbg_exc: * normal exception */ mfspr r14,SPRN_DBSR - EXCEPTION_COMMON_DBG(0xd08) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) + EXCEPTION_COMMON_DBG(0xd08) + addi r3,r1,STACK_FRAME_OVERHEAD bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0: * original values stashed away in the PACA */ storage_fault_common: - std r14,_DAR(r1) - std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - ld r14,PACA_EXGEN+EX_R14(r13) - ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault b interrupt_return @@ -944,11 +958,7 @@ storage_fault_common: * continues here. 
*/ alignment_more: - std r14,_DAR(r1) - std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - ld r14,PACA_EXGEN+EX_R14(r13) - ld r15,PACA_EXGEN+EX_R15(r13) bl alignment_exception REST_NVGPRS(r1) b interrupt_return diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e4559f8914eb..e0938ba298f2 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5, syscall_fn f; kuep_lock(); -#ifdef CONFIG_PPC32 - kuap_save_and_lock(regs); -#endif regs->orig_gpr3 = r3; @@ -427,6 +424,7 @@ again: /* Restore user access locks last */ kuap_user_restore(regs); + kuep_unlock(); return ret; } diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 51bbaae94ccc..c877f074d174 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) #ifdef CONFIG_PPC_INDIRECT_MMIO struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { - unsigned hugepage_shift; struct iowa_bus *bus; int token; @@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) bus = &iowa_busses[token - 1]; else { unsigned long vaddr, paddr; - pte_t *ptep; vaddr = (unsigned long)PCI_FIX_ADDR(addr); if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) return NULL; - /* - * We won't find huge pages here (iomem). Also can't hit - * a page table free due to init_mm - */ - ptep = find_init_mm_pte(vaddr, &hugepage_shift); - if (ptep == NULL) - paddr = 0; - else { - WARN_ON(hugepage_shift); - paddr = pte_pfn(*ptep) << PAGE_SHIFT; - } + + paddr = ppc_find_vmap_phys(vaddr); + bus = iowa_pci_find(vaddr, paddr); if (bus == NULL) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 57d6b85e9b96..2af89a5e379f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; - size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - size_io = IOMMU_PAGE_ALIGN(size_io, tbl); - nio_pages = size_io >> tbl->it_page_shift; - io_order = get_iommu_order(size_io, tbl); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); - unsigned int nio_pages = size_io >> tbl->it_page_shift; + unsigned int nio_pages; + size = PAGE_ALIGN(size); + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 01ab2163659e..e8c2a6373157 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p) int ret = 0; struct kprobe *prev; struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); - struct 
ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; - } else if (ppc_inst_prefixed(prefix)) { + } else if ((unsigned long)p->addr & ~PAGE_MASK && + ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 8b2c1a8553a0..cfc03e016ff2 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console) static int __init ioremap_legacy_serial_console(void) { - struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console]; - struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console]; + struct plat_serial8250_port *port; + struct legacy_serial_info *info; void __iomem *vaddr; if (legacy_serial_console < 0) return 0; + info = &legacy_serial_infos[legacy_serial_console]; + port = &legacy_serial_ports[legacy_serial_console]; + if (!info->early_addr) return 0; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b779d25761cf..e42b85e4f1aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); - early_ioremap_setup(); - /* Initialize the hash table or TLB handling */ early_init_mmu(); + early_ioremap_setup(); + /* * After firmware and early platform setup code has set things up, * we note the SPR values for configurable control/performance diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index f4aafa337c2e..1f07317964e4 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #else -#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) +#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0) -#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0) static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index dca66481d0c2..f9e1f5428b9e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); user_write_access_end(); + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } - - /* Save the siginfo outside of the unsafe block. 
*/ - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - goto badframe; - /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2e68fbb57cc6..8f052ff4058c 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -522,7 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d9193cd73be..c63e263312a4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) kvm_unmap_radix(kvm, range->slot, gfn); } else { for (gfn = range->start; gfn < range->end; gfn++) - kvm_unmap_rmapp(kvm, range->slot, range->start); + kvm_unmap_rmapp(kvm, range->slot, gfn); } return false; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 28a80d240b76..bc0813644666 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3936,7 +3936,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) break; } cur = ktime_get(); - } while (single_task_running() && ktime_before(cur, stop)); + } while (kvm_vcpu_can_poll(cur, stop)); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; @@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBRR, ebb_regs[1]); mtspr(SPRN_BESCR, ebb_regs[2]); mtspr(SPRN_TAR, user_tar); - mtspr(SPRN_FSCR, current->thread.fscr); } mtspr(SPRN_VRSAVE, user_vrsave); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 7af7c70f1468..7a0f12404e0e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -23,20 +23,9 @@ #include <asm/pte-walk.h> /* Translate address of a vmalloc'd thing to a linear map address */ -static void *real_vmalloc_addr(void *x) +static void *real_vmalloc_addr(void *addr) { - unsigned long addr = (unsigned long) x; - pte_t *p; - /* - * assume we don't have huge pages in vmalloc space... - * So don't worry about THP collapse/split. Called - * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. 
- */ - p = find_init_mm_pte(addr, NULL); - if (!p || !pte_present(*p)) - return NULL; - addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); - return __va(addr); + return __va(ppc_find_vmap_phys((unsigned long)addr)); } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5e634db4809b..004f0d4e665f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_UAMOR (SFS-88) #define STACK_SLOT_DAWR1 (SFS-96) #define STACK_SLOT_DAWRX1 (SFS-104) +#define STACK_SLOT_FSCR (SFS-112) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -686,6 +687,8 @@ BEGIN_FTR_SECTION std r6, STACK_SLOT_DAWR0(r1) std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) + mfspr r5, SPRN_FSCR + std r5, STACK_SLOT_FSCR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION mfspr r6, SPRN_DAWR1 @@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE ld r7, STACK_SLOT_HFSCR(r1) mtspr SPRN_HFSCR, r7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_FSCR(r1) + mtspr SPRN_FSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 1fd31b4b0e13..fe26f2fa0f3f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include <asm/cputable.h> #include <asm/code-patching.h> #include <asm/page.h> @@ -149,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types & STF_BARRIER_FALLBACK) + // See comment in do_entry_flush_fixups() RE order of patching + if (types & STF_BARRIER_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); patch_branch((struct ppc_inst *)(dest + 1), - (unsigned long)&stf_barrier_fallback, - BRANCH_SET_LINK); - else - patch_instruction((struct ppc_inst *)(dest + 1), - ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); + } else { + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, @@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} void do_stf_barrier_fixups(enum stf_barrier_type types) { - do_stf_entry_barrier_fixups(types); - do_stf_exit_barrier_fixups(types); + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are executing + * them. 
So call __do_stf_barrier_fixups() on one CPU while all other CPUs + * spin in the stop machine core with interrupts hard disabled. + */ + stop_machine(__do_stf_barrier_fixups, &types, NULL); } void do_uaccess_flush_fixups(enum l1d_flush_type types) @@ -284,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) : "unknown"); } -void do_entry_flush_fixups(enum l1d_flush_type types) +static int __do_entry_flush_fixups(void *data) { + enum l1d_flush_type types = *(enum l1d_flush_type *)data; unsigned int instrs[3], *dest; long *start, *end; int i; @@ -309,6 +325,31 @@ void do_entry_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + /* + * If we're patching in or out the fallback flush we need to be careful about the + * order in which we patch instructions. That's because it's possible we could + * take a page fault after patching one instruction, so the sequence of + * instructions must be safe even in a half patched state. + * + * To make that work, when patching in the fallback flush we patch in this order: + * - the mflr (dest) + * - the mtlr (dest + 2) + * - the branch (dest + 1) + * + * That ensures the sequence is safe to execute at any point. In contrast if we + * patch the mtlr last, it's possible we could return from the branch and not + * restore LR, leading to a crash later. + * + * When patching out the fallback flush (either with nops or another flush type), + * we patch in this order: + * - the branch (dest + 1) + * - the mtlr (dest + 2) + * - the mflr (dest) + * + * Note we are protected by stop_machine() from other CPUs executing the code in a + * semi-patched state. + */ + start = PTRRELOC(&__start___entry_flush_fixup); end = PTRRELOC(&__stop___entry_flush_fixup); for (i = 0; start < end; start++, i++) { @@ -316,15 +357,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types == L1D_FLUSH_FALLBACK) - patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback, - BRANCH_SET_LINK); - else + if (types == L1D_FLUSH_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); + } else { patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } start = PTRRELOC(&__start___scv_entry_flush_fixup); @@ -334,15 +376,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types == L1D_FLUSH_FALLBACK) - patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback, - BRANCH_SET_LINK); - else + if (types == L1D_FLUSH_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); + } else { patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - - 
patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } @@ -354,6 +397,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types) : "ori type" : (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); } void do_rfi_flush_fixups(enum l1d_flush_type types) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 043bbeaf407c..a6b36a40897a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -20,6 +20,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include <asm/kasan.h> +#include <asm/sparsemem.h> #include <asm/svm.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 16d4d1b6a1ff..51622411a7cc 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) bool use_siar = regs_use_siar(regs); unsigned long siar = mfspr(SPRN_SIAR); - if (ppmu->flags & PPMU_P10_DD1) { + if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { if (siar) return siar; else diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 2136e42833af..8a2b8d64265b 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1); \ #define HCALL_BRANCH(LABEL) #endif +_GLOBAL_TOC(plpar_hcall_norets_notrace) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HVSC /* invoke the hypervisor */ + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 1f3152ad7213..dab356e3ff87 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1829,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void) #endif /* - * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. + * Keep track of hcall tracing depth and prevent recursion. Warn if any is + * detected because it may indicate a problem. This will not catch all + * problems with tracing code making hcalls, because the tracing might have + * been invoked from a non-hcall, so the first hcall could recurse into it + * without warning here, but this better than nothing. + * + * Hcalls with specific problems being traced should use the _notrace + * plpar_hcall variants. */ static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); -void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { unsigned long flags; unsigned int *depth; - /* - * We cannot call tracepoints inside RCU idle regions which - * means we must not trace H_CEDE. 
- */ - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (WARN_ON_ONCE(*depth)) goto out; (*depth)++; @@ -1864,19 +1862,16 @@ out: local_irq_restore(flags); } -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) +notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) { unsigned long flags; unsigned int *depth; - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (*depth) /* Don't warn again on the way out */ goto out; (*depth)++; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a8ad8eb76120..18ec0f9bb8d5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -34,6 +34,7 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_SUPPORTS_HUGETLBFS if MMU + select ARCH_USE_MEMTEST select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT @@ -60,11 +61,11 @@ config RISCV select GENERIC_TIME_VSYSCALL if MMU && 64BIT select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && 64BIT select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT - select HAVE_ARCH_KGDB + select HAVE_ARCH_KGDB if !XIP_KERNEL select HAVE_ARCH_KGDB_QXFER_PKT select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER @@ -79,9 +80,9 @@ config RISCV select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO if MMU && 64BIT select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_KPROBES - select HAVE_KPROBES_ON_FTRACE - select HAVE_KRETPROBES + select HAVE_KPROBES if !XIP_KERNEL + select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL + select HAVE_KRETPROBES if !XIP_KERNEL select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -230,11 +231,11 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 - select HAVE_DYNAMIC_FTRACE if MMU && $(cc-option,-fpatchable-function-entry=8) + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL select SWIOTLB if MMU endchoice diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index ed963761fbd2..30676ebb16eb 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -14,6 +14,7 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC + select RISCV_ERRATA_ALTERNATIVE select ERRATA_SIFIVE help This enables support for SiFive SoC platform hardware. 
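A note on the lpar.c hcall-tracing hunks above: the H_CEDE special cases are gone, and recursion into the tracing code is now caught by a per-CPU depth counter, with WARN_ON_ONCE() firing on entry but deliberately staying quiet on the exit path. The following is a minimal userspace analogue of that guard (a sketch only: the kernel version uses a per-CPU counter under local_irq_save() rather than a thread-local, and trace_event() is an invented name):

#include <stdio.h>

static _Thread_local unsigned int trace_depth;

static void trace_event(const char *what)
{
	if (trace_depth)		/* tracer re-entered itself: bail out */
		return;
	trace_depth++;
	printf("trace: %s\n", what);	/* this call could itself be traced */
	trace_depth--;
}

Anything the tracer calls that loops back into trace_event() returns immediately instead of recursing; hcalls that are genuinely problematic to trace are expected to switch to the new plpar_hcall_norets_notrace() variant instead.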
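On the !XIP_KERNEL dependencies added to arch/riscv/Kconfig above: jump labels, kprobes, kgdb and dynamic ftrace all rewrite kernel text at runtime, while an execute-in-place kernel fetches its text straight from flash, where such stores cannot land. A sketch of the store every one of these features ultimately needs (illustrative only, not the kernel's actual riscv patch_text() implementation; patch_insn() is an invented name):

#include <stdint.h>
#include <string.h>

#define RISCV_INSN_NOP	0x00000013u	/* addi x0, x0, 0 */

/* Overwrite one 32-bit instruction; assumes ip points into writable,
 * RAM-backed text, which is exactly what XIP does not provide. */
static void patch_insn(uint32_t *ip)
{
	uint32_t nop = RISCV_INSN_NOP;

	memcpy(ip, &nop, sizeof(nop));		/* store into .text */
	asm volatile("fence.i" ::: "memory");	/* make the hart refetch */
}

With CONFIG_XIP_KERNEL the instruction stream is read-only ROM, so that store has nowhere to go, which is presumably why the restriction is expressed as Kconfig dependencies rather than a runtime check.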
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 3eb9590a0775..99ecd8bcfd77 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) CC_FLAGS_FTRACE := -fpatchable-function-entry=8 endif -ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) +ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS_MODULE += -mcmodel=medany endif @@ -38,6 +38,15 @@ else KBUILD_LDFLAGS += -melf32lriscv endif +ifeq ($(CONFIG_LD_IS_LLD),y) + KBUILD_CFLAGS += -mno-relax + KBUILD_AFLAGS += -mno-relax +ifneq ($(LLVM_IAS),1) + KBUILD_CFLAGS += -Wa,-mno-relax + KBUILD_AFLAGS += -Wa,-mno-relax +endif +endif + # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile index 622b12771fd3..855c1502d912 100644 --- a/arch/riscv/boot/dts/microchip/Makefile +++ b/arch/riscv/boot/dts/microchip/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += microchip-mpfs-icicle-kit.dtb +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile index 74c47fe9fc22..d90e4eb0ade8 100644 --- a/arch/riscv/boot/dts/sifive/Makefile +++ b/arch/riscv/boot/dts/sifive/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb \ hifive-unmatched-a00.dtb +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi index 8eef82e4199f..abbb960f90a0 100644 --- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi @@ -273,7 +273,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic0>; - interrupts = <19 20 21 22>; + interrupts = <19 21 22 20>; reg = <0x0 0x2010000 0x0 0x1000>; }; gpio: gpio@10060000 { diff --git a/arch/riscv/errata/sifive/Makefile b/arch/riscv/errata/sifive/Makefile index bdd5fc843b8e..2fde48db0619 100644 --- a/arch/riscv/errata/sifive/Makefile +++ b/arch/riscv/errata/sifive/Makefile @@ -1,2 +1,2 @@ -obj-y += errata_cip_453.o +obj-$(CONFIG_ERRATA_SIFIVE_CIP_453) += errata_cip_453.o obj-y += errata.o diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 88c08705f64a..67406c376389 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -51,7 +51,7 @@ REG_ASM " " newlen "\n" \ ".word " errata_id "\n" -#define ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) \ +#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \ ".if " __stringify(enable) " == 1\n" \ ".pushsection .alternative, \"a\"\n" \ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ @@ -69,7 +69,7 @@ "886 :\n" \ old_c "\n" \ "887 :\n" \ - ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) + ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 1e954101906a..e4e291d40759 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -42,8 +42,8 @@ struct kimage_arch { unsigned long fdt_addr; }; -const extern unsigned char riscv_kexec_relocate[]; 
-const extern unsigned int riscv_kexec_relocate_size; +extern const unsigned char riscv_kexec_relocate[]; +extern const unsigned int riscv_kexec_relocate_size; typedef void (*riscv_kexec_method)(unsigned long first_ind_entry, unsigned long jump_addr, diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 9469f464e71a..380cd3a7e548 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -30,9 +30,8 @@ #define BPF_JIT_REGION_SIZE (SZ_128M) #ifdef CONFIG_64BIT -/* KASLR should leave at least 128MB for BPF after the kernel */ -#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) -#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_START (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (MODULES_END) #else #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) #define BPF_JIT_REGION_END (VMALLOC_END) diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index cc048143fba5..9e99e1db156b 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -14,8 +14,9 @@ #include <asm/set_memory.h> /* For set_memory_x() */ #include <linux/compiler.h> /* For unreachable() */ #include <linux/cpu.h> /* For cpu_down() */ +#include <linux/reboot.h> -/** +/* * kexec_image_info - Print received image details */ static void @@ -39,7 +40,7 @@ kexec_image_info(const struct kimage *image) } } -/** +/* * machine_kexec_prepare - Initialize kexec * * This function is called from do_kexec_load, when the user has @@ -100,7 +101,7 @@ machine_kexec_prepare(struct kimage *image) } -/** +/* * machine_kexec_cleanup - Cleanup any leftovers from * machine_kexec_prepare * @@ -135,7 +136,7 @@ void machine_shutdown(void) #endif } -/** +/* * machine_crash_shutdown - Prepare to kexec after a kernel crash * * This function is called by crash_kexec just before machine_kexec @@ -151,7 +152,7 @@ machine_crash_shutdown(struct pt_regs *regs) pr_info("Starting crashdump kernel...\n"); } -/** +/* * machine_kexec - Jump to the loaded kimage * * This function is called by kernel_kexec which is called by the diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 10b965c34536..15cc65ac7ca6 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -84,6 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } +#ifdef CONFIG_MMU void *alloc_insn_page(void) { return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, @@ -91,6 +92,7 @@ void *alloc_insn_page(void) VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } +#endif /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 03901d3a8b02..9a1b7a0603b2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -231,13 +231,13 @@ static void __init init_resources(void) /* Clean-up any unused pre-allocated resources */ mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res); - memblock_free((phys_addr_t) mem_res, mem_res_sz); + memblock_free(__pa(mem_res), mem_res_sz); return; error: /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); - memblock_free((phys_addr_t) mem_res, mem_res_sz); + memblock_free(__pa(mem_res), mem_res_sz); } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 
2b3e0cb90d78..bde85fc53357 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,10 +27,10 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(0); - sp = sp_in_global; - pc = (unsigned long)walk_stackframe; + } else if (task == current) { + fp = (unsigned long)__builtin_frame_address(1); + sp = (unsigned long)__builtin_frame_address(0); + pc = (unsigned long)__builtin_return_address(0); } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -106,15 +106,15 @@ static bool print_trace_address(void *arg, unsigned long pc) return true; } -void dump_backtrace(struct pt_regs *regs, struct task_struct *task, +noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task, const char *loglvl) { - pr_cont("%sCall Trace:\n", loglvl); walk_stackframe(task, regs, print_trace_address, (void *)loglvl); } void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { + pr_cont("%sCall Trace:\n", loglvl); dump_backtrace(NULL, task, loglvl); } @@ -139,7 +139,7 @@ unsigned long get_wchan(struct task_struct *task) #ifdef CONFIG_STACKTRACE -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0721b9798595..7bc88d8aab97 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -86,8 +86,13 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } } +#if defined (CONFIG_XIP_KERNEL) && defined (CONFIG_RISCV_ERRATA_ALTERNATIVE) +#define __trap_section __section(".xip.traps") +#else +#define __trap_section +#endif #define DO_ERROR_INFO(name, signo, code, str) \ -asmlinkage __visible void name(struct pt_regs *regs) \ +asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ { \ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ } @@ -111,7 +116,7 @@ DO_ERROR_INFO(do_trap_store_misaligned, int handle_misaligned_load(struct pt_regs *regs); int handle_misaligned_store(struct pt_regs *regs); -asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) +asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs) { if (!handle_misaligned_load(regs)) return; @@ -119,7 +124,7 @@ asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) "Oops - load address misaligned"); } -asmlinkage void do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs) { if (!handle_misaligned_store(regs)) return; @@ -146,7 +151,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } -asmlinkage __visible void do_trap_break(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) { #ifdef CONFIG_KPROBES if (kprobe_single_step_handler(regs)) diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 4b29b9917f99..a3ff09c4c3f9 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -99,9 +99,22 @@ SECTIONS } PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); + . 
= ALIGN(8); + .alternative : { + __alt_start = .; + *(.alternative) + __alt_end = .; + } __init_end = .; + . = ALIGN(16); + .xip.traps : { + __xip_traps_start = .; + *(.xip.traps) + __xip_traps_end = .; + } + + . = ALIGN(PAGE_SIZE); .sdata : { __global_pointer$ = . + 0x800; *(.sdata*) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 4faf8bd157ea..4c4c92ce0bb8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -746,14 +746,18 @@ void __init protect_kernel_text_data(void) unsigned long init_data_start = (unsigned long)__init_data_begin; unsigned long rodata_start = (unsigned long)__start_rodata; unsigned long data_start = (unsigned long)_data; - unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) + unsigned long end_va = kernel_virt_addr + load_sz; +#else + unsigned long end_va = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); +#endif set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT); set_memory_ro(init_text_start, (init_data_start - init_text_start) >> PAGE_SHIFT); set_memory_nx(init_data_start, (rodata_start - init_data_start) >> PAGE_SHIFT); /* rodata section is marked readonly in mark_rodata_ro */ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); - set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT); + set_memory_nx(data_start, (end_va - data_start) >> PAGE_SHIFT); } void mark_rodata_ro(void) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 9daacae93e33..d7189c8714a9 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -169,7 +169,7 @@ static void __init kasan_shallow_populate(void *start, void *end) void __init kasan_init(void) { - phys_addr_t _start, _end; + phys_addr_t p_start, p_end; u64 i; /* @@ -189,9 +189,9 @@ void __init kasan_init(void) (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); /* Populate the linear mapping */ - for_each_mem_range(i, &_start, &_end) { - void *start = (void *)__va(_start); - void *end = (void *)__va(_end); + for_each_mem_range(i, &p_start, &p_end) { + void *start = (void *)__va(p_start); + void *end = (void *)__va(p_end); if (start >= end) break; @@ -201,7 +201,7 @@ void __init kasan_init(void) /* Populate kernel, BPF, modules mapping */ kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), - kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END)); + kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G)); for (i = 0; i < PTRS_PER_PTE; i++) set_pte(&kasan_early_shadow_pte[i], diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 2b543163d90a..76c6034428be 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -91,12 +91,16 @@ struct stack_frame { CALL_ARGS_4(arg1, arg2, arg3, arg4); \ register unsigned long r4 asm("6") = (unsigned long)(arg5) -#define CALL_FMT_0 "=&d" (r2) : -#define CALL_FMT_1 "+&d" (r2) : -#define CALL_FMT_2 CALL_FMT_1 "d" (r3), -#define CALL_FMT_3 CALL_FMT_2 "d" (r4), -#define CALL_FMT_4 CALL_FMT_3 "d" (r5), -#define CALL_FMT_5 CALL_FMT_4 "d" (r6), +/* + * To keep this simple mark register 2-6 as being changed (volatile) + * by the called function, even though register 6 is saved/nonvolatile. 
+ */ +#define CALL_FMT_0 "=&d" (r2) +#define CALL_FMT_1 "+&d" (r2) +#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) +#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) +#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) +#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6) #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -118,7 +122,7 @@ struct stack_frame { " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "R" (stack), \ + : [_stack] "R" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ [_frame] "d" (frame), \ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 12de7a9c85b3..e84f495e7eb2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -418,6 +418,7 @@ ENTRY(\name) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) tm %r8,0x0001 # coming from user space? @@ -651,9 +652,9 @@ ENDPROC(stack_overflow) .Lcleanup_sie_mcck: larl %r13,.Lsie_entry slgr %r9,%r13 - larl %r13,.Lsie_skip + lghi %r13,.Lsie_skip - .Lsie_entry clgr %r9,%r13 - jh .Lcleanup_sie_int + jhe .Lcleanup_sie_int oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST .Lcleanup_sie_int: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 90163e6184f5..080e7aed181f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) /* No handlers present - check for system call restart */ clear_pt_regs_flag(regs, PIF_SYSCALL); - clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (current->thread.system_call) { regs->int_code = current->thread.system_call; switch (regs->gprs[2]) { diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 7e4a2aba366d..0690263df1dd 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bfcc327acc6b..26aa2614ee35 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c { static cpumask_t mask; - cpumask_copy(&mask, cpumask_of(cpu)); + cpumask_clear(&mask); + if (!cpu_online(cpu)) + goto out; + cpumask_set_cpu(cpu, &mask); switch (topology_mode) { case TOPOLOGY_MODE_HW: while (info) { @@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c default: fallthrough; case TOPOLOGY_MODE_SINGLE: - cpumask_copy(&mask, cpumask_of(cpu)); break; } cpumask_and(&mask, &mask, cpu_online_mask); +out: cpumask_copy(dst, &mask); } @@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) static cpumask_t mask; int i; - 
cpumask_copy(&mask, cpumask_of(cpu)); + cpumask_clear(&mask); + if (!cpu_online(cpu)) + goto out; + cpumask_set_cpu(cpu, &mask); if (topology_mode != TOPOLOGY_MODE_HW) goto out; cpu -= cpu % (smp_cpu_mtid + 1); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 813b6e93dc83..c8841f476e91 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -140,7 +140,12 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) /* Allocate variable storage */ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE); vlen += uv_info.guest_virt_base_stor_len; - kvm->arch.pv.stor_var = vzalloc(vlen); + /* + * The Create Secure Configuration Ultravisor Call does not support + * using large pages for the virtual memory area. + * This is a hardware limitation. + */ + kvm->arch.pv.stor_var = vmalloc_no_huge(vlen); if (!kvm->arch.pv.stor_var) goto out_err; return 0; diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index f47a0dc55445..0b91499ebdcf 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index f5beecdac693..e76b22157099 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { } BUILD_TRAP_HANDLER(nmi) { - unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; arch_ftrace_nmi_enter(); diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b9e1c0e735b7..e34cc30ef22c 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -488,7 +488,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c77c5d8a7b3e..cb5e8d39cac1 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) endif -ifdef CONFIG_LTO_CLANG -KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ - -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) -endif - # Workaround for a gcc prelease that unfortunately was shipped in a suse release KBUILD_CFLAGS += -Wno-sign-compare # @@ -202,7 +197,13 @@ ifdef CONFIG_RETPOLINE endif endif -KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) +KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) + +ifdef CONFIG_LTO_CLANG +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif +endif ifdef CONFIG_X86_NEED_RELOCS LDFLAGS_vmlinux := --emit-relocs --discard-none diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6e5522aebbbd..431bf7f846c3 100644 --- 
a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -30,6 +30,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ KBUILD_CFLAGS := -m$(BITS) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE +KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone @@ -48,10 +49,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h KBUILD_CFLAGS += $(CLANG_FLAGS) -# sev-es.c indirectly inludes inat-table.h which is generated during +# sev.c indirectly inludes inat-table.h which is generated during # compilation and stored in $(objtree). Add the directory to the includes so # that the compiler finds it even with out-of-tree builds (make O=/some/path). -CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/ +CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -93,7 +94,7 @@ ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o - vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dde042f64cca..743f13ea25c1 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -172,7 +172,7 @@ void __puthex(unsigned long value) } } -#if CONFIG_X86_NEED_RELOCS +#ifdef CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) { diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index e5612f035498..31139256859f 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -79,7 +79,7 @@ struct mem_vector { u64 size; }; -#if CONFIG_RANDOMIZE_BASE +#ifdef CONFIG_RANDOMIZE_BASE /* kaslr.c */ void choose_random_location(unsigned long input, unsigned long input_size, diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev.c index 82041bd380e5..670e998fe930 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev.c @@ -13,7 +13,7 @@ #include "misc.h" #include <asm/pgtable_types.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/trapnr.h> #include <asm/trap_pf.h> #include <asm/msr-index.h> @@ -117,7 +117,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-es-shared.c" +#include "../../kernel/sev-shared.c" static bool early_setup_sev_es(void) { diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7b2542b13ebd..04bce95bc7e3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -130,8 +130,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) /* User code screwed up. 
*/ regs->ax = -EFAULT; - instrumentation_end(); local_irq_disable(); + instrumentation_end(); irqentry_exit_to_user_mode(regs); return false; } @@ -269,15 +269,16 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) irqentry_state_t state = irqentry_enter(regs); bool inhcall; + instrumentation_begin(); run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); inhcall = get_and_clear_inhcall(); if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { - instrumentation_begin(); irqentry_exit_cond_resched(); instrumentation_end(); restore_inhcall(inhcall); } else { + instrumentation_end(); irqentry_exit(regs, state); } } diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a16a5294d55f..1886aaf19914 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym) movq %rsp, %rdi /* pt_regs pointer */ - call \cfunc + call kernel_\cfunc /* * No need to switch back to the IST stack. The current stack is either @@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym) /* Switch to the regular task stack */ .Lfrom_usermode_switch_stack_\@: - idtentry_body safe_stack_\cfunc, has_error_code=1 + idtentry_body user_\cfunc, has_error_code=1 _ASM_NOKPROBE(\asmsym) SYM_CODE_END(\asmsym) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 28a1423ce32e..4bbc267fb36b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -447,7 +447,7 @@ 440 i386 process_madvise sys_process_madvise 441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 i386 mount_setattr sys_mount_setattr -443 i386 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 i386 landlock_create_ruleset sys_landlock_create_ruleset 445 i386 landlock_add_rule sys_landlock_add_rule 446 i386 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ecd551b08d05..ce18119ea0d0 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,7 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e509325c2c3..8f71dd72ef95 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -396,10 +396,12 @@ int x86_reserve_hardware(void) if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) + if (!reserve_pmc_hardware()) { err = -EBUSY; - else + } else { reserve_ds_buffers(); + reserve_lbr_buffers(); + } } if (!err) atomic_inc(&pmc_refcount); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2521d03de5e0..3a77f66730d0 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6015,7 +6015,13 @@ __init int intel_pmu_init(void) tsx_attr = hsw_tsx_events_attrs; intel_pmu_pebs_data_source_skl(pmem); - if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + /* + * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default. + * TSX force abort hooks are not required on these systems. 
Only deploy + * workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT. + */ + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) && + !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) { x86_pmu.flags |= PMU_FL_TFA; x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; @@ -6253,7 +6259,7 @@ __init int intel_pmu_init(void) * Check all LBT MSR here. * Disable LBR access if any LBR MSRs can not be accessed. */ - if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL)) x86_pmu.lbr_nr = 0; for (i = 0; i < x86_pmu.lbr_nr; i++) { if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 76dbab6ac9fb..e8453de7a964 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel) void intel_pmu_lbr_add(struct perf_event *event) { - struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) @@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event) perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); - - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - kmem_cache && !cpuc->lbr_xsave && - (cpuc->lbr_users != cpuc->lbr_pebs_users)) - cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL); } void release_lbr_buffers(void) @@ -722,6 +716,27 @@ void release_lbr_buffers(void) } } +void reserve_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, + GFP_KERNEL | __GFP_ZERO, + cpu_to_node(cpu)); + } +} + void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 63f097289a84..3a75a2c601c2 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1406,6 +1406,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool die_id = i; else die_id = topology_phys_to_logical_pkg(i); + if (die_id < 0) + die_id = -ENODEV; map->pbus_to_dieid[bus] = die_id; break; } @@ -1452,14 +1454,14 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool i = -1; if (reverse) { for (bus = 255; bus >= 0; bus--) { - if (map->pbus_to_dieid[bus] >= 0) + if (map->pbus_to_dieid[bus] != -1) i = map->pbus_to_dieid[bus]; else map->pbus_to_dieid[bus] = i; } } else { for (bus = 0; bus <= 255; bus++) { - if (map->pbus_to_dieid[bus] >= 0) + if (map->pbus_to_dieid[bus] != -1) i = map->pbus_to_dieid[bus]; else map->pbus_to_dieid[bus] = i; @@ -5097,9 +5099,10 @@ static struct intel_uncore_type icx_uncore_m2m = { .perf_ctr = SNR_M2M_PCI_PMON_CTR0, .event_ctl = SNR_M2M_PCI_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, .ops = &snr_m2m_uncore_pci_ops, - .format_group = &skx_uncore_format_group, + .format_group = &snr_m2m_uncore_format_group, }; static struct attribute 
*icx_upi_uncore_formats_attr[] = { diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 27fa85e7d4fd..ad87cb36f7c8 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1244,6 +1244,8 @@ void reserve_ds_buffers(void); void release_lbr_buffers(void); +void reserve_lbr_buffers(void); + extern struct event_constraint bts_constraint; extern struct event_constraint vlbr_constraint; @@ -1393,6 +1395,10 @@ static inline void release_lbr_buffers(void) { } +static inline void reserve_lbr_buffers(void) +{ +} + static inline int intel_pmu_init(void) { return 0; diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 84a1042c3b01..85feafacc445 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -764,13 +764,14 @@ static struct rapl_model model_spr = { .rapl_msrs = intel_rapl_spr_msrs, }; -static struct rapl_model model_amd_fam17h = { +static struct rapl_model model_amd_hygon = { .events = BIT(PERF_RAPL_PKG), .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, .rapl_msrs = amd_rapl_msrs, }; static const struct x86_cpu_id rapl_model_match[] __initconst = { + X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon), X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb), X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep), X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb), @@ -803,9 +804,6 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), - X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h), - X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h), - X86_MATCH_VENDOR_FAM(AMD, 0x19, &model_amd_fam17h), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 412b51e059c8..48067af94678 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ac37830ae941..d0ce5cfd3ac1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -108,7 +108,7 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -/* free ( 3*32+29) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ @@ -378,6 +378,7 @@ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ 
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ #define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index b7dd944dc867..8f28fafa98b3 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,11 +56,8 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_IOMMU_SUPPORT -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif +/* Force disable because it's broken beyond repair */ +#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index ed33a14188f6..23bef08a8388 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); */ #define PASID_DISABLED 0 -#ifdef CONFIG_IOMMU_SUPPORT -/* Update current's PASID MSR/state by mm's PASID. */ -void update_pasid(void); -#else static inline void update_pasid(void) { } -#endif + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 8d33ad80704f..16bf4d4a8159 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); } +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -272,28 +280,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) { u64 mask = -1; @@ -578,19 +564,19 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * PKRU state is switched eagerly because it needs to be valid before we * return to userland e.g. for a copy_to_user() operation. */ - if (current->mm) { + if (!(current->flags & PF_KTHREAD)) { + /* + * If the PKRU bit in xsave.header.xfeatures is not set, + * then the PKRU component was in init state, which means + * XRSTOR will set PKRU to 0. If the bit is not set then + * get_xsave_addr() will return NULL because the PKRU value + * in memory is not valid. This means pkru_val has to be + * set to 0 and not to init_pkru_value. 
+ */ pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); - if (pk) - pkru_val = pk->pkru; + pkru_val = pk ? pk->pkru : 0; } __write_pkru(pkru_val); - - /* - * Expensive PASID MSR write will be avoided in update_pasid() because - * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated - * unless it's different from mm->pasid to reduce overhead. - */ - update_pasid(); } #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 73d45b0dfff2..cd9f3e304944 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs) */ #define DECLARE_IDTENTRY_VC(vector, func) \ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \ - __visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \ - __visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code) + __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \ + __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code) /** * DEFINE_IDTENTRY_IST - Emit code for IST entry points @@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs) DEFINE_IDTENTRY_RAW_ERRORCODE(func) /** - * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler - which runs on a safe stack. + * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler + when raised from kernel mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE */ -#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func) +#define DEFINE_IDTENTRY_VC_KERNEL(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func) /** - * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler - which runs on the VC fall-back stack + * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler + when raised from user mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE */ -#define DEFINE_IDTENTRY_VC_IST(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func) - -/** - * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler - * @func: Function name of the entry point - * - * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE - */ -#define DEFINE_IDTENTRY_VC(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(func) +#define DEFINE_IDTENTRY_VC_USER(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func) #else /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 955b06d6325a..27158436f322 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -102,7 +102,8 @@ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ #define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ -#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Willow Cove */ + +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 610a05374c02..0449b125d27f 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -4,8 +4,6 @@ #define HAVE_JUMP_LABEL_BATCH -#define JUMP_LABEL_NOP_SIZE 5 - #include <asm/asm.h> #include <asm/nops.h> @@ -14,15 +12,35 @@ #include <linux/stringify.h> #include <linux/types.h> +#define 
JUMP_TABLE_ENTRY \ + ".pushsection __jump_table, \"aw\" \n\t" \ + _ASM_ALIGN "\n\t" \ + ".long 1b - . \n\t" \ + ".long %l[l_yes] - . \n\t" \ + _ASM_PTR "%c0 + %c1 - .\n\t" \ + ".popsection \n\t" + +#ifdef CONFIG_STACK_VALIDATION + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + "jmp %l[l_yes] # objtool NOPs this \n\t" + JUMP_TABLE_ENTRY + : : "i" (key), "i" (2 | branch) : : l_yes); + + return false; +l_yes: + return true; +} + +#else + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; @@ -30,16 +48,13 @@ l_yes: return true; } +#endif /* STACK_VALIDATION */ + static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" - ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" - "2:\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + "jmp %l[l_yes]\n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; @@ -47,41 +62,7 @@ l_yes: return true; } -#else /* __ASSEMBLY__ */ - -.macro STATIC_JUMP_IF_TRUE target, key, def -.Lstatic_jump_\@: - .if \def - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .else - .byte BYTES_NOP5 - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key - . - .popsection -.endm - -.macro STATIC_JUMP_IF_FALSE target, key, def -.Lstatic_jump_\@: - .if \def - .byte BYTES_NOP5 - .else - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key + 1 - . 
- .popsection -.endm +extern int arch_jump_entry_size(struct jump_entry *entry); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 323641097f63..e7bef91cee04 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) KVM_X86_OP_NULL(update_pi_irte) +KVM_X86_OP_NULL(start_assignment) KVM_X86_OP_NULL(apicv_post_state_restore) KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt) KVM_X86_OP_NULL(set_hv_timer) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55efbacfc244..9c7ced0e3171 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1352,6 +1352,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); + void (*start_assignment)(struct kvm *kvm); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ddfb3cad8dff..0607ec4f5091 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ - SMCA_LS_V2, /* Load Store */ + SMCA_LS_V2, SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ @@ -314,17 +314,22 @@ enum smca_bank_types { SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ SMCA_CS, /* Coherent Slave */ - SMCA_CS_V2, /* Coherent Slave */ + SMCA_CS_V2, SMCA_PIE, /* Power, Interrupts, etc. 
*/ SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ - SMCA_PSP_V2, /* Platform Security Processor */ + SMCA_PSP_V2, SMCA_SMU, /* System Management Unit */ - SMCA_SMU_V2, /* System Management Unit */ + SMCA_SMU_V2, SMCA_MP5, /* Microprocessor 5 Unit */ SMCA_NBIO, /* Northbridge IO Unit */ SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ N_SMCA_BANK_TYPES }; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 742d89a00721..a7c413432b33 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -537,9 +537,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 @@ -772,6 +772,10 @@ #define MSR_TFA_RTM_FORCE_ABORT_BIT 0 #define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) +#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1 +#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT) +#define MSR_TFA_SDV_ENABLE_RTM_BIT 2 +#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT) /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index ca840fec7776..4bde0dc66100 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -75,7 +75,7 @@ void copy_page(void *to, void *from); * * With page table isolation enabled, we map the LDT in ... [stay tuned] */ -static inline unsigned long task_size_max(void) +static __always_inline unsigned long task_size_max(void) { unsigned long ret; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 154321d29050..556b2b17c3e2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -787,8 +787,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow); #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); +extern u32 amd_get_highest_perf(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } +static inline u32 amd_get_highest_perf(void) { return 0; } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 000000000000..2cef6c5a52c2 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. 
+ * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_DATA_LOW 12 +#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1) + +#define GHCB_DATA(v) \ + (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW) + +/* SEV Information Request/Response */ +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) + +/* CPUID Request/Response */ +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + (GHCB_MSR_CPUID_REQ | \ + (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ + (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) + +/* AP Reset Hold */ +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 + +/* GHCB Hypervisor Feature Request/Response */ +#define GHCB_MSR_HV_FT_REQ 0x080 +#define GHCB_MSR_HV_FT_RESP 0x081 + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \ + ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS)) + +#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 +#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +#endif diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev.h index cf1d957c7091..fa5cd05d3b5b 100644 --- a/arch/x86/include/asm/sev-es.h +++ b/arch/x86/include/asm/sev.h @@ -10,34 +10,12 @@ #include <linux/types.h> #include <asm/insn.h> +#include <asm/sev-common.h> -#define GHCB_SEV_INFO 0x001UL -#define GHCB_SEV_INFO_REQ 0x002UL -#define GHCB_INFO(v) ((v) & 0xfffUL) -#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) -#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) -#define GHCB_PROTO_OUR 0x0001UL -#define GHCB_SEV_CPUID_REQ 0x004UL -#define GHCB_CPUID_REQ_EAX 0 -#define GHCB_CPUID_REQ_EBX 1 -#define GHCB_CPUID_REQ_ECX 2 -#define GHCB_CPUID_REQ_EDX 3 -#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ - (((unsigned long)reg & 3) << 30) | \ - (((unsigned long)fn) << 32)) +#define GHCB_PROTO_OUR 0x0001UL +#define GHCB_PROTOCOL_MAX 1ULL +#define GHCB_DEFAULT_USAGE 0ULL -#define GHCB_PROTOCOL_MAX 0x0001UL -#define 
GHCB_DEFAULT_USAGE 0x0000UL - -#define GHCB_SEV_CPUID_RESP 0x005UL -#define GHCB_SEV_TERMINATE 0x100UL -#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ - (((((u64)reason_set) & 0x7) << 12) | \ - ((((u64)reason_val) & 0xff) << 16)) -#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 -#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 - -#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) #define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } enum es_result { diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h index ddbdefd5b94f..91a7b6687c3b 100644 --- a/arch/x86/include/asm/thermal.h +++ b/arch/x86/include/asm/thermal.h @@ -3,11 +3,13 @@ #define _ASM_X86_THERMAL_H #ifdef CONFIG_X86_THERMAL_VECTOR +void therm_lvt_init(void); void intel_init_thermal(struct cpuinfo_x86 *c); bool x86_thermal_enabled(void); void intel_thermal_interrupt(void); #else -static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +static inline void therm_lvt_init(void) { } +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } #endif #endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h index 119ac8612d89..136e5e57cfe1 100644 --- a/arch/x86/include/asm/vdso/clocksource.h +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -7,4 +7,6 @@ VDSO_CLOCKMODE_PVCLOCK, \ VDSO_CLOCKMODE_HVCLOCK +#define HAVE_VDSO_CLOCKMODE_HVCLOCK + #endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0704c2a94272..0f66682ac02a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg -CFLAGS_REMOVE_sev-es.o = -pg +CFLAGS_REMOVE_sev.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n -KASAN_SANITIZE_sev-es.o := n +KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 49ae4e1ac9cd..7de599eba7f0 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -197,7 +197,8 @@ static int __init ffh_cstate_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_INTEL && - c->x86_vendor != X86_VENDOR_AMD) + c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) return -1; cpu_cstate_entry = alloc_percpu(struct cstate_entry); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6974b5174495..6fe5b44fcbc9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -183,41 +183,69 @@ done: } /* + * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) + * + * @instr: instruction byte stream + * @instrlen: length of the above + * @off: offset within @instr where the first NOP has been detected + * + * Return: number of NOPs found (and replaced). 
+ */ +static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) +{ + unsigned long flags; + int i = off, nnops; + + while (i < instrlen) { + if (instr[i] != 0x90) + break; + + i++; + } + + nnops = i - off; + + if (nnops <= 1) + return nnops; + + local_irq_save(flags); + add_nops(instr + off, nnops); + local_irq_restore(flags); + + DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); + + return nnops; +} + +/* * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification. */ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { - unsigned long flags; struct insn insn; - int nop, i = 0; + int i = 0; /* - * Jump over the non-NOP insns, the remaining bytes must be single-byte - * NOPs, optimize them. + * Jump over the non-NOP insns and optimize single-byte NOPs into bigger + * ones. */ for (;;) { if (insn_decode_kernel(&insn, &instr[i])) return; + /* + * See if this and any potentially following NOPs can be + * optimized. + */ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) - break; - - if ((i += insn.length) >= a->instrlen) - return; - } + i += optimize_nops_range(instr, a->instrlen, i); + else + i += insn.length; - for (nop = i; i < a->instrlen; i++) { - if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i])) + if (i >= a->instrlen) return; } - - local_irq_save(flags); - add_nops(instr + nop, i - nop); - local_irq_restore(flags); - - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", - instr, nop, a->instrlen); } /* diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 09083094eb57..23dda362dc0f 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -25,6 +25,7 @@ #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 #define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e /* Protect the PCI config register pairs used for SMN and DF indirect access. 
*/ static DEFINE_MUTEX(smn_mutex); @@ -57,6 +58,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, {} }; @@ -72,6 +74,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4a39fb429f15..d262811ce14b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode) end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); + lapic_update_legacy_vectors(); } #ifdef CONFIG_UP_LATE_INIT diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6dbdc7c22bb7..fb67ed5e7e6a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace) irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); } +void __init lapic_update_legacy_vectors(void) +{ + unsigned int i; + + if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0) + return; + + /* + * If the IO/APIC is disabled via config, kernel command line or + * lack of enumeration then all legacy interrupts are routed + * through the PIC. Make sure that they are marked as legacy + * vectors. PIC_CASCADE_IR has already been marked in + * lapic_assign_system_vectors(). + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + lapic_assign_legacy_vector(i, true); + } +} + void __init lapic_assign_system_vectors(void) { unsigned int i, vector = 0; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d11384dc9ab..b7c003013d41 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) */ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { /* Check if memory encryption is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) goto clear_all; /* @@ -646,6 +646,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & BIT(12)) set_cpu_cap(c, X86_FEATURE_ACC_POWER); + /* Bit 14 indicates the Runtime Average Power Limit interface. 
*/ + if (c->x86_power & BIT(14)) + set_cpu_cap(c, X86_FEATURE_RAPL); + #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); #else @@ -1165,3 +1169,19 @@ void set_dr_addr_mask(unsigned long mask, int dr) break; } } + +u32 amd_get_highest_perf(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || + (c->x86_model >= 0x70 && c->x86_model < 0x80))) + return 166; + + if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || + (c->x86_model >= 0x40 && c->x86_model < 0x70))) + return 166; + + return 255; +} +EXPORT_SYMBOL_GPL(amd_get_highest_perf); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 67944128876d..95521302630d 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -48,6 +48,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], enum tsx_ctrl_states { TSX_CTRL_ENABLE, TSX_CTRL_DISABLE, + TSX_CTRL_RTM_ALWAYS_ABORT, TSX_CTRL_NOT_SUPPORTED, }; @@ -56,6 +57,7 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; extern void __init tsx_init(void); extern void tsx_enable(void); extern void tsx_disable(void); +extern void tsx_clear_cpuid(void); #else static inline void tsx_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 0bd6c74e3ba1..6d50136f7ab9 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -260,6 +260,10 @@ static void early_init_hygon(struct cpuinfo_x86 *c) if (c->x86_power & BIT(12)) set_cpu_cap(c, X86_FEATURE_ACC_POWER); + /* Bit 14 indicates the Runtime Average Power Limit interface. */ + if (c->x86_power & BIT(14)) + set_cpu_cap(c, X86_FEATURE_RAPL); + #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8adffc17fa8b..861e919eba9a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -717,8 +717,10 @@ static void init_intel(struct cpuinfo_x86 *c) if (tsx_ctrl_state == TSX_CTRL_ENABLE) tsx_enable(); - if (tsx_ctrl_state == TSX_CTRL_DISABLE) + else if (tsx_ctrl_state == TSX_CTRL_DISABLE) tsx_disable(); + else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT) + tsx_clear_cpuid(); split_lock_init(); bus_lock_init(); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index e486f96b3cb3..08831acc1d03 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -77,27 +77,29 @@ struct smca_bank_name { }; static struct smca_bank_name smca_names[] = { - [SMCA_LS] = { "load_store", "Load Store Unit" }, - [SMCA_LS_V2] = { "load_store", "Load Store Unit" }, - [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, - [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, - [SMCA_DE] = { "decode_unit", "Decode Unit" }, - [SMCA_RESERVED] = { "reserved", "Reserved" }, - [SMCA_EX] = { "execution_unit", "Execution Unit" }, - [SMCA_FP] = { "floating_point", "Floating Point Unit" }, - [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, - [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, - [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, - [SMCA_PIE] = { "pie", "Power, Interrupts, etc." 
}, - [SMCA_UMC] = { "umc", "Unified Memory Controller" }, - [SMCA_PB] = { "param_block", "Parameter Block" }, - [SMCA_PSP] = { "psp", "Platform Security Processor" }, - [SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, - [SMCA_SMU] = { "smu", "System Management Unit" }, - [SMCA_SMU_V2] = { "smu", "System Management Unit" }, - [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, - [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, - [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, + [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" }, + [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, + [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, + [SMCA_DE] = { "decode_unit", "Decode Unit" }, + [SMCA_RESERVED] = { "reserved", "Reserved" }, + [SMCA_EX] = { "execution_unit", "Execution Unit" }, + [SMCA_FP] = { "floating_point", "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, + [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, + [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, + + /* UMC v2 is separate because both of them can exist in a single system. */ + [SMCA_UMC] = { "umc", "Unified Memory Controller" }, + [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" }, + [SMCA_PB] = { "param_block", "Parameter Block" }, + [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, + [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" }, + [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, + [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, + [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" }, + [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" }, + [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" }, + [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" }, }; static const char *smca_get_name(enum smca_bank_types t) @@ -155,6 +157,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* Unified Memory Controller MCA type */ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) }, + { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) }, /* Parameter Block MCA type */ { SMCA_PB, HWID_MCATYPE(0x05, 0x0) }, @@ -175,6 +178,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* PCI Express Unit MCA type */ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) }, + { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) }, + + /* xGMI PCS MCA type */ + { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) }, + + /* xGMI PHY MCA type */ + { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, + + /* WAFL PHY MCA type */ + { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, }; struct smca_bank smca_banks[MAX_NR_BANKS]; diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index b58b85380ddb..0e3ae64d3b76 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -36,7 +36,8 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) mce_setup(&m); m.bank = -1; /* Fake a memory read error with unknown channel */ - m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; + m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f; + m.misc = (MCI_MISC_ADDR_PHYS << 6) | PAGE_SHIFT; if (severity >= GHES_SEV_RECOVERABLE) m.status |= MCI_STATUS_UC; diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0c3b372318b7..b5f43049fa5f 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void) if (boot_cpu_data.x86 < 0xf) return 0; /* In case some 
hypervisor doesn't pass SYSCFG through: */ - if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0) return 0; /* * Memory between 4GB and top of mem is forced WB by this magic bit. diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b90f3f437765..558108296f3c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - rdmsr(MSR_K8_SYSCFG, lo, hi); + rdmsr(MSR_AMD64_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; - mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi); } } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 3ef5868ac588..7aecb2fc3186 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -63,7 +63,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BPU_PERFCTR0; } - fallthrough; + break; case X86_VENDOR_ZHAOXIN: case X86_VENDOR_CENTAUR: return msr - MSR_ARCH_PERFMON_PERFCTR0; @@ -96,7 +96,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BSU_ESCR0; } - fallthrough; + break; case X86_VENDOR_ZHAOXIN: case X86_VENDOR_CENTAUR: return msr - MSR_ARCH_PERFMON_EVENTSEL0; diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c index 6ad165a5c0cc..64511c4a5200 100644 --- a/arch/x86/kernel/cpu/sgx/virt.c +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -212,6 +212,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file) list_splice_tail(&secs_pages, &zombie_secs_pages); mutex_unlock(&zombie_secs_pages_lock); + xa_destroy(&vepc->page_array); kfree(vepc); return 0; diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index e2ad30e474f8..9c7a5f049292 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -2,7 +2,7 @@ /* * Intel Transactional Synchronization Extensions (TSX) control. * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * Author: * Pawan Gupta <pawan.kumar.gupta@linux.intel.com> @@ -84,13 +84,46 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) return TSX_CTRL_ENABLE; } +void tsx_clear_cpuid(void) +{ + u64 msr; + + /* + * MSR_TFA_TSX_CPUID_CLEAR bit is only present when both CPUID + * bits RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are present. + */ + if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) && + boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + rdmsrl(MSR_TSX_FORCE_ABORT, msr); + msr |= MSR_TFA_TSX_CPUID_CLEAR; + wrmsrl(MSR_TSX_FORCE_ABORT, msr); + } +} + void __init tsx_init(void) { char arg[5] = {}; int ret; - if (!tsx_ctrl_is_supported()) + /* + * Hardware will always abort a TSX transaction if both CPUID bits + * RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are set. In this case, it is + * better not to enumerate CPUID.RTM and CPUID.HLE bits. Clear them + * here. 
+ */ + if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) && + boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT; + tsx_clear_cpuid(); + setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); return; + } + + if (!tsx_ctrl_is_supported()) { + tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; + return; + } ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); if (ret >= 0) { diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a4ec65317a7f..b7b92cdf3add 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state, if (use_xsave()) { /* - * Note: we don't need to zero the reserved bits in the - * xstate_header here because we either didn't copy them at all, - * or we checked earlier that they aren't set. + * Clear all feature bits which are not set in + * user_xfeatures and clear all extended features + * for fx_only mode. */ + u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; /* - * 'user_xfeatures' might have bits clear which are - * set in header->xfeatures. This represents features that - * were in init state prior to a signal delivery, and need - * to be reset back to the init state. Clear any user - * feature bits which are set in the kernel buffer to get - * them back to the init state. - * - * Supervisor state is unchanged by input from userspace. - * Ensure supervisor state bits stay set and supervisor - * state is not modified. + * Supervisor state has to be preserved. The sigframe + * restore can only modify user features, i.e. @mask + * cannot contain them. */ - if (fx_only) - header->xfeatures = XFEATURE_MASK_FPSSE; - else - header->xfeatures &= user_xfeatures | - xfeatures_mask_supervisor(); + header->xfeatures &= mask | xfeatures_mask_supervisor(); } if (use_fxsr()) { @@ -307,13 +297,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) return 0; } - if (!access_ok(buf, size)) - return -EACCES; + if (!access_ok(buf, size)) { + ret = -EACCES; + goto out; + } - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + if (!static_cpu_has(X86_FEATURE_FPU)) { + ret = fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); + goto out; + } if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; @@ -369,6 +363,25 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); return 0; } + + /* + * The above did an FPU restore operation, restricted to + * the user portion of the registers, and failed, but the + * microcode might have modified the FPU registers + * nevertheless. + * + * If the FPU registers do not belong to current, then + * invalidate the FPU register state otherwise the task might + * preempt current and return to user space with corrupted + * FPU registers. + * + * In case current owns the FPU registers then no further + * action is required. The fixup below will handle it + * correctly. 
+ */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + __cpu_invalidate_fpregs_state(); + fpregs_unlock(); } else { /* @@ -377,7 +390,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ ret = __copy_from_user(&env, buf, sizeof(env)); if (ret) - goto err_out; + goto out; envp = &env; } @@ -405,16 +418,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (use_xsave() && !fx_only) { u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; - if (using_compacted_format()) { - ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); - } else { - ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); - - if (!ret && state_size > offsetof(struct xregs_state, header)) - ret = validate_user_xstate_header(&fpu->state.xsave.header); - } + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) - goto err_out; + goto out; sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, fx_only); @@ -434,7 +440,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); if (ret) { ret = -EFAULT; - goto err_out; + goto out; } sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, @@ -452,7 +458,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else { ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); if (ret) - goto err_out; + goto out; fpregs_lock(); ret = copy_kernel_to_fregs_err(&fpu->state.fsave); @@ -463,7 +469,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_deactivate(fpu); fpregs_unlock(); -err_out: +out: if (ret) fpu__clear_user_states(fpu); return ret; } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a85c64000218..1cadb2faf740 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -441,12 +441,35 @@ static void __init print_xstate_offset_size(void) } /* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu_buf() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ + (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_PASID) + +/* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu __initdata = 1; + BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED) != + XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. 
+ * + * XSAVE could be used, but that would require reshuffling the + * data when XSAVES is available because XSAVES uses xstate + * compaction. But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). Those can be saved with FXSAVE into the + * legacy area. Adding new features requires ensuring that the init + * state is all zeroes or, if it is not, adding the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) @@ -1402,60 +1437,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, return 0; } #endif /* CONFIG_PROC_PID_ARCH_STATUS */ - -#ifdef CONFIG_IOMMU_SUPPORT -void update_pasid(void) -{ - u64 pasid_state; - u32 pasid; - - if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) - return; - - if (!current->mm) - return; - - pasid = READ_ONCE(current->mm->pasid); - /* Set the valid bit in the PASID MSR/state only for valid pasid. */ - pasid_state = pasid == PASID_DISABLED ? - pasid : pasid | MSR_IA32_PASID_VALID; - - /* - * No need to hold fregs_lock() since the task's fpstate won't - * be changed by others (e.g. ptrace) while the task is being - * switched to or is in IPI. - */ - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - /* The MSR is active and can be directly updated. */ - wrmsrl(MSR_IA32_PASID, pasid_state); - } else { - struct fpu *fpu = &current->thread.fpu; - struct ia32_pasid_state *ppasid_state; - struct xregs_state *xsave; - - /* - * The CPU's xstate registers are not currently active. Just - * update the PASID state in the memory buffer here. The - * PASID MSR will be loaded when returning to user mode. - */ - xsave = &fpu->state.xsave; - xsave->header.xfeatures |= XFEATURE_MASK_PASID; - ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID); - /* - * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state - * won't be NULL and no need to check its value. - * - * Only update the task's PASID state when it's different - * from the mm's pasid. - */ - if (ppasid_state->pasid != pasid_state) { - /* - * Invalid fpregs so that state restoring will pick up - * the PASID state. - */ - __fpu_invalidate_fpregs_state(fpu); - ppasid_state->pasid = pasid_state; - } - } -} -#endif /* CONFIG_IOMMU_SUPPORT */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 18be44163a50..de01903c3735 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,7 +39,7 @@ #include <asm/realmode.h> #include <asm/extable.h> #include <asm/trapnr.h> -#include <asm/sev-es.h> +#include <asm/sev.h> /* * Manage page tables very early on. */ diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 6a2eb62c85e6..674906fad43b 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -15,50 +15,75 @@ #include <asm/kprobes.h> #include <asm/alternative.h> #include <asm/text-patching.h> +#include <asm/insn.h> -static void bug_at(const void *ip, int line) +int arch_jump_entry_size(struct jump_entry *entry) { - /* - * The location is not an op that we were expecting. - * Something went wrong. Crash the box, as something could be - * corrupting the kernel. 
- */ - pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line); - BUG(); + struct insn insn = {}; + + insn_decode_kernel(&insn, (void *)jump_entry_code(entry)); + BUG_ON(insn.length != 2 && insn.length != 5); + + return insn.length; } -static const void * -__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type) +struct jump_label_patch { + const void *code; + int size; +}; + +static struct jump_label_patch +__jump_label_patch(struct jump_entry *entry, enum jump_label_type type) { - const void *expect, *code; + const void *expect, *code, *nop; const void *addr, *dest; - int line; + int size; addr = (void *)jump_entry_code(entry); dest = (void *)jump_entry_target(entry); - code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); + size = arch_jump_entry_size(entry); + switch (size) { + case JMP8_INSN_SIZE: + code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest); + nop = x86_nops[size]; + break; - if (type == JUMP_LABEL_JMP) { - expect = x86_nops[5]; line = __LINE__; - } else { - expect = code; line = __LINE__; + case JMP32_INSN_SIZE: + code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); + nop = x86_nops[size]; + break; + + default: BUG(); } - if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) - bug_at(addr, line); + if (type == JUMP_LABEL_JMP) + expect = nop; + else + expect = code; + + if (memcmp(addr, expect, size)) { + /* + * The location is not an op that we were expecting. + * Something went wrong. Crash the box, as something could be + * corrupting the kernel. + */ + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph) size:%d type:%d\n", + addr, addr, addr, expect, size, type); + BUG(); + } if (type == JUMP_LABEL_NOP) - code = x86_nops[5]; + code = nop; - return code; + return (struct jump_label_patch){.code = code, .size = size}; } static inline void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) { - const void *opcode = __jump_label_set_jump_code(entry, type); + const struct jump_label_patch jlp = __jump_label_patch(entry, type); /* * As long as only a single processor is running and the code is still @@ -72,12 +97,11 @@ static inline void __jump_label_transform(struct jump_entry *entry, * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { - text_poke_early((void *)jump_entry_code(entry), opcode, - JUMP_LABEL_NOP_SIZE); + text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size); return; } - text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); + text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); } static void __ref jump_label_transform(struct jump_entry *entry, @@ -98,7 +122,7 @@ void arch_jump_label_transform(struct jump_entry *entry, bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { - const void *opcode; + struct jump_label_patch jlp; if (system_state == SYSTEM_BOOTING) { /* @@ -109,9 +133,8 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, } mutex_lock(&text_mutex); - opcode = __jump_label_set_jump_code(entry, type); - text_poke_queue((void *)jump_entry_code(entry), - opcode, JUMP_LABEL_NOP_SIZE, NULL); + jlp = __jump_label_patch(entry, type); + text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); mutex_unlock(&text_mutex); return true; } diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index b5cb49e57df8..c94dec6a1834 100644 --- 
a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void) return; /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is not enabled? */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2ef961cf4cfc..4bce802d25fb 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -33,7 +33,7 @@ #include <asm/reboot.h> #include <asm/cache.h> #include <asm/nospec-branch.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 72920af0b3c0..1e720626069a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -44,6 +44,7 @@ #include <asm/pci-direct.h> #include <asm/prom.h> #include <asm/proto.h> +#include <asm/thermal.h> #include <asm/unwind.h> #include <asm/vsyscall.h> #include <linux/vmalloc.h> @@ -637,11 +638,11 @@ static void __init trim_snb_memory(void) * them from accessing certain memory ranges, namely anything below * 1M and in the pages listed in bad_pages[] above. * - * To avoid these pages being ever accessed by SNB gfx devices - * reserve all memory below the 1 MB mark and bad_pages that have - * not already been reserved at boot time. + * To avoid these pages ever being accessed by SNB gfx devices, reserve + * bad_pages that have not already been reserved at boot time. + * All memory below the 1 MB mark is reserved later during + * setup_arch() anyway, so there is no need to reserve it here. */ - memblock_reserve(0, 1<<20); for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { if (memblock_reserve(bad_pages[i], PAGE_SIZE)) @@ -733,14 +734,14 @@ static void __init early_reserve_memory(void) * The first 4Kb of memory is a BIOS owned area, but generally it is * not listed as such in the E820 table. * - * Reserve the first memory page and typically some additional - * memory (64KiB by default) since some BIOSes are known to corrupt - * low memory. See the Kconfig help text for X86_RESERVE_LOW. + * Reserve the first 64K of memory since some BIOSes are known to + * corrupt low memory. After the real mode trampoline is allocated, the + * rest of the memory below 640k is reserved. * * In addition, make sure page 0 is always reserved because on * systems with L1TF its contents can be leaked to user processes. */ - memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); + memblock_reserve(0, SZ_64K); early_reserve_initrd(); @@ -751,6 +752,7 @@ static void __init early_reserve_memory(void) reserve_ibft_region(); reserve_bios_regions(); + trim_snb_memory(); } /* @@ -1081,14 +1083,20 @@ void __init setup_arch(char **cmdline_p) (max_pfn_mapped<<PAGE_SHIFT) - 1); #endif - reserve_real_mode(); - /* * Reserving memory causing GPU hangs on Sandy Bridge integrated * graphics devices should be done after we allocated memory under * 1M for the real mode trampoline. + * Find free memory for the real mode trampoline and place it + * there. + * If there is not enough free memory under 1M, on EFI-enabled + * systems there will be an additional attempt to reclaim the memory + * for the real mode trampoline at efi_free_boot_services(). + * + * Unconditionally reserve the entire first 1M of RAM because + * BIOSes are known to corrupt low memory and several + * hundred kilobytes are not worth the complexity of detecting what memory gets 
Moreover, on machines with SandyBridge graphics or in + * setups that use crashkernel the entire 1M is reserved anyway. */ - trim_snb_memory(); + reserve_real_mode(); init_mem_mapping(); @@ -1226,6 +1234,14 @@ void __init setup_arch(char **cmdline_p) x86_init.timers.wallclock_init(); + /* + * This needs to run before setup_local_APIC() which soft-disables the + * local APIC temporarily and that masks the thermal LVT interrupt, + * leading to softlockups on machines which have configured SMI + * interrupt delivery. + */ + therm_lvt_init(); + mcheck_init(); register_refined_jiffies(CLOCK_TICK_RATE); diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-shared.c index 0aa9f13efd57..9f90f460a28c 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void) static void __noreturn sev_es_terminate(unsigned int reason) { - u64 val = GHCB_SEV_TERMINATE; + u64 val = GHCB_MSR_TERM_REQ; /* * Tell the hypervisor what went wrong - only reason-set 0 is * currently supported. */ - val |= GHCB_SEV_TERMINATE_REASON(0, reason); + val |= GHCB_SEV_TERM_REASON(0, reason); /* Request Guest Termination from Hypvervisor */ sev_es_wr_ghcb_msr(val); @@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void) u64 val; /* Do the GHCB protocol version negotiation */ - sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); + sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_INFO(val) != GHCB_SEV_INFO) + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) return false; - if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || - GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR) return false; return true; @@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void) static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) { + ghcb->save.sw_exit_code = 0; memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } @@ -153,28 +154,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->ax = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->bx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->cx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->dx = val >> 32; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev.c index 73873b007838..a6895e440bc3 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev.c @@ -7,12 +7,11 @@ * Author: Joerg Roedel <jroedel@suse.de> */ -#define pr_fmt(fmt) "SEV-ES: " fmt +#define pr_fmt(fmt) "SEV: " fmt #include <linux/sched/debug.h> /* For show_regs() */ #include <linux/percpu-defs.h> #include <linux/mem_encrypt.h> -#include <linux/lockdep.h> #include <linux/printk.h> 
#include <linux/mm_types.h> #include <linux/set_memory.h> @@ -22,7 +21,7 @@ #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/insn-eval.h> #include <asm/fpu/internal.h> #include <asm/processor.h> @@ -192,19 +191,39 @@ void noinstr __sev_es_ist_exit(void) this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); } -static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) +/* + * Nothing shall interrupt this code path while holding the per-CPU + * GHCB. The backup GHCB is only for NMIs interrupting this path. + * + * Callers must disable local interrupts around it. + */ +static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) { struct sev_es_runtime_data *data; struct ghcb *ghcb; + WARN_ON(!irqs_disabled()); + data = this_cpu_read(runtime_data); ghcb = &data->ghcb_page; if (unlikely(data->ghcb_active)) { /* GHCB is already in use - save its contents */ - if (unlikely(data->backup_ghcb_active)) - return NULL; + if (unlikely(data->backup_ghcb_active)) { + /* + * Backup-GHCB is also already in use. There is no way + * to continue here so just kill the machine. To make + * panic() work, mark GHCBs inactive so that messages + * can be printed out. + */ + data->ghcb_active = false; + data->backup_ghcb_active = false; + + instrumentation_begin(); + panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); + instrumentation_end(); + } /* Mark backup_ghcb active before writing to it */ data->backup_ghcb_active = true; @@ -221,24 +240,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) return ghcb; } -static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (state->ghcb) { - /* Restore GHCB from Backup */ - *ghcb = *state->ghcb; - data->backup_ghcb_active = false; - state->ghcb = NULL; - } else { - data->ghcb_active = false; - } -} - /* Needed in vc_early_forward_exception */ void do_early_exception(struct pt_regs *regs, int trapnr); @@ -266,17 +267,24 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) { char buffer[MAX_INSN_SIZE]; - int res; + int insn_bytes; - res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); - if (!res) { + insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); + if (insn_bytes == 0) { + /* Nothing could be copied */ ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; ctxt->fi.cr2 = ctxt->regs->ip; return ES_EXCEPTION; + } else if (insn_bytes == -EINVAL) { + /* Effective RIP could not be calculated */ + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + ctxt->fi.cr2 = 0; + return ES_EXCEPTION; } - if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res)) + if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) return ES_DECODE_FAILED; if (ctxt->insn.immediate.got) @@ -323,31 +331,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(target, size)) { - memcpy(dst, buf, size); - return ES_OK; - } - + /* + * This function uses __put_user() independent of whether kernel or user + * memory is accessed. 
This works fine because __put_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __put_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_to_user() here because + * vc_write_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ switch (size) { case 1: memcpy(&d1, buf, 1); - if (put_user(d1, target)) + if (__put_user(d1, target)) goto fault; break; case 2: memcpy(&d2, buf, 2); - if (put_user(d2, target)) + if (__put_user(d2, target)) goto fault; break; case 4: memcpy(&d4, buf, 4); - if (put_user(d4, target)) + if (__put_user(d4, target)) goto fault; break; case 8: memcpy(&d8, buf, 8); - if (put_user(d8, target)) + if (__put_user(d8, target)) goto fault; break; default: @@ -378,30 +399,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(s, size)) { - memcpy(buf, src, size); - return ES_OK; - } - + /* + * This function uses __get_user() independent of whether kernel or user + * memory is accessed. This works fine because __get_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __get_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_from_user() here because + * vc_read_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. 
+ */ switch (size) { case 1: - if (get_user(d1, s)) + if (__get_user(d1, s)) goto fault; memcpy(buf, &d1, 1); break; case 2: - if (get_user(d2, s)) + if (__get_user(d2, s)) goto fault; memcpy(buf, &d2, 2); break; case 4: - if (get_user(d4, s)) + if (__get_user(d4, s)) goto fault; memcpy(buf, &d4, 4); break; case 8: - if (get_user(d8, s)) + if (__get_user(d8, s)) goto fault; memcpy(buf, &d8, 8); break; @@ -459,14 +493,39 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt } /* Include code shared with pre-decompression boot stage */ -#include "sev-es-shared.c" +#include "sev-shared.c" + +static noinstr void __sev_put_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + WARN_ON(!irqs_disabled()); + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (state->ghcb) { + /* Restore GHCB from Backup */ + *ghcb = *state->ghcb; + data->backup_ghcb_active = false; + state->ghcb = NULL; + } else { + /* + * Invalidate the GHCB so a VMGEXIT instruction issued + * from userspace won't appear to be valid. + */ + vc_ghcb_invalidate(ghcb); + data->ghcb_active = false; + } +} void noinstr __sev_es_nmi_complete(void) { struct ghcb_state state; struct ghcb *ghcb; - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); @@ -476,7 +535,7 @@ void noinstr __sev_es_nmi_complete(void) sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); VMGEXIT(); - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); } static u64 get_jump_table_addr(void) @@ -488,7 +547,7 @@ static u64 get_jump_table_addr(void) local_irq_save(flags); - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); @@ -502,7 +561,7 @@ static u64 get_jump_table_addr(void) ghcb_sw_exit_info_2_is_valid(ghcb)) ret = ghcb->save.sw_exit_info_2; - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); local_irq_restore(flags); @@ -627,7 +686,7 @@ static void sev_es_ap_hlt_loop(void) struct ghcb_state state; struct ghcb *ghcb; - ghcb = sev_es_get_ghcb(&state); + ghcb = __sev_get_ghcb(&state); while (true) { vc_ghcb_invalidate(ghcb); @@ -644,7 +703,7 @@ static void sev_es_ap_hlt_loop(void) break; } - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); } /* @@ -734,7 +793,7 @@ void __init sev_es_init_vc_handling(void) sev_es_setup_play_dead(); /* Secondary CPUs use the runtime #VC handler */ - initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication; + initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; } static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) @@ -1172,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, return ES_EXCEPTION; } -static __always_inline void vc_handle_trap_db(struct pt_regs *regs) -{ - if (user_mode(regs)) - noist_exc_debug(regs); - else - exc_debug(regs); -} - static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, struct ghcb *ghcb, unsigned long exit_code) @@ -1255,6 +1306,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) case X86_TRAP_UD: exc_invalid_op(ctxt->regs); break; + case X86_TRAP_PF: + write_cr2(ctxt->fi.cr2); + exc_page_fault(ctxt->regs, error_code); + break; case X86_TRAP_AC: exc_alignment_check(ctxt->regs, error_code); break; @@ -1271,55 +1326,15 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < 
__this_cpu_ist_top_va(VC2)); } -/* - * Main #VC exception handler. It is called when the entry code was able to - * switch off the IST to a safe kernel stack. - * - * With the current implementation it is always possible to switch to a safe - * stack because #VC exceptions only happen at known places, like intercepted - * instructions or accesses to MMIO areas/IO ports. They can also happen with - * code instrumentation when the hypervisor intercepts #DB, but the critical - * paths are forbidden to be instrumented, so #DB exceptions currently also - * only happen in safe places. - */ -DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) +static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) { - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); - irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; enum es_result result; struct ghcb *ghcb; + bool ret = true; - /* - * Handle #DB before calling into !noinstr code to avoid recursive #DB. - */ - if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) { - vc_handle_trap_db(regs); - return; - } - - irq_state = irqentry_nmi_enter(regs); - lockdep_assert_irqs_disabled(); - instrumentation_begin(); - - /* - * This is invoked through an interrupt gate, so IRQs are disabled. The - * code below might walk page-tables for user or kernel addresses, so - * keep the IRQs disabled to protect us against concurrent TLB flushes. - */ - - ghcb = sev_es_get_ghcb(&state); - if (!ghcb) { - /* - * Mark GHCBs inactive so that panic() is able to print the - * message. - */ - data->ghcb_active = false; - data->backup_ghcb_active = false; - - panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); - } + ghcb = __sev_get_ghcb(&state); vc_ghcb_invalidate(ghcb); result = vc_init_em_ctxt(&ctxt, regs, error_code); @@ -1327,7 +1342,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) if (result == ES_OK) result = vc_handle_exitcode(&ctxt, ghcb, error_code); - sev_es_put_ghcb(&state); + __sev_put_ghcb(&state); /* Done - now check the result */ switch (result) { @@ -1335,17 +1350,20 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) vc_finish_insn(&ctxt); break; case ES_UNSUPPORTED: - pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", + pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_VMM_ERROR: pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_DECODE_FAILED: pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", error_code, regs->ip); - goto fail; + ret = false; + break; case ES_EXCEPTION: vc_forward_exception(&ctxt); break; @@ -1361,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) BUG(); } -out: - instrumentation_end(); - irqentry_nmi_exit(regs, irq_state); + return ret; +} - return; +static __always_inline bool vc_is_db(unsigned long error_code) +{ + return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; +} -fail: - if (user_mode(regs)) { - /* - * Do not kill the machine if user-space triggered the - * exception. Send SIGBUS instead and let user-space deal with - * it. 
- */ - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); - } else { - pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n", - result); +/* + * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode + * and will panic when an error happens. + */ +DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) +{ + irqentry_state_t irq_state; + + /* + * With the current implementation it is always possible to switch to a + * safe stack because #VC exceptions only happen at known places, like + * intercepted instructions or accesses to MMIO areas/IO ports. They can + * also happen with code instrumentation when the hypervisor intercepts + * #DB, but the critical paths are forbidden to be instrumented, so #DB + * exceptions currently also only happen in safe places. + * + * But keep this here in case the noinstr annotations are violated due + * to a bug elsewhere. + */ + if (unlikely(on_vc_fallback_stack(regs))) { + instrumentation_begin(); + panic("Can't handle #VC exception from unsupported context\n"); + instrumentation_end(); + } + + /* + * Handle #DB before calling into !noinstr code to avoid recursive #DB. + */ + if (vc_is_db(error_code)) { + exc_debug(regs); + return; + } + irq_state = irqentry_nmi_enter(regs); + + instrumentation_begin(); + + if (!vc_raw_handle_exception(regs, error_code)) { /* Show some debug info */ show_regs(regs); @@ -1389,23 +1435,38 @@ fail: panic("Returned from Terminate-Request to Hypervisor\n"); } - goto out; + instrumentation_end(); + irqentry_nmi_exit(regs, irq_state); } -/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */ -DEFINE_IDTENTRY_VC_IST(exc_vmm_communication) +/* + * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode + * and will kill the current task with SIGBUS when an error happens. + */ +DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) { + /* + * Handle #DB before calling into !noinstr code to avoid recursive #DB. + */ + if (vc_is_db(error_code)) { + noist_exc_debug(regs); + return; + } + + irqentry_enter_from_user_mode(regs); instrumentation_begin(); - panic("Can't handle #VC exception from unsupported context\n"); - instrumentation_end(); -} -DEFINE_IDTENTRY_VC(exc_vmm_communication) -{ - if (likely(!on_vc_fallback_stack(regs))) - safe_stack_exc_vmm_communication(regs, error_code); - else - ist_exc_vmm_communication(regs, error_code); + if (!vc_raw_handle_exception(regs, error_code)) { + /* + * Do not kill the machine if user-space triggered the + * exception. Send SIGBUS instead and let user-space deal with + * it. 
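[Editor's note: the split above leaves one shared worker that reports success as a bool, with a kernel-mode wrapper that treats failure as fatal and a user-mode wrapper that only punishes the offending task. A minimal user-space sketch of that shape; the names raw_handle, handle_from_kernel and handle_from_user are hypothetical stand-ins, not kernel symbols.]

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Shared worker, analogue of vc_raw_handle_exception(): report
 * success or failure instead of jumping to a fail label. */
static bool raw_handle(int event)
{
    switch (event) {
    case 0:
        return true;    /* handled */
    default:
        fprintf(stderr, "unsupported event %d\n", event);
        return false;
    }
}

/* Kernel-origin wrapper: any failure is fatal. */
static void handle_from_kernel(int event)
{
    if (!raw_handle(event))
        abort();        /* stand-in for panic() */
}

/* User-origin wrapper: failure only hurts the triggering task. */
static void handle_from_user(int event)
{
    if (!raw_handle(event))
        fprintf(stderr, "would send SIGBUS here\n");
}

int main(void)
{
    handle_from_user(1);    /* survivable */
    handle_from_kernel(0);  /* fine */
    return 0;
}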
+ */ + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); + } + + instrumentation_end(); + irqentry_exit_to_user_mode(regs); } bool __init handle_vc_boot_ghcb(struct pt_regs *regs) diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 0e5d0a7e203b..06743ec054d2 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); @@ -138,8 +141,10 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18); - BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14); CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0ad5214f598a..7770245cc7fa 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2043,7 +2043,7 @@ static bool amd_set_max_freq_ratio(void) return false; } - highest_perf = perf_caps.highest_perf; + highest_perf = amd_get_highest_perf(); nominal_perf = perf_caps.nominal_perf; if (!highest_perf || !nominal_perf) { diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 8daa70b0d2da..576b47e7523d 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -346,14 +346,12 @@ bool fixup_umip_exception(struct pt_regs *regs) if (!regs) return false; - nr_copied = insn_fetch_from_user(regs, buf); - /* - * The insn_fetch_from_user above could have failed if user code - * is protected by a memory protection key. Give up on emulation - * in such a case. Should we issue a page fault? + * Give up on emulation if fetching the instruction failed. Should a + * page fault or a #GP be issued? 
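[Editor's note: the reworked fetch path distinguishes three outcomes: a negative errno when the linear IP cannot even be computed, zero when nothing was copied, and a positive byte count on success, so callers can test nr_copied <= 0 once. A small user-space sketch of the same convention; get_ip and fetch_bytes are hypothetical stand-ins for insn_get_effective_ip() and insn_fetch_from_user().]

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_INSN_SIZE 15

/* Stand-in for insn_get_effective_ip(): 0 on success with the
 * result in *ip, -EINVAL if it cannot be computed. */
static int get_ip(const char *src, const char **ip)
{
    if (!src)
        return -EINVAL;
    *ip = src;
    return 0;
}

/* Stand-in for insn_fetch_from_user(): negative errno, 0 when
 * nothing was copied, otherwise the number of bytes copied. */
static int fetch_bytes(const char *src, char buf[MAX_INSN_SIZE])
{
    const char *ip;
    size_t n;

    if (get_ip(src, &ip))
        return -EINVAL;

    n = strlen(ip);
    if (n > MAX_INSN_SIZE)
        n = MAX_INSN_SIZE;
    memcpy(buf, ip, n);
    return (int)n;
}

int main(void)
{
    char buf[MAX_INSN_SIZE];
    int nr_copied = fetch_bytes("f3 0f 1e fa", buf);

    if (nr_copied <= 0) {   /* one test covers both failure modes */
        fprintf(stderr, "fetch failed: %d\n", nr_copied);
        return 1;
    }
    printf("copied %d bytes\n", nr_copied);
    return 0;
}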
*/ - if (!nr_copied) + nr_copied = insn_fetch_from_user(regs, buf); + if (nr_copied <= 0) return false; if (!insn_decode_from_regs(&insn, regs, buf, nr_copied)) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9a48f138832d..b4da665bb892 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) entry->ecx = F(RDPID); ++array->nent; + break; default: break; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8a0ccdb56076..5e5de05a8fbf 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5111,7 +5111,7 @@ done: return rc; } -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type) { int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; @@ -5322,7 +5322,8 @@ done_prefixes: ctxt->execute = opcode.u.execute; - if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD))) + if (unlikely(emulation_type & EMULTYPE_TRAP_UD) && + likely(!(ctxt->d & EmulateOnUD))) return EMULATION_FAILED; if (unlikely(ctxt->d & diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f98370a39936..f00830e5202f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1172,6 +1172,7 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm) { struct kvm_hv *hv = to_kvm_hv(kvm); u64 gfn; + int idx; if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || @@ -1190,9 +1191,16 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm) gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; hv->tsc_ref.tsc_sequence = 0; + + /* + * Take the srcu lock as memslots will be accessed to check the gfn + * cache generation against the memslots generation. 
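[Editor's note: kvm_write_guest() walks the memslots and compares the gfn cache generation against the memslot generation, so the hunk below brackets the write with srcu_read_lock()/srcu_read_unlock(). A loose user-space analogue of a reader section around such a generation check, using a rwlock in place of SRCU; all names are hypothetical. Build with: cc file.c -pthread]

#include <pthread.h>
#include <stdio.h>

/* Hypothetical generation-checked cache; the rwlock read side plays
 * the role of srcu_read_lock() protecting the memslots. */
static pthread_rwlock_t slots_lock = PTHREAD_RWLOCK_INITIALIZER;
static unsigned long slots_generation = 7;

static int write_guest(unsigned long cached_generation)
{
    int ok;

    pthread_rwlock_rdlock(&slots_lock);   /* srcu_read_lock() analogue */
    ok = (cached_generation == slots_generation);
    pthread_rwlock_unlock(&slots_lock);   /* srcu_read_unlock() analogue */

    return ok ? 0 : -1;
}

int main(void)
{
    printf("write %s\n", write_guest(7) == 0 ? "ok" : "failed");
    return 0;
}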
+ */ + idx = srcu_read_lock(&kvm->srcu); if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; + srcu_read_unlock(&kvm->srcu, idx); out_unlock: mutex_unlock(&hv->hv_lock); diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index f016838faedd..3e870bf9ca4d 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -314,7 +314,6 @@ struct x86_emulate_ctxt { int interruptibility; bool perm_ok; /* do not check permissions if true */ - bool ud; /* inject an #UD if host doesn't support insn */ bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; @@ -491,7 +490,7 @@ enum x86_intercept { #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #endif -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type); bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_FAILED -1 #define EMULATION_OK 0 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c0ebef560bd1..17fa4ab1b834 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, if (!apic_x2apic_mode(apic)) valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); + if (alignment + len > 4) + return 1; + if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) return 1; @@ -1494,6 +1497,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) static void cancel_hv_timer(struct kvm_lapic *apic); +static void cancel_apic_timer(struct kvm_lapic *apic) +{ + hrtimer_cancel(&apic->lapic_timer.timer); + preempt_disable(); + if (apic->lapic_timer.hv_timer_in_use) + cancel_hv_timer(apic); + preempt_enable(); +} + static void apic_update_lvtt(struct kvm_lapic *apic) { u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & @@ -1502,11 +1514,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic) if (apic->lapic_timer.timer_mode != timer_mode) { if (apic_lvtt_tscdeadline(apic) != (timer_mode == APIC_LVT_TIMER_TSCDEADLINE)) { - hrtimer_cancel(&apic->lapic_timer.timer); - preempt_disable(); - if (apic->lapic_timer.hv_timer_in_use) - cancel_hv_timer(apic); - preempt_enable(); + cancel_apic_timer(apic); kvm_lapic_set_reg(apic, APIC_TMICT, 0); apic->lapic_timer.period = 0; apic->lapic_timer.tscdeadline = 0; @@ -1598,11 +1606,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; + if (lapic_timer_advance_dynamic) { + adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); + /* + * If the timer fired early, reread the TSC to account for the + * overhead of the above adjustment to avoid waiting longer + * than is necessary. 
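[Editor's note: the point of the reread is that adjust_lapic_timer_advance() itself costs time; if the remaining wait were computed from the stale timestamp, the vCPU would also sleep for the adjustment overhead. A minimal user-space sketch of rereading a clock after doing work, assuming only a monotonic nanosecond clock.]

#include <stdio.h>
#include <time.h>

static unsigned long long now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
    unsigned long long deadline = now_ns() + 5000000ull;   /* 5 ms out */
    unsigned long long t;

    /* ... work here whose cost must not be added to the wait ... */

    t = now_ns();   /* reread the clock after the overhead */
    if (t < deadline) {
        struct timespec rem = {
            .tv_sec = 0,
            .tv_nsec = (long)(deadline - t),
        };
        nanosleep(&rem, NULL);   /* wait only for what is left */
    }
    printf("overshoot: %llu ns\n", now_ns() - deadline);
    return 0;
}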
+ */ + if (guest_tsc < tsc_deadline) + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + } + if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); - - if (lapic_timer_advance_dynamic) - adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) @@ -1661,7 +1677,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) } atomic_inc(&apic->lapic_timer.pending); - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_make_request(KVM_REQ_UNBLOCK, vcpu); if (from_timer_fn) kvm_vcpu_kick(vcpu); } @@ -2084,7 +2100,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) if (apic_lvtt_tscdeadline(apic)) break; - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); kvm_lapic_set_reg(apic, APIC_TMICT, val); start_apic_timer(apic); break; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0144c40d09c7..8d5876dfc6b7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu) context->inject_page_fault = kvm_inject_page_fault; } +static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false); + + /* + * Nested MMUs are used only for walking L2's gva->gpa, they never have + * shadow pages of their own and so "direct" has no meaning. Set it + * to "true" to try to detect bogus usage of the nested MMU. + */ + role.base.direct = true; + + if (!is_paging(vcpu)) + role.base.level = 0; + else if (is_long_mode(vcpu)) + role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL : + PT64_ROOT_4LEVEL; + else if (is_pae(vcpu)) + role.base.level = PT32E_ROOT_LEVEL; + else + role.base.level = PT32_ROOT_LEVEL; + + return role; +} + static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) { - union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); + union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu); struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; if (new_role.as_u64 == g_context->mmu_role.as_u64) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 70b7e44e3035..823a5919f9fa 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -90,8 +90,8 @@ struct guest_walker { gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; bool pte_writable[PT_MAX_FULL_LEVELS]; - unsigned pt_access; - unsigned pte_access; + unsigned int pt_access[PT_MAX_FULL_LEVELS]; + unsigned int pte_access; gfn_t gfn; struct x86_exception fault; }; @@ -418,13 +418,15 @@ retry_walk: } walker->ptes[walker->level - 1] = pte; + + /* Convert to ACC_*_MASK flags for struct guest_walker. */ + walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask); } while (!is_last_gpte(mmu, walker->level, pte)); pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; /* Convert to ACC_*_MASK flags for struct guest_walker. 
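[Editor's note: the guest_walker change replaces a single pt_access value, which only remembered the last level walked, with one entry per level, so the fetch path can pass the correct parent permissions when it instantiates a shadow page for a given level. A toy walk that records the accumulated permissions at each level; all names are hypothetical.]

#include <stdio.h>

#define LEVELS 4
#define ACC_R  0x1u
#define ACC_W  0x2u
#define ACC_X  0x4u

int main(void)
{
    /* Per-level permission bits found in the guest entries. */
    unsigned int gpte_bits[LEVELS] = {
        ACC_R | ACC_W | ACC_X,  /* level 4 */
        ACC_R | ACC_W | ACC_X,  /* level 3 */
        ACC_R | ACC_W,          /* level 2 */
        ACC_R,                  /* level 1 (leaf) */
    };
    unsigned int pt_access[LEVELS];
    unsigned int acc = ACC_R | ACC_W | ACC_X;
    int i;

    for (i = 0; i < LEVELS; i++) {
        acc &= gpte_bits[i];    /* permissions only narrow downward */
        pt_access[i] = acc;     /* remember them per level, not once */
    }

    for (i = 0; i < LEVELS; i++)
        printf("level %d: access %#x\n", LEVELS - i, pt_access[i]);
    return 0;
}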
*/ - walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask); walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask); errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); if (unlikely(errcode)) @@ -463,7 +465,8 @@ retry_walk: } pgprintk("%s: pte %llx pte_access %x pt_access %x\n", - __func__, (u64)pte, walker->pte_access, walker->pt_access); + __func__, (u64)pte, walker->pte_access, + walker->pt_access[walker->level - 1]); return 1; error: @@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled; struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; - unsigned direct_access, access = gw->pt_access; + unsigned int direct_access, access; int top_level, level, req_level, ret; gfn_t base_gfn = gw->gfn; @@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, sp = NULL; if (!is_shadow_present_pte(*it.sptep)) { table_gfn = gw->table_gfn[it.level - 2]; + access = gw->pt_access[it.level - 2]; sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, false, access); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 95eeb5ac6a8a..237317b1eddd 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1192,9 +1192,9 @@ bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) } /* - * Remove write access from all the SPTEs mapping GFNs [start, end). If - * skip_4k is set, SPTEs that map 4k pages, will not be write-protected. - * Returns true if an SPTE has been changed and the TLBs need to be flushed. + * Remove write access from all SPTEs at or above min_level that map GFNs + * [start, end). Returns true if an SPTE has been changed and the TLBs need to + * be flushed. */ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, int min_level) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 712b4e0de481..5e7e920113f3 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -28,10 +28,8 @@ #include "svm.h" /* enable / disable AVIC */ -int avic; -#ifdef CONFIG_X86_LOCAL_APIC -module_param(avic, int, S_IRUGO); -#endif +bool avic; +module_param(avic, bool, S_IRUGO); #define SVM_AVIC_DOORBELL 0xc001011b @@ -223,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, return &avic_physical_id_table[index]; } -/** +/* * Note: * AVIC hardware walks the nested page table to check permissions, * but does not use the SPA address specified in the leaf page @@ -766,7 +764,7 @@ out: return ret; } -/** +/* * Note: * The HW cannot support posting multicast/broadcast * interrupts to a vCPU. So, we still use legacy interrupt @@ -1007,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -/** +/* * This function is called during VCPU halt/unhalt. 
*/ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5bc887e9a986..8d36f0c73071 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev) sev->misc_cg = NULL; } -static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +static void sev_decommission(unsigned int handle) { struct sev_data_decommission decommission; + + if (!handle) + return; + + decommission.handle = handle; + sev_guest_decommission(&decommission, NULL); +} + +static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +{ struct sev_data_deactivate deactivate; if (!handle) @@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_guest_deactivate(&deactivate, NULL); up_read(&sev_deactivate_lock); - /* decommission handle */ - decommission.handle = handle; - sev_guest_decommission(&decommission, NULL); + sev_decommission(handle); } static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Bind ASID to this guest */ ret = sev_bind_asid(kvm, start.handle, error); - if (ret) + if (ret) { + sev_decommission(start.handle); goto e_free_session; + } /* return handle to userspace */ params.handle = start.handle; @@ -1103,10 +1113,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp, struct sev_data_send_start data; int ret; + memset(&data, 0, sizeof(data)); data.handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); - if (ret < 0) - return ret; params->session_len = data.session_len; if (copy_to_user((void __user *)(uintptr_t)argp->data, params, @@ -1215,10 +1224,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp, struct sev_data_send_update_data data; int ret; + memset(&data, 0, sizeof(data)); data.handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error); - if (ret < 0) - return ret; params->hdr_len = data.hdr_len; params->trans_len = data.trans_len; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index dfa351e605de..e088086f3de6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -863,8 +863,8 @@ static __init void svm_adjust_mmio_mask(void) return; /* If memory encryption is not enabled, use existing mask */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; enc_bit = cpuid_ebx(0x8000001f) & 0x3f; @@ -1010,9 +1010,7 @@ static __init int svm_hardware_setup(void) } if (avic) { - if (!npt_enabled || - !boot_cpu_has(X86_FEATURE_AVIC) || - !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) { + if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) { avic = false; } else { pr_info("AVIC enabled\n"); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e44567ceb865..2908c6ab5bb4 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -20,6 +20,7 @@ #include <linux/bits.h> #include <asm/svm.h> +#include <asm/sev-common.h> #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) @@ -479,7 +480,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL -extern int avic; +extern bool avic; static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data) { @@ -525,40 +526,9 @@ void 
svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ -#define GHCB_VERSION_MAX 1ULL -#define GHCB_VERSION_MIN 1ULL - -#define GHCB_MSR_INFO_POS 0 -#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) - -#define GHCB_MSR_SEV_INFO_RESP 0x001 -#define GHCB_MSR_SEV_INFO_REQ 0x002 -#define GHCB_MSR_VER_MAX_POS 48 -#define GHCB_MSR_VER_MAX_MASK 0xffff -#define GHCB_MSR_VER_MIN_POS 32 -#define GHCB_MSR_VER_MIN_MASK 0xffff -#define GHCB_MSR_CBIT_POS 24 -#define GHCB_MSR_CBIT_MASK 0xff -#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ - ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ - (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ - (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ - GHCB_MSR_SEV_INFO_RESP) - -#define GHCB_MSR_CPUID_REQ 0x004 -#define GHCB_MSR_CPUID_RESP 0x005 -#define GHCB_MSR_CPUID_FUNC_POS 32 -#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff -#define GHCB_MSR_CPUID_VALUE_POS 32 -#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff -#define GHCB_MSR_CPUID_REG_POS 30 -#define GHCB_MSR_CPUID_REG_MASK 0x3 - -#define GHCB_MSR_TERM_REQ 0x100 -#define GHCB_MSR_TERM_REASON_SET_POS 12 -#define GHCB_MSR_TERM_REASON_SET_MASK 0xf -#define GHCB_MSR_TERM_REASON_POS 16 -#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + extern unsigned int max_sev_asid; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a61c015870e3..4f839148948b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed, TP_ARGS(msg, err), TP_STRUCT__entry( - __field(const char *, msg) + __string(msg, msg) __field(u32, err) ), TP_fast_assign( - __entry->msg = msg; + __assign_str(msg, msg); __entry->err = err; ), - TP_printk("%s%s", __entry->msg, !__entry->err ? "" : + TP_printk("%s%s", __get_str(msg), !__entry->err ? "" : __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) ); diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 8dee8a5fbc17..aa0e7872fcc9 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -90,8 +90,7 @@ static inline bool cpu_has_vmx_preemption_timer(void) static inline bool cpu_has_vmx_posted_intr(void) { - return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && - vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; } static inline bool cpu_has_load_ia32_efer(void) diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 459748680daf..5f81ef092bd4 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -238,6 +238,20 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu) /* + * Bail out of the block loop if the VM has an assigned + * device, but the blocking vCPU didn't reconfigure the + * PI.NV to the wakeup vector, i.e. the assigned device + * came along after the initial check in pi_pre_block(). 
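[Editor's note: vmx_pi_start_assignment() below is wired up so that the very first device assignment broadcasts KVM_REQ_UNBLOCK, and kvm_arch_start_assignment() uses atomic_inc_return() == 1 to fire the hook only on the 0 -> 1 transition. A user-space sketch of such a transition-triggered callback; the names are hypothetical.]

#include <stdatomic.h>
#include <stdio.h>

static atomic_int assigned_device_count;

static void start_assignment_hook(void)
{
    /* analogue of kicking every blocked vCPU out of its wait */
    puts("first device assigned: wake all waiters");
}

static void arch_start_assignment(void)
{
    /* Only the 0 -> 1 transition needs the notification. */
    if (atomic_fetch_add(&assigned_device_count, 1) == 0)
        start_assignment_hook();
}

int main(void)
{
    arch_start_assignment();   /* fires the hook */
    arch_start_assignment();   /* silent */
    return 0;
}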
+ */ +void vmx_pi_start_assignment(struct kvm *kvm) +{ + if (!irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK); +} + +/* * pi_update_irte - set IRTE for Posted-Interrupts * * @kvm: kvm diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 0bdc41391c5b..7f7b2326caf5 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -95,5 +95,6 @@ void __init pi_init_cpu(int cpu); bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu); int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); +void vmx_pi_start_assignment(struct kvm *kvm); #endif /* __KVM_X86_VMX_POSTED_INTR_H */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4bceb5ca3a89..c2a779b688e6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4843,7 +4843,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_run *kvm_run = vcpu->run; u32 intr_info, ex_no, error_code; - unsigned long cr2, rip, dr6; + unsigned long cr2, dr6; u32 vect_info; vect_info = vmx->idt_vectoring_info; @@ -4933,8 +4933,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_run->exit_reason = KVM_EXIT_DEBUG; - rip = kvm_rip_read(vcpu); - kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; + kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); kvm_run->debug.arch.exception = ex_no; break; case AC_VECTOR: @@ -6248,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) switch (kvm_get_apic_mode(vcpu)) { case LAPIC_MODE_INVALID: WARN_ONCE(true, "Invalid local APIC state"); + break; case LAPIC_MODE_DISABLED: break; case LAPIC_MODE_XAPIC: @@ -7721,6 +7721,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .nested_ops = &vmx_nested_ops, .update_pi_irte = pi_update_irte, + .start_assignment = vmx_pi_start_assignment, #ifdef CONFIG_X86_64 .set_hv_timer = vmx_set_hv_timer, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b6bca616929..e0f4a46649d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3072,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; + + if (!tdp_enabled) { + /* + * A TLB flush on behalf of the guest is equivalent to + * INVPCID(all), toggling CR4.PGE, etc., which requires + * a forced sync of the shadow page tables. Unload the + * entire MMU here and the subsequent load will sync the + * shadow page tables, and also flush the TLB. + */ + kvm_mmu_unload(vcpu); + return; + } + static_call(kvm_x86_tlb_flush_guest)(vcpu); } @@ -3101,10 +3114,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu) * expensive IPIs. 
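[Editor's note: the hunk that follows reads st->preempted with xchg() so that testing and clearing the flags is a single atomic step; a separate load followed by a store of zero could lose a KVM_VCPU_FLUSH_TLB bit set in between. A C11 sketch of the fetch-and-clear idiom, with a hypothetical flag name.]

#include <stdatomic.h>
#include <stdio.h>

#define VCPU_FLUSH_TLB 0x1u

static _Atomic unsigned char preempted;

int main(void)
{
    unsigned char st;

    atomic_store(&preempted, VCPU_FLUSH_TLB);

    /* Fetch the current flags and clear them in one atomic step, so
     * a bit set concurrently cannot be lost between a read and a
     * later write of zero. */
    st = atomic_exchange(&preempted, 0);

    if (st & VCPU_FLUSH_TLB)
        puts("flush was requested while preempted");
    return 0;
}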
*/ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) { + u8 st_preempted = xchg(&st->preempted, 0); + trace_kvm_pv_tlb_flush(vcpu->vcpu_id, - st->preempted & KVM_VCPU_FLUSH_TLB); - if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) + st_preempted & KVM_VCPU_FLUSH_TLB); + if (st_preempted & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb_guest(vcpu); + } else { + st->preempted = 0; } vcpu->arch.st.preempted = 0; @@ -3468,7 +3485,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: - case MSR_K8_SYSCFG: + case MSR_AMD64_SYSCFG: case MSR_K8_TSEG_ADDR: case MSR_K8_TSEG_MASK: case MSR_VM_HSAVE_PA: @@ -7089,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) { - emul_to_vcpu(ctxt)->arch.hflags = emul_flags; + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + + vcpu->arch.hflags = emul_flags; + kvm_mmu_reset_context(vcpu); } static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, @@ -7226,6 +7246,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); + ctxt->interruptibility = 0; + ctxt->have_exception = false; + ctxt->exception.vector = -1; + ctxt->perm_ok = false; + init_decode_cache(ctxt); vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } @@ -7561,14 +7586,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, kvm_vcpu_check_breakpoint(vcpu, &r)) return r; - ctxt->interruptibility = 0; - ctxt->have_exception = false; - ctxt->exception.vector = -1; - ctxt->perm_ok = false; - - ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; - - r = x86_decode_insn(ctxt, insn, insn_len); + r = x86_decode_insn(ctxt, insn, insn_len, emulation_type); trace_kvm_emulate_insn_start(vcpu); ++vcpu->stat.insn_emulation; @@ -8243,6 +8261,7 @@ void kvm_arch_exit(void) kvm_x86_ops.hardware_enable = NULL; kvm_mmu_module_exit(); free_percpu(user_return_msrs); + kmem_cache_destroy(x86_emulator_cache); kmem_cache_destroy(x86_fpu_cache); #ifdef CONFIG_KVM_XEN static_key_deferred_flush(&kvm_xen_enabled); @@ -8360,6 +8379,9 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) vcpu->stat.directed_yield_attempted++; + if (single_task_running()) + goto no_yield; + rcu_read_lock(); map = rcu_dereference(vcpu->kvm->arch.apic_map); @@ -9496,7 +9518,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (r <= 0) break; - kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_clear_request(KVM_REQ_UNBLOCK, vcpu); if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); @@ -10115,8 +10137,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, kvm_update_dr7(vcpu); if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); + vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu); /* * Trigger an rflags update that will inject or remove the trace @@ -11499,7 +11520,8 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) void kvm_arch_start_assignment(struct kvm *kvm) { - atomic_inc(&kvm->arch.assigned_device_count); + if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1) + static_call_cond(kvm_x86_start_assignment)(kvm); } EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); diff --git a/arch/x86/lib/insn-eval.c 
b/arch/x86/lib/insn-eval.c index a67afd74232c..a1d24fdc07cf 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) } } -static unsigned long insn_get_effective_ip(struct pt_regs *regs) +static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip) { unsigned long seg_base = 0; @@ -1430,10 +1430,12 @@ static unsigned long insn_get_effective_ip(struct pt_regs *regs) if (!user_64bit_mode(regs)) { seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); if (seg_base == -1L) - return 0; + return -EINVAL; } - return seg_base + regs->ip; + *ip = seg_base + regs->ip; + + return 0; } /** @@ -1446,18 +1448,17 @@ static unsigned long insn_get_effective_ip(struct pt_regs *regs) * * Returns: * - * Number of instruction bytes copied. - * - * 0 if nothing was copied. + * - number of instruction bytes copied. + * - 0 if nothing was copied. + * - -EINVAL if the linear address of the instruction could not be calculated */ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; - ip = insn_get_effective_ip(regs); - if (!ip) - return 0; + if (insn_get_effective_ip(regs, &ip)) + return -EINVAL; not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); @@ -1475,18 +1476,17 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) * * Returns: * - * Number of instruction bytes copied. - * - * 0 if nothing was copied. + * - number of instruction bytes copied. + * - 0 if nothing was copied. + * - -EINVAL if the linear address of the instruction could not be calculated. */ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { unsigned long ip; int not_copied; - ip = insn_get_effective_ip(regs); - if (!ip) - return 0; + if (insn_get_effective_ip(regs, &ip)) + return -EINVAL; not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 4d32cb06ffd5..ec9922cba30a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -58,12 +58,16 @@ SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) 2: .skip 5-(2b-1b), 0x90 SYM_FUNC_END(__x86_indirect_alt_call_\reg) +STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg) + SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) ANNOTATE_RETPOLINE_SAFE 1: jmp *%\reg 2: .skip 5-(2b-1b), 0x90 SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) +STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg) + .endm /* diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b93d6cd08a7f..121921b2927c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -5,7 +5,7 @@ #include <xen/xen.h> #include <asm/fpu/internal.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/traps.h> #include <asm/kdebug.h> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1c548ad00752..6bda7f67d737 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -836,8 +836,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (si_code == SEGV_PKUERR) force_sig_pkuerr((void __user *)address, pkey); - - force_sig_fault(SIGSEGV, si_code, (void __user *)address); + else + force_sig_fault(SIGSEGV, si_code, (void __user *)address); local_irq_disable(); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 12c686c65ea9..60ade7dd71bd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -118,7 +118,9 @@ 
static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des if (!IS_ENABLED(CONFIG_EFI)) return; - if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA || + (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA && + efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME)) desc->flags |= IORES_MAP_ENCRYPTED; } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 04aba7e80a36..470b20208430 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -504,10 +504,6 @@ void __init sme_enable(struct boot_params *bp) #define AMD_SME_BIT BIT(0) #define AMD_SEV_BIT BIT(1) - /* Check the SEV MSR whether SEV or SME is enabled */ - sev_status = __rdmsr(MSR_AMD64_SEV); - feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; - /* * Check for the SME/SEV feature: * CPUID Fn8000_001F[EAX] @@ -519,17 +515,22 @@ void __init sme_enable(struct boot_params *bp) eax = 0x8000001f; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); - if (!(eax & feature_mask)) + /* Check whether SEV or SME is supported */ + if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT))) return; me_mask = 1UL << (ebx & 0x3f); + /* Check the SEV MSR whether SEV or SME is enabled */ + sev_status = __rdmsr(MSR_AMD64_SEV); + feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + /* Check if memory encryption is enabled */ if (feature_mask == AMD_SME_BIT) { /* * No SME if Hypervisor bit is set. This check is here to * prevent a guest from trying to enable SME. For running as a - * KVM guest the MSR_K8_SYSCFG will be sufficient, but there + * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there * might be other hypervisors which emulate that MSR as non-zero * or even pass it through to the guest. * A malicious hypervisor can still trick a guest into this @@ -542,8 +543,8 @@ void __init sme_enable(struct boot_params *bp) return; /* For SME, check the SYSCFG MSR */ - msr = __rdmsr(MSR_K8_SYSCFG); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + msr = __rdmsr(MSR_AMD64_SYSCFG); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; } else { /* SEV state cannot be controlled by a command line option */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 5eb4dc2b97da..e94da744386f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) /* make sure all non-reserved blocks are inside the limits */ bi->start = max(bi->start, low); - bi->end = min(bi->end, high); + + /* preserve info for non-RAM areas above 'max_pfn': */ + if (bi->end > high) { + numa_add_memblk_to(bi->nid, high, bi->end, + &numa_reserved_meminfo); + bi->end = high; + } /* and there's no empty block */ if (bi->start >= bi->end) diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index ae744b6a0785..dd40d3fea74e 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -284,7 +284,7 @@ static int __init early_root_info_init(void) /* need to take out [4G, TOM2) for RAM*/ /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is enabled? 
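[Editor's note: the numa_cleanup_meminfo() change above stops silently truncating blocks that extend past the limit and instead records the cut-off portion in numa_reserved_meminfo. A small sketch of clamping a range while preserving the spill; the struct and variable names are hypothetical.]

#include <stdio.h>

struct memblk {
    unsigned long start, end;
};

int main(void)
{
    struct memblk bi = { 0x100000ul, 0x500000ul };
    struct memblk reserved = { 0, 0 };
    unsigned long low = 0x200000ul, high = 0x400000ul;

    if (bi.start < low)
        bi.start = low;

    /* Instead of dropping what lies above the limit, remember it in
     * a separate reserved list. */
    if (bi.end > high) {
        reserved.start = high;
        reserved.end = bi.end;
        bi.end = high;
    }

    printf("kept     [%#lx, %#lx)\n", bi.start, bi.end);
    printf("reserved [%#lx, %#lx)\n", reserved.start, reserved.end);
    return 0;
}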
*/ if (val & (1<<21)) { diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 02dc64625e64..2edd86649468 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +#define RS690_LOWER_TOP_OF_DRAM2 0x30 +#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1 +#define RS690_UPPER_TOP_OF_DRAM2 0x31 +#define RS690_HTIU_NB_INDEX 0xA8 +#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100 +#define RS690_HTIU_NB_DATA 0xAC + +/* + * Some BIOS implementations support RAM above 4GB, but do not configure the + * PCI host to respond to bus master accesses for these addresses. These + * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA + * works as expected for addresses below 4GB. + * + * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57) + * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf + */ +static void rs690_fix_64bit_dma(struct pci_dev *pdev) +{ + u32 val = 0; + phys_addr_t top_of_dram = __pa(high_memory - 1) + 1; + + if (top_of_dram <= (1ULL << 32)) + return; + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2); + pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val); + + if (val) + return; + + pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, + top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); + #endif diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 8a26e705cb06..147c30a81f15 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -468,7 +468,7 @@ void __init efi_init(void) */ if (!efi_runtime_supported()) - pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); + pr_err("No EFI runtime due to 32/64-bit mismatch with kernel\n"); if (!efi_runtime_supported() || efi_runtime_disabled()) { efi_memmap_unmap(); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index df7b5477fc4f..7515e78ef898 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,7 +47,7 @@ #include <asm/realmode.h> #include <asm/time.h> #include <asm/pgalloc.h> -#include <asm/sev-es.h> +#include <asm/sev.h> /* * We allocate runtime services regions top-down, starting from -4G, i.e. 
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 7850111008a8..b15ebfe40a73 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -450,6 +450,18 @@ void __init efi_free_boot_services(void) size -= rm_size; } + /* + * Don't free memory under 1M for two reasons: + * - BIOS might clobber it + * - Crash kernel needs it to be reserved + */ + if (start + size < SZ_1M) + continue; + if (start < SZ_1M) { + size -= (SZ_1M - start); + start = SZ_1M; + } + memblock_free_late(start, size); } diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index 6b1f3a4eeb44..a0b491ae2de8 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -10,7 +10,6 @@ # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KCSAN_SANITIZE := n -OBJECT_FILES_NON_STANDARD := y subdir- := rm diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 1be71ef5e4c4..6534c92d0f83 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -9,7 +9,7 @@ #include <asm/realmode.h> #include <asm/tlbflush.h> #include <asm/crash.h> -#include <asm/sev-es.h> +#include <asm/sev.h> struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; @@ -29,14 +29,16 @@ void __init reserve_real_mode(void) /* Has to be under 1M so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (!mem) { + if (!mem) pr_info("No sub-1M memory is available for the trampoline\n"); - return; - } + else + set_real_mode_mem(mem); - memblock_reserve(mem, size); - set_real_mode_mem(mem); - crash_reserve_low_1M(); + /* + * Unconditionally reserve the entire first 1M, see comment in + * setup_arch(). + */ + memblock_reserve(0, SZ_1M); } static void sme_sev_setup_real_mode(struct trampoline_header *th) diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 84c5d1b33d10..cc8391f86cdb 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -123,9 +123,9 @@ SYM_CODE_START(startup_32) */ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone - movl $MSR_K8_SYSCFG, %ecx + movl $MSR_AMD64_SYSCFG, %ecx rdmsr - bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax + bts $MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax jc .Ldone /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 17503fed2017..03149422dce2 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -592,8 +592,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug) DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) { /* This should never happen and there is no way to handle it. */ + instrumentation_begin(); pr_err("Unknown trap in Xen PV mode."); BUG(); + instrumentation_end(); } #ifdef CONFIG_X86_MCE @@ -1273,16 +1275,16 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Get mfn list */ xen_build_dynamic_phys_to_machine(); + /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); + x86_configure_nx(); + /* * Set up kernel GDT and segment registers, mainly so that * -fstack-protector code can be executed. 
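[Editor's note: the efi_free_boot_services() hunk and the reserve_real_mode() hunk above implement the same policy: nothing below 1M is ever handed back to the allocator. A sketch of the range clamp used in the EFI path; the helper name is hypothetical.]

#include <stdio.h>

#define SZ_1M 0x100000ul

/* Trim a [start, start + size) range so it never frees memory below
 * 1M; returns 0 when nothing is left to free. */
static int clamp_above_1m(unsigned long *start, unsigned long *size)
{
    if (*start + *size < SZ_1M)
        return 0;               /* entirely below 1M: skip */
    if (*start < SZ_1M) {
        *size -= SZ_1M - *start;
        *start = SZ_1M;
    }
    return *size != 0;
}

int main(void)
{
    unsigned long start = 0x80000ul, size = 0x200000ul;

    if (clamp_above_1m(&start, &size))
        printf("free [%#lx, %#lx)\n", start, start + size);
    return 0;
}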
*/ xen_setup_gdt(0); - /* Work out if we support NX */ - get_cpu_cap(&boot_cpu_data); - x86_configure_nx(); - /* Determine virtual and physical address sizes */ get_cpu_address_sizes(&boot_cpu_data); diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 9d76d433d3d6..fd2f30227d96 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -413,7 +413,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self