diff options
Diffstat (limited to 'arch')
240 files changed, 2134 insertions, 1283 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 5622578742fd..3000a2e8ee21 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -482,7 +482,7 @@ 550 common process_madvise sys_process_madvise 551 common epoll_pwait2 sys_epoll_pwait2 552 common mount_setattr sys_mount_setattr -553 common quotactl_path sys_quotactl_path +# 553 reserved for quotactl_path 554 common landlock_create_ruleset sys_landlock_create_ruleset 555 common landlock_add_rule sys_landlock_add_rule 556 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 4392c9c189c4..e47adc97a89b 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -31,7 +31,7 @@ endif ifdef CONFIG_ARC_CURR_IN_REG -# For a global register defintion, make sure it gets passed to every file +# For a global register definition, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using # any kernel headers, and missing the r25 global register # Can't do unconditionally because of recursive include issues diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 9b87e162e539..dfeffa25499b 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, * * Technically the lock is also needed for UP (boils down to irq save/restore) * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to - * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg() + * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg() * Other way around, xchg is one instruction anyways, so can't be interrupted * as such */ @@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, /* * "atomic" variant of xchg() * REQ: It needs to follow the same serialization rules as other atomic_xxx() - * Since xchg() doesn't always do that, it would seem that following defintion + * Since xchg() doesn't always do that, it would seem that following definition * is incorrect. But here's the rationale: * SMP : Even xchg() takes the atomic_ops_lock, so OK. * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index ad9b7fe4dba3..4a9d33372fe2 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -7,6 +7,18 @@ #include <uapi/asm/page.h> +#ifdef CONFIG_ARC_HAS_PAE40 + +#define MAX_POSSIBLE_PHYSMEM_BITS 40 +#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK) + +#else /* CONFIG_ARC_HAS_PAE40 */ + +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#define PAGE_MASK_PHYS PAGE_MASK + +#endif /* CONFIG_ARC_HAS_PAE40 */ + #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 163641726a2b..5878846f00cf 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -107,8 +107,8 @@ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) - +#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) #define PAGE_U_R __pgprot(___DEF | _PAGE_READ) @@ -132,13 +132,7 @@ #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) -#ifdef CONFIG_ARC_HAS_PAE40 -#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) -#define MAX_POSSIBLE_PHYSMEM_BITS 40 -#else -#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) -#define MAX_POSSIBLE_PHYSMEM_BITS 32 -#endif +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 2a97e2718a21..2a4ad619abfb 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -33,5 +33,4 @@ #define PAGE_MASK (~(PAGE_SIZE-1)) - #endif /* _UAPI__ASM_ARC_PAGE_H */ diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h index 95f8a4380e11..7a5449dfcb29 100644 --- a/arch/arc/include/uapi/asm/sigcontext.h +++ b/arch/arc/include/uapi/asm/sigcontext.h @@ -18,6 +18,7 @@ */ struct sigcontext { struct user_regs_struct regs; + struct user_regs_arcv2 v2abi; }; #endif /* _ASM_ARC_SIGCONTEXT_H */ diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1743506081da..2cb8dfe866b6 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -177,7 +177,7 @@ tracesys: ; Do the Sys Call as we normally would. ; Validate the Sys Call number - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi tracesys_exit @@ -255,7 +255,7 @@ ENTRY(EV_Trap) ;============ Normal syscall case ; syscall num shd not exceed the total system calls avail - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi .Lret_from_system_call diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index ecfbc42d3a40..345a0000554c 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) regs->ret = addr; + fallthrough; case 'D': case 'k': diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index d838d0d57696..3793876f42d9 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) int ret; /* - * This is only for old cores lacking LLOCK/SCOND, which by defintion + * This is only for old cores lacking LLOCK/SCOND, which by definition * can't possibly be SMP. Thus doesn't need to be SMP safe. * And this also helps reduce the overhead for serializing in * the UP case */ WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP)); - /* Z indicates to userspace if operation succeded */ + /* Z indicates to userspace if operation succeeded */ regs->status32 &= ~STATUS_Z_MASK; ret = access_ok(uaddr, sizeof(*uaddr)); @@ -107,7 +107,7 @@ fail: void arch_cpu_idle(void) { - /* Re-enable interrupts <= default irq priority before commiting SLEEP */ + /* Re-enable interrupts <= default irq priority before committing SLEEP */ const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO; __asm__ __volatile__( @@ -120,7 +120,7 @@ void arch_cpu_idle(void) void arch_cpu_idle(void) { - /* sleep, but enable both set E1/E2 (levels of interrutps) before committing */ + /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */ __asm__ __volatile__("sleep 0x3 \n"); } diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index fdbe06c98895..cb2f88502baf 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -61,6 +61,41 @@ struct rt_sigframe { unsigned int sigret_magic; }; +static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + v2abi.r30 = regs->r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + v2abi.r58 = regs->r58; + v2abi.r59 = regs->r59; +#else + v2abi.r58 = v2abi.r59 = 0; +#endif + err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); +#endif + return err; +} + +static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi)); + + regs->r30 = v2abi.r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + regs->r58 = v2abi.r58; + regs->r59 = v2abi.r59; +#endif +#endif + return err; +} + static int stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) @@ -94,6 +129,10 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch, sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs); + err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); return err ? -EFAULT : 0; @@ -109,6 +148,10 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) err |= __copy_from_user(&uregs.scratch, &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs); + if (err) return -EFAULT; @@ -259,7 +302,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) regs->r2 = (unsigned long)&sf->uc; /* - * small optim to avoid unconditonally calling do_sigaltstack + * small optim to avoid unconditionally calling do_sigaltstack * in sigreturn path, now that we only have rt_sigreturn */ magic = MAGIC_SIGALTSTK; @@ -391,7 +434,7 @@ void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs) { /* - * ASM glue gaurantees that this is only called when returning to + * ASM glue guarantees that this is only called when returning to * user mode */ if (test_thread_flag(TIF_NOTIFY_RESUME)) diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 33ce59d91461..e2146a8da195 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -57,7 +57,6 @@ SECTIONS .init.ramfs : { INIT_RAM_FS } . = ALIGN(PAGE_SIZE); - _stext = .; HEAD_TEXT_SECTION INIT_TEXT_SECTION(L1_CACHE_BYTES) @@ -83,6 +82,7 @@ SECTIONS .text : { _text = .; + _stext = .; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 33832e36bdb7..e2ed355438c9 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,7 +157,16 @@ void __init setup_arch_memory(void) min_high_pfn = PFN_DOWN(high_mem_start); max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); - max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn; + /* + * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE. + * For HIGHMEM without PAE max_high_pfn should be less than + * min_low_pfn to guarantee that these two regions don't overlap. + * For PAE case highmem is greater than lowmem, so it is natural + * to use max_high_pfn. + * + * In both cases, holes should be handled by pfn_valid(). + */ + max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; high_memory = (void *)(min_high_pfn << PAGE_SHIFT); diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index fac4adc90204..95c649fbc95a 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap); void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, unsigned long flags) { + unsigned int off; unsigned long vaddr; struct vm_struct *area; - phys_addr_t off, end; + phys_addr_t end; pgprot_t prot = __pgprot(flags); /* Don't allow wraparound, zero size */ @@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, /* Mappings have to be page-aligned */ off = paddr & ~PAGE_MASK; - paddr &= PAGE_MASK; + paddr &= PAGE_MASK_PHYS; size = PAGE_ALIGN(end + 1) - paddr; /* diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 9bb3c24f3677..9c7c68247289 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index 7d2c72562c73..9148a01ed6d9 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -105,9 +105,13 @@ phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; phy-reset-duration = <20>; phy-supply = <&sw2_reg>; - phy-handle = <ðphy0>; status = "okay"; + fixed-link { + speed = <1000>; + full-duplex; + }; + mdio { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi index 236fc205c389..d0768ae429fa 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi +++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi @@ -406,6 +406,18 @@ vin-supply = <&sw1_reg>; }; +®_pu { + vin-supply = <&sw1_reg>; +}; + +®_vdd1p1 { + vin-supply = <&sw2_reg>; +}; + +®_vdd2p5 { + vin-supply = <&sw2_reg>; +}; + &uart1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1>; diff --git a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi index 828cf3e39784..c4e146f3341b 100644 --- a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi +++ b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi @@ -126,7 +126,7 @@ compatible = "nxp,pca8574"; reg = <0x3a>; gpio-controller; - #gpio-cells = <1>; + #gpio-cells = <2>; }; }; diff --git a/arch/arm/boot/dts/imx7d-meerkat96.dts b/arch/arm/boot/dts/imx7d-meerkat96.dts index 5339210b63d0..dd8003bd1fc0 100644 --- a/arch/arm/boot/dts/imx7d-meerkat96.dts +++ b/arch/arm/boot/dts/imx7d-meerkat96.dts @@ -193,7 +193,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc1>; keep-power-in-suspend; - tuning-step = <2>; + fsl,tuning-step = <2>; vmmc-supply = <®_3p3v>; no-1-8-v; broken-cd; diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi index e57da0d32b98..e519897fae08 100644 --- a/arch/arm/boot/dts/imx7d-pico.dtsi +++ b/arch/arm/boot/dts/imx7d-pico.dtsi @@ -351,7 +351,7 @@ pinctrl-2 = <&pinctrl_usdhc1_200mhz>; cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; bus-width = <4>; - tuning-step = <2>; + fsl,tuning-step = <2>; vmmc-supply = <®_3p3v>; wakeup-source; no-1-8-v; diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h index 0d67ed682e07..bc4ffa7ca04c 100644 --- a/arch/arm/include/asm/cpuidle.h +++ b/arch/arm/include/asm/cpuidle.h @@ -7,9 +7,11 @@ #ifdef CONFIG_CPU_IDLE extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); +#define __cpuidle_method_section __used __section("__cpuidle_method_of_table") #else static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { return -ENODEV; } +#define __cpuidle_method_section __maybe_unused /* drop silently */ #endif /* Common ARM WFI state */ @@ -42,8 +44,7 @@ struct of_cpuidle_method { #define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops) \ static const struct of_cpuidle_method __cpuidle_method_of_table_##name \ - __used __section("__cpuidle_method_of_table") \ - = { .method = _method, .ops = _ops } + __cpuidle_method_section = { .method = _method, .ops = _ops } extern int arm_cpuidle_suspend(int index); diff --git a/arch/arm/mach-imx/pm-imx27.c b/arch/arm/mach-imx/pm-imx27.c index 020e6deb67c8..237e8aa9fe83 100644 --- a/arch/arm/mach-imx/pm-imx27.c +++ b/arch/arm/mach-imx/pm-imx27.c @@ -12,6 +12,7 @@ #include <linux/suspend.h> #include <linux/io.h> +#include "common.h" #include "hardware.h" static int mx27_suspend_enter(suspend_state_t state) diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig index 658c8efb4ca1..a71cf1d189ae 100644 --- a/arch/arm/mach-npcm/Kconfig +++ b/arch/arm/mach-npcm/Kconfig @@ -10,6 +10,7 @@ config ARCH_WPCM450 bool "Support for WPCM450 BMC (Hermon)" depends on ARCH_MULTI_V5 select CPU_ARM926T + select WPCM450_AIC select NPCM7XX_TIMER help General support for WPCM450 BMC (Hermon). diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 2ee527c00284..1026a816dcc0 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -458,20 +458,6 @@ static struct gpiod_lookup_table leds_gpio_table = { #ifdef CONFIG_LEDS_TRIGGERS DEFINE_LED_TRIGGER(ams_delta_camera_led_trigger); - -static int ams_delta_camera_power(struct device *dev, int power) -{ - /* - * turn on camera LED - */ - if (power) - led_trigger_event(ams_delta_camera_led_trigger, LED_FULL); - else - led_trigger_event(ams_delta_camera_led_trigger, LED_OFF); - return 0; -} -#else -#define ams_delta_camera_power NULL #endif static struct platform_device ams_delta_audio_device = { diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c index c40cf5ef8607..977b0b744c22 100644 --- a/arch/arm/mach-omap1/board-h2.c +++ b/arch/arm/mach-omap1/board-h2.c @@ -320,7 +320,7 @@ static int tps_setup(struct i2c_client *client, void *context) { if (!IS_BUILTIN(CONFIG_TPS65010)) return -ENOSYS; - + tps65010_config_vregs1(TPS_LDO2_ENABLE | TPS_VLDO2_3_0V | TPS_LDO1_ENABLE | TPS_VLDO1_3_0V); @@ -394,6 +394,8 @@ static void __init h2_init(void) BUG_ON(gpio_request(H2_NAND_RB_GPIO_PIN, "NAND ready") < 0); gpio_direction_input(H2_NAND_RB_GPIO_PIN); + gpiod_add_lookup_table(&isp1301_gpiod_table); + omap_cfg_reg(L3_1610_FLASH_CS2B_OE); omap_cfg_reg(M8_1610_FLASH_CS2B_WE); diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 2c1e2b32b9b3..a745d64d4699 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -655,9 +655,13 @@ static int __init omap_pm_init(void) irq = INT_7XX_WAKE_UP_REQ; else if (cpu_is_omap16xx()) irq = INT_1610_WAKE_UP_REQ; - if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", - NULL)) - pr_err("Failed to request irq %d (peripheral wakeup)\n", irq); + else + irq = -1; + + if (irq >= 0) { + if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", NULL)) + pr_err("Failed to request irq %d (peripheral wakeup)\n", irq); + } /* Program new power ramp-up time * (0 for most boards since we don't lower voltage when in deep sleep) diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 418a61ecb827..5e86145db0e2 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -322,6 +322,7 @@ static int n8x0_mmc_get_cover_state(struct device *dev, int slot) static void n8x0_mmc_callback(void *data, u8 card_mask) { +#ifdef CONFIG_MMC_OMAP int bit, *openp, index; if (board_is_n800()) { @@ -339,7 +340,6 @@ static void n8x0_mmc_callback(void *data, u8 card_mask) else *openp = 0; -#ifdef CONFIG_MMC_OMAP omap_mmc_notify_cover_event(mmc_device, index, *openp); #else pr_warn("MMC: notify cover event not available\n"); diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index ec0d9b094744..bddfc7cd5d40 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev) return fpga->irq; base_irq = platform_get_irq(pdev, 1); - if (base_irq < 0) + if (base_irq < 0) { base_irq = 0; + } else { + ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0); + if (ret < 0) + return ret; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); fpga->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c7679d7db98b..28e03b5fec00 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -456,7 +456,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index f8f07469d259..a7e54a087b80 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -135,24 +135,18 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) return; } -int xen_swiotlb_detect(void) -{ - if (!xen_domain()) - return 0; - if (xen_feature(XENFEAT_direct_mapped)) - return 1; - /* legacy case */ - if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain()) - return 1; - return 0; -} - static int __init xen_mm_init(void) { struct gnttab_cache_flush cflush; + int rc; + if (!xen_swiotlb_detect()) return 0; - xen_swiotlb_init(); + + rc = xen_swiotlb_init(); + /* we can work with the default swiotlb */ + if (rc < 0 && rc != -EEXIST) + return rc; cflush.op = 0; cflush.a.dev_bus_addr = 0; diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index d6465823b281..7b393cfec071 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += kernel/ mm/ -obj-$(CONFIG_NET) += net/ +obj-y += kernel/ mm/ net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 6409b47b73e4..7336c1fd0dda 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -165,6 +165,7 @@ config ARCH_MEDIATEK config ARCH_MESON bool "Amlogic Platforms" + select COMMON_CLK select MESON_IRQ_GPIO help This enables support for the arm64 based Amlogic SoCs diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7ef44478560d..b52481f0605d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -175,6 +175,9 @@ vdso_install: $(if $(CONFIG_COMPAT_VDSO), \ $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@) +archprepare: + $(Q)$(MAKE) $(build)=arch/arm64/tools kapi + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts index 6c309b97587d..e8d31279b7a3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts @@ -46,7 +46,8 @@ eee-broken-100tx; qca,clk-out-frequency = <125000000>; qca,clk-out-strength = <AR803X_STRENGTH_FULL>; - vddio-supply = <&vddh>; + qca,keep-pll-enabled; + vddio-supply = <&vddio>; vddio: vddio-regulator { regulator-name = "VDDIO"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts index df212ed5bb94..e65d1c477e2c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts @@ -31,11 +31,10 @@ reg = <0x4>; eee-broken-1000t; eee-broken-100tx; - qca,clk-out-frequency = <125000000>; qca,clk-out-strength = <AR803X_STRENGTH_FULL>; - - vddio-supply = <&vddh>; + qca,keep-pll-enabled; + vddio-supply = <&vddio>; vddio: vddio-regulator { regulator-name = "VDDIO"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index eca06a0c3cf8..a30249ebffa8 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -197,8 +197,8 @@ ddr: memory-controller@1080000 { compatible = "fsl,qoriq-memory-controller"; reg = <0x0 0x1080000 0x0 0x1000>; - interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>; - big-endian; + interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; + little-endian; }; dcfg: syscon@1e00000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts index 631e01c1b9fd..be1e7d6f0ecb 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts @@ -88,11 +88,11 @@ pinctrl-0 = <&pinctrl_codec2>; reg = <0x18>; #sound-dai-cells = <0>; - HPVDD-supply = <®_3p3v>; - SPRVDD-supply = <®_3p3v>; - SPLVDD-supply = <®_3p3v>; - AVDD-supply = <®_3p3v>; - IOVDD-supply = <®_3p3v>; + HPVDD-supply = <®_gen_3p3>; + SPRVDD-supply = <®_gen_3p3>; + SPLVDD-supply = <®_gen_3p3>; + AVDD-supply = <®_gen_3p3>; + IOVDD-supply = <®_gen_3p3>; DVDD-supply = <&vgen4_reg>; reset-gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index 4dc8383478ee..a08a568c31d9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -45,8 +45,8 @@ reg_12p0_main: regulator-12p0-main { compatible = "regulator-fixed"; regulator-name = "12V_MAIN"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; regulator-always-on; }; @@ -77,15 +77,6 @@ regulator-always-on; }; - reg_3p3v: regulator-3p3v { - compatible = "regulator-fixed"; - vin-supply = <®_3p3_main>; - regulator-name = "GEN_3V3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-always-on; - }; - reg_usdhc2_vmmc: regulator-vsd-3v3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_reg_usdhc2>; @@ -415,11 +406,11 @@ pinctrl-0 = <&pinctrl_codec1>; reg = <0x18>; #sound-dai-cells = <0>; - HPVDD-supply = <®_3p3v>; - SPRVDD-supply = <®_3p3v>; - SPLVDD-supply = <®_3p3v>; - AVDD-supply = <®_3p3v>; - IOVDD-supply = <®_3p3v>; + HPVDD-supply = <®_gen_3p3>; + SPRVDD-supply = <®_gen_3p3>; + SPLVDD-supply = <®_gen_3p3>; + AVDD-supply = <®_gen_3p3>; + IOVDD-supply = <®_gen_3p3>; DVDD-supply = <&vgen4_reg>; reset-gpios = <&gpio3 3 GPIO_ACTIVE_LOW>; }; diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi index c62ddb9b2ba5..3771144a2ce4 100644 --- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi @@ -14,7 +14,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; @@ -29,7 +28,6 @@ ports { port@0 { - reg = <0>; csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index d64fb8b1b86c..46f8dbf68904 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -2573,6 +2573,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2628,6 +2632,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi index 5b05474dc272..d16a4be5ef77 100644 --- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi @@ -2419,6 +2419,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2474,6 +2478,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts index e7b4a929bb17..2e3d1981cac4 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts +++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts @@ -33,7 +33,7 @@ status = "okay"; ports { - port { + port@0 { csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index 20fa3caa050e..1aef34447abd 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -1823,6 +1823,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 8eb006cbd9af..1f51237ab0a6 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -2709,6 +2709,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2764,6 +2768,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi index 25b87da32eeb..b643d3079db1 100644 --- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi @@ -192,6 +192,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi index 5c39152e4570..85d66d15465a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi @@ -3097,6 +3097,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3152,6 +3156,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3191,6 +3199,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi index 25d947a81b29..12476e354d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi @@ -2761,6 +2761,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2816,6 +2820,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi index ab081f14af9a..d9804768425a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi @@ -2499,6 +2499,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2554,6 +2558,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 657b20d3533b..dcb9df861d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -2575,6 +2575,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2630,6 +2634,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi index 5a5d5649332a..e8f6352c3665 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi @@ -1106,6 +1106,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index 1ffa4a995a7a..7b51d464de0e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -1439,6 +1439,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -1478,6 +1482,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index 295d34f1d216..4715e4a4abe0 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -298,8 +298,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index 5010f23fafcc..0eaea58f4210 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -1970,6 +1970,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index e18747df219f..453ffcef24fa 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -349,7 +349,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1>; @@ -364,8 +363,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2 3 4>; diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index b2bcbf23eefd..ca59d1f711f8 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -42,12 +42,12 @@ }; }; - dmss: dmss { + dmss: bus@48000000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; dma-ranges; - ranges; + ranges = <0x00 0x48000000 0x00 0x48000000 0x00 0x06400000>; ti,sci-dev-id = <25>; @@ -134,7 +134,7 @@ }; }; - dmsc: dmsc@44043000 { + dmsc: system-controller@44043000 { compatible = "ti,k2g-sci"; ti,host-id = <12>; mbox-names = "rx", "tx"; @@ -148,7 +148,7 @@ #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; @@ -373,8 +373,9 @@ clocks = <&k3_clks 145 0>; }; - main_gpio_intr: interrupt-controller0 { + main_gpio_intr: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x00 0x00a00000 0x00 0x800>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi index 99e94dee1bd4..deb19ae5e168 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi @@ -74,8 +74,9 @@ clocks = <&k3_clks 148 0>; }; - mcu_gpio_intr: interrupt-controller1 { + mcu_gpio_intr: interrupt-controller@4210000 { compatible = "ti,sci-intr"; + reg = <0x00 0x04210000 0x00 0x200>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index cb340d1b401f..6cd3131eb9ff 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -433,8 +433,9 @@ #phy-cells = <0>; }; - intr_main_gpio: interrupt-controller0 { + intr_main_gpio: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x0 0x00a00000 0x0 0x400>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; @@ -444,18 +445,19 @@ ti,interrupt-ranges = <0 392 32>; }; - main-navss { + main_navss: bus@30800000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0xbc00000>; dma-coherent; dma-ranges; ti,sci-dev-id = <118>; - intr_main_navss: interrupt-controller1 { + intr_main_navss: interrupt-controller@310e0000 { compatible = "ti,sci-intr"; + reg = <0x0 0x310e0000 0x0 0x2000>; ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi index 0388c02c2203..f5b8ef2f5f77 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi @@ -116,11 +116,11 @@ }; }; - mcu-navss { + mcu_navss: bus@28380000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>; dma-coherent; dma-ranges; diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index ed42f13e7663..7cb864b4d74a 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -6,24 +6,24 @@ */ &cbass_wakeup { - dmsc: dmsc { + dmsc: system-controller@44083000 { compatible = "ti,am654-sci"; ti,host-id = <12>; - #address-cells = <1>; - #size-cells = <1>; - ranges; mbox-names = "rx", "tx"; mboxes= <&secure_proxy_main 11>, <&secure_proxy_main 13>; + reg-names = "debug_messages"; + reg = <0x44083000 0x1000>; + k3_pds: power-controller { compatible = "ti,sci-pm-domain"; #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; @@ -69,8 +69,9 @@ power-domains = <&k3_pds 115 TI_SCI_PD_EXCLUSIVE>; }; - intr_wkup_gpio: interrupt-controller2 { + intr_wkup_gpio: interrupt-controller@42200000 { compatible = "ti,sci-intr"; + reg = <0x42200000 0x200>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts index 9e87fb313a54..eddb2ffb93ca 100644 --- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts +++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts @@ -85,12 +85,6 @@ gpios = <&wkup_gpio0 27 GPIO_ACTIVE_LOW>; }; }; - - clk_ov5640_fixed: clock { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <24000000>; - }; }; &wkup_pmx0 { @@ -287,23 +281,6 @@ pinctrl-names = "default"; pinctrl-0 = <&main_i2c1_pins_default>; clock-frequency = <400000>; - - ov5640: camera@3c { - compatible = "ovti,ov5640"; - reg = <0x3c>; - - clocks = <&clk_ov5640_fixed>; - clock-names = "xclk"; - - port { - csi2_cam0: endpoint { - remote-endpoint = <&csi2_phy0>; - clock-lanes = <0>; - data-lanes = <1 2>; - }; - }; - }; - }; &main_i2c2 { @@ -496,14 +473,6 @@ }; }; -&csi2_0 { - csi2_phy0: endpoint { - remote-endpoint = <&csi2_cam0>; - clock-lanes = <0>; - data-lanes = <1 2>; - }; -}; - &mcu_cpsw { pinctrl-names = "default"; pinctrl-0 = <&mcu_cpsw_pins_default &mcu_mdio_pins_default>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi index f86c493a44f1..19fea8adbcff 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi @@ -68,8 +68,9 @@ }; }; - main_gpio_intr: interrupt-controller0 { + main_gpio_intr: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x00 0x00a00000 0x00 0x800>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; @@ -85,9 +86,12 @@ #size-cells = <2>; ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>; ti,sci-dev-id = <199>; + dma-coherent; + dma-ranges; - main_navss_intr: interrupt-controller1 { + main_navss_intr: interrupt-controller@310e0000 { compatible = "ti,sci-intr"; + reg = <0x00 0x310e0000 0x00 0x4000>; ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi index 5e74e43822c3..5663fe3ea466 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi @@ -6,7 +6,7 @@ */ &cbass_mcu_wakeup { - dmsc: dmsc@44083000 { + dmsc: system-controller@44083000 { compatible = "ti,k2g-sci"; ti,host-id = <12>; @@ -23,7 +23,7 @@ #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; @@ -96,8 +96,9 @@ clock-names = "fclk"; }; - wkup_gpio_intr: interrupt-controller2 { + wkup_gpio_intr: interrupt-controller@42200000 { compatible = "ti,sci-intr"; + reg = <0x00 0x42200000 0x00 0x400>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index c2aa45a3ac79..3bcafe4c1742 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -76,8 +76,9 @@ }; }; - main_gpio_intr: interrupt-controller0 { + main_gpio_intr: interrupt-controller@a00000 { compatible = "ti,sci-intr"; + reg = <0x00 0x00a00000 0x00 0x800>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; @@ -87,18 +88,19 @@ ti,interrupt-ranges = <8 392 56>; }; - main-navss { + main_navss: bus@30000000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>; dma-coherent; dma-ranges; ti,sci-dev-id = <199>; - main_navss_intr: interrupt-controller1 { + main_navss_intr: interrupt-controller@310e0000 { compatible = "ti,sci-intr"; + reg = <0x0 0x310e0000 0x0 0x4000>; ti,intr-trigger-type = <4>; interrupt-controller; interrupt-parent = <&gic500>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi index d56e3475aee7..5e825e4d0306 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi @@ -6,7 +6,7 @@ */ &cbass_mcu_wakeup { - dmsc: dmsc@44083000 { + dmsc: system-controller@44083000 { compatible = "ti,k2g-sci"; ti,host-id = <12>; @@ -23,7 +23,7 @@ #power-domain-cells = <2>; }; - k3_clks: clocks { + k3_clks: clock-controller { compatible = "ti,k2g-sci-clk"; #clock-cells = <2>; }; @@ -96,8 +96,9 @@ clock-names = "fclk"; }; - wkup_gpio_intr: interrupt-controller2 { + wkup_gpio_intr: interrupt-controller@42200000 { compatible = "ti,sci-intr"; + reg = <0x00 0x42200000 0x00 0x400>; ti,intr-trigger-type = <1>; interrupt-controller; interrupt-parent = <&gic500>; @@ -249,11 +250,11 @@ }; }; - mcu-navss { + mcu_navss: bus@28380000 { compatible = "simple-mfd"; #address-cells = <2>; #size-cells = <2>; - ranges; + ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>; dma-coherent; dma-ranges; diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 07ac208edc89..26889dbfe904 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -5,3 +5,5 @@ generic-y += qrwlock.h generic-y += qspinlock.h generic-y += set_memory.h generic-y += user.h + +generated-y += cpucaps.h diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 2175ec0004ed..451e11e5fd23 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, * This insanity brought to you by speculative system register reads, * out-of-order memory accesses, sequence locks and Thomas Gleixner. * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ */ #define arch_counter_enforce_ordering(val) do { \ u64 tmp, _val = (val); \ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h deleted file mode 100644 index b0c5eda0498f..000000000000 --- a/arch/arm64/include/asm/cpucaps.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm64/include/asm/cpucaps.h - * - * Copyright (C) 2016 ARM Ltd. - */ -#ifndef __ASM_CPUCAPS_H -#define __ASM_CPUCAPS_H - -#define ARM64_WORKAROUND_CLEAN_CACHE 0 -#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define ARM64_WORKAROUND_845719 2 -#define ARM64_HAS_SYSREG_GIC_CPUIF 3 -#define ARM64_HAS_PAN 4 -#define ARM64_HAS_LSE_ATOMICS 5 -#define ARM64_WORKAROUND_CAVIUM_23154 6 -#define ARM64_WORKAROUND_834220 7 -#define ARM64_HAS_NO_HW_PREFETCH 8 -#define ARM64_HAS_VIRT_HOST_EXTN 11 -#define ARM64_WORKAROUND_CAVIUM_27456 12 -#define ARM64_HAS_32BIT_EL0 13 -#define ARM64_SPECTRE_V3A 14 -#define ARM64_HAS_CNP 15 -#define ARM64_HAS_NO_FPSIMD 16 -#define ARM64_WORKAROUND_REPEAT_TLBI 17 -#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 -#define ARM64_WORKAROUND_858921 19 -#define ARM64_WORKAROUND_CAVIUM_30115 20 -#define ARM64_HAS_DCPOP 21 -#define ARM64_SVE 22 -#define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_SPECTRE_V2 24 -#define ARM64_HAS_RAS_EXTN 25 -#define ARM64_WORKAROUND_843419 26 -#define ARM64_HAS_CACHE_IDC 27 -#define ARM64_HAS_CACHE_DIC 28 -#define ARM64_HW_DBM 29 -#define ARM64_SPECTRE_V4 30 -#define ARM64_MISMATCHED_CACHE_TYPE 31 -#define ARM64_HAS_STAGE2_FWB 32 -#define ARM64_HAS_CRC32 33 -#define ARM64_SSBS 34 -#define ARM64_WORKAROUND_1418040 35 -#define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_SPECULATIVE_AT 37 -#define ARM64_HAS_ADDRESS_AUTH_ARCH 38 -#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 -#define ARM64_HAS_GENERIC_AUTH_ARCH 40 -#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 -#define ARM64_HAS_IRQ_PRIO_MASKING 42 -#define ARM64_HAS_DCPODP 43 -#define ARM64_WORKAROUND_1463225 44 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 -#define ARM64_WORKAROUND_1542419 47 -#define ARM64_HAS_E0PD 48 -#define ARM64_HAS_RNG 49 -#define ARM64_HAS_AMU_EXTN 50 -#define ARM64_HAS_ADDRESS_AUTH 51 -#define ARM64_HAS_GENERIC_AUTH 52 -#define ARM64_HAS_32BIT_EL1 53 -#define ARM64_BTI 54 -#define ARM64_HAS_ARMv8_4_TTL 55 -#define ARM64_HAS_TLB_RANGE 56 -#define ARM64_MTE 57 -#define ARM64_WORKAROUND_1508412 58 -#define ARM64_HAS_LDAPR 59 -#define ARM64_KVM_PROTECTED_MODE 60 -#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61 -#define ARM64_HAS_EPAN 62 - -#define ARM64_NCAPS 63 - -#endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index cf8df032b9c3..5e9b33cbac51 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -63,6 +63,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 #define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20 +#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 21 #ifndef __ASSEMBLY__ @@ -201,6 +202,8 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu); + extern u64 __vgic_v3_get_gic_config(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index f612c090f2e4..01b9857757f2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -463,4 +463,9 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; } +static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) +{ + return test_bit(feature, vcpu->arch.features); +} + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 7859749d6628..5dab69d2c22b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +/* 443 is reserved for quotactl_path */ #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) #define __NR_landlock_add_rule 445 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1cb39c0803a4..e720148232a0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -720,11 +720,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) return ret; } - if (run->immediate_exit) - return -EINTR; - vcpu_load(vcpu); + if (run->immediate_exit) { + ret = -EINTR; + goto out; + } + kvm_sigset_activate(vcpu); ret = 1; @@ -897,6 +899,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_sigset_deactivate(vcpu); +out: + /* + * In the unlikely event that we are returning to userspace + * with pending exceptions or PC adjustment, commit these + * adjustments in order to give userspace a consistent view of + * the vcpu state. Note that this relies on __kvm_adjust_pc() + * being preempt-safe on VHE. + */ + if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_INCREMENT_PC))) + kvm_call_hyp(__kvm_adjust_pc, vcpu); + vcpu_put(vcpu); return ret; } diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 73629094f903..11541b94b328 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -296,7 +296,7 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) *vcpu_pc(vcpu) = vect_offset; } -void kvm_inject_exception(struct kvm_vcpu *vcpu) +static void kvm_inject_exception(struct kvm_vcpu *vcpu) { if (vcpu_el1_is_32bit(vcpu)) { switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { @@ -329,3 +329,19 @@ void kvm_inject_exception(struct kvm_vcpu *vcpu) } } } + +/* + * Adjust the guest PC (and potentially exception state) depending on + * flags provided by the emulation code. + */ +void __kvm_adjust_pc(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) { + kvm_inject_exception(vcpu); + vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { + kvm_skip_instr(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; + } +} diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h index 61716359035d..4fdfeabefeb4 100644 --- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -13,8 +13,6 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> -void kvm_inject_exception(struct kvm_vcpu *vcpu); - static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { @@ -44,22 +42,6 @@ static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) } /* - * Adjust the guest PC on entry, depending on flags provided by EL1 - * for the purpose of emulation (MMIO, sysreg) or exception injection. - */ -static inline void __adjust_pc(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) { - kvm_inject_exception(vcpu); - vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | - KVM_ARM64_EXCEPT_MASK); - } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { - kvm_skip_instr(vcpu); - vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; - } -} - -/* * Skip an instruction while host sysregs are live. * Assumes host is always 64-bit. */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index f36420a80474..1632f001f4ed 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -28,6 +28,13 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); } +static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + + __kvm_adjust_pc(kern_hyp_va(vcpu)); +} + static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt) { __kvm_flush_vm_context(); @@ -170,6 +177,7 @@ typedef void (*hcall_t)(struct kvm_cpu_context *); static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_vcpu_run), + HANDLE_FUNC(__kvm_adjust_pc), HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index e342f7f4f4fb..4b60c0056c04 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -23,8 +23,8 @@ extern unsigned long hyp_nr_cpus; struct host_kvm host_kvm; -struct hyp_pool host_s2_mem; -struct hyp_pool host_s2_dev; +static struct hyp_pool host_s2_mem; +static struct hyp_pool host_s2_dev; /* * Copies of the host's CPU features registers holding sanitized values. diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 7488f53b0aa2..a3d3a275344e 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -17,7 +17,6 @@ #include <nvhe/trap_handler.h> struct hyp_pool hpool; -struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; unsigned long hyp_nr_cpus; #define hyp_percpu_size ((unsigned long)__per_cpu_end - \ @@ -27,6 +26,7 @@ static void *vmemmap_base; static void *hyp_pgt_base; static void *host_s2_mem_pgt_base; static void *host_s2_dev_pgt_base; +static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; static int divide_memory_pool(void *virt, unsigned long size) { diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e9f6ea704d07..f7af9688c1f7 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -4,7 +4,6 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ -#include <hyp/adjust_pc.h> #include <hyp/switch.h> #include <hyp/sysreg-sr.h> @@ -201,7 +200,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ __debug_save_host_buffers_nvhe(vcpu); - __adjust_pc(vcpu); + __kvm_adjust_pc(vcpu); /* * We must restore the 32-bit state before the sysregs, thanks diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 7b8f7db5c1ed..b3229924d243 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -4,7 +4,6 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ -#include <hyp/adjust_pc.h> #include <hyp/switch.h> #include <linux/arm-smccc.h> @@ -132,7 +131,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __load_guest_stage2(vcpu->arch.hw_mmu); __activate_traps(vcpu); - __adjust_pc(vcpu); + __kvm_adjust_pc(vcpu); sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c5d1f3c87dbd..c10207fed2f3 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1156,13 +1156,13 @@ out_unlock: bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { if (!kvm->arch.mmu.pgt) - return 0; + return false; __unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT, (range->end - range->start) << PAGE_SHIFT, range->may_block); - return 0; + return false; } bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) @@ -1170,7 +1170,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) kvm_pfn_t pfn = pte_pfn(range->pte); if (!kvm->arch.mmu.pgt) - return 0; + return false; WARN_ON(range->end - range->start != 1); @@ -1190,7 +1190,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) PAGE_SIZE, __pfn_to_phys(pfn), KVM_PGTABLE_PROT_R, NULL); - return 0; + return false; } bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) @@ -1200,7 +1200,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) pte_t pte; if (!kvm->arch.mmu.pgt) - return 0; + return false; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); @@ -1213,7 +1213,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { if (!kvm->arch.mmu.pgt) - return 0; + return false; return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 956cdc240148..d37ebee085cf 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -166,6 +166,25 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) return 0; } +static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *tmp; + bool is32bit; + int i; + + is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit) + return false; + + /* Check that the vcpus are either all 32bit or all 64bit */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit) + return false; + } + + return true; +} + /** * kvm_reset_vcpu - sets core registers and sys_regs to reset value * @vcpu: The VCPU pointer @@ -217,13 +236,14 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } } + if (!vcpu_allowed_register_width(vcpu)) { + ret = -EINVAL; + goto out; + } + switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) { - ret = -EINVAL; - goto out; - } pstate = VCPU_RESET_PSTATE_SVC; } else { pstate = VCPU_RESET_PSTATE_EL1; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 76ea2800c33e..1a7968ad078c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -399,14 +399,14 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, rd, dbg_reg); else dbg_to_reg(vcpu, p, rd, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -414,7 +414,7 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -424,7 +424,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -434,21 +434,21 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val; } static bool trap_bcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, rd, dbg_reg); else dbg_to_reg(vcpu, p, rd, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -456,7 +456,7 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -467,7 +467,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -477,22 +477,22 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val; } static bool trap_wvr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, rd, dbg_reg); else dbg_to_reg(vcpu, p, rd, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); + trace_trap_reg(__func__, rd->CRm, p->is_write, + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]); return true; } @@ -500,7 +500,7 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -510,7 +510,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -520,21 +520,21 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val; } static bool trap_wcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, rd, dbg_reg); else dbg_to_reg(vcpu, p, rd, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -542,7 +542,7 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -552,7 +552,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -562,7 +562,7 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val; } static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index ac485163a4a7..6d44c028d1c9 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + if (!test_bit(PG_dcache_clean, &page->flags)) { sync_icache_aliases(page_address(page), page_size(page)); + set_bit(PG_dcache_clean, &page->flags); + } } EXPORT_SYMBOL_GPL(__sync_icache_dcache); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 16a2b2b1c54d..e55409caaee3 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -43,6 +43,7 @@ #include <linux/sizes.h> #include <asm/tlb.h> #include <asm/alternative.h> +#include <asm/xen/swiotlb-xen.h> /* * We need to be able to catch inadvertent references to memstart_addr @@ -482,7 +483,7 @@ void __init mem_init(void) if (swiotlb_force == SWIOTLB_FORCE || max_pfn > PFN_DOWN(arm64_dma_phys_limit)) swiotlb_init(1); - else + else if (!xen_swiotlb_detect()) swiotlb_force = SWIOTLB_NO_FORCE; set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6dd9369e3ea0..89b66ef43a0f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) + if (rodata_full || crash_mem_map || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 0a48191534ff..97d7bcd8d4f2 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup) mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) msr_s SYS_GCR_EL1, x10 + /* + * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then + * RGSR_EL1.SEED must be non-zero for IRG to produce + * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we + * must initialize it. + */ + mrs x10, CNTVCT_EL0 + ands x10, x10, #SYS_RGSR_EL1_SEED_MASK + csinc x10, x10, xzr, ne + lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT + msr_s SYS_RGSR_EL1, x10 + /* clear any pending tag check faults in TFSR*_EL1 */ msr_s SYS_TFSR_EL1, xzr msr_s SYS_TFSRE0_EL1, xzr diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile new file mode 100644 index 000000000000..932b4fe5c768 --- /dev/null +++ b/arch/arm64/tools/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +gen := arch/$(ARCH)/include/generated +kapi := $(gen)/asm + +kapi-hdrs-y := $(kapi)/cpucaps.h + +targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) + +PHONY += kapi + +kapi: $(kapi-hdrs-y) $(gen-y) + +# Create output directory if not already present +_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') + +quiet_cmd_gen_cpucaps = GEN $@ + cmd_gen_cpucaps = mkdir -p $(dir $@) && \ + $(AWK) -f $(filter-out $(PHONY),$^) > $@ + +$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE + $(call if_changed,gen_cpucaps) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps new file mode 100644 index 000000000000..21fbdda7086e --- /dev/null +++ b/arch/arm64/tools/cpucaps @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Internal CPU capabilities constants, keep this list sorted + +BTI +HAS_32BIT_EL0 +HAS_32BIT_EL1 +HAS_ADDRESS_AUTH +HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_IMP_DEF +HAS_AMU_EXTN +HAS_ARMv8_4_TTL +HAS_CACHE_DIC +HAS_CACHE_IDC +HAS_CNP +HAS_CRC32 +HAS_DCPODP +HAS_DCPOP +HAS_E0PD +HAS_EPAN +HAS_GENERIC_AUTH +HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_IMP_DEF +HAS_IRQ_PRIO_MASKING +HAS_LDAPR +HAS_LSE_ATOMICS +HAS_NO_FPSIMD +HAS_NO_HW_PREFETCH +HAS_PAN +HAS_RAS_EXTN +HAS_RNG +HAS_SB +HAS_STAGE2_FWB +HAS_SYSREG_GIC_CPUIF +HAS_TLB_RANGE +HAS_VIRT_HOST_EXTN +HW_DBM +KVM_PROTECTED_MODE +MISMATCHED_CACHE_TYPE +MTE +SPECTRE_V2 +SPECTRE_V3A +SPECTRE_V4 +SSBS +SVE +UNMAP_KERNEL_AT_EL0 +WORKAROUND_834220 +WORKAROUND_843419 +WORKAROUND_845719 +WORKAROUND_858921 +WORKAROUND_1418040 +WORKAROUND_1463225 +WORKAROUND_1508412 +WORKAROUND_1542419 +WORKAROUND_CAVIUM_23154 +WORKAROUND_CAVIUM_27456 +WORKAROUND_CAVIUM_30115 +WORKAROUND_CAVIUM_TX2_219_PRFM +WORKAROUND_CAVIUM_TX2_219_TVM +WORKAROUND_CLEAN_CACHE +WORKAROUND_DEVICE_LOAD_ACQUIRE +WORKAROUND_NVIDIA_CARMEL_CNP +WORKAROUND_QCOM_FALKOR_E1003 +WORKAROUND_REPEAT_TLBI +WORKAROUND_SPECULATIVE_AT diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk new file mode 100755 index 000000000000..00c9e72a200a --- /dev/null +++ b/arch/arm64/tools/gen-cpucaps.awk @@ -0,0 +1,40 @@ +#!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# gen-cpucaps.awk: arm64 cpucaps header generator +# +# Usage: awk -f gen-cpucaps.awk cpucaps.txt + +# Log an error and terminate +function fatal(msg) { + print "Error at line " NR ": " msg > "/dev/stderr" + exit 1 +} + +# skip blank lines and comment lines +/^$/ { next } +/^#/ { next } + +BEGIN { + print "#ifndef __ASM_CPUCAPS_H" + print "#define __ASM_CPUCAPS_H" + print "" + print "/* Generated file - do not edit */" + cap_num = 0 + print "" +} + +/^[vA-Z0-9_]+$/ { + printf("#define ARM64_%-30s\t%d\n", $0, cap_num++) + next +} + +END { + printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num) + print "" + print "#endif /* __ASM_CPUCAPS_H */" +} + +# Any lines not handled by previous rules are unexpected +{ + fatal("unhandled statement") +} diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 1ee8e736a48e..bb11fe4c875a 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -363,7 +363,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index a4b7ee1df211..8f215e79e70e 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12); /* _sigfault._perf */ - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14); /* _sigpoll */ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c); diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 0dd019dc2136..79c2d24c89dd 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -442,7 +442,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 2ac716984ca2..b11395a20c20 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -448,7 +448,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c index b184baa4e56a..f175bce2987f 100644 --- a/arch/mips/alchemy/board-xxs1500.c +++ b/arch/mips/alchemy/board-xxs1500.c @@ -18,6 +18,7 @@ #include <asm/reboot.h> #include <asm/setup.h> #include <asm/mach-au1x00/au1000.h> +#include <asm/mach-au1x00/gpio-au1000.h> #include <prom.h> const char *get_system_type(void) diff --git a/arch/mips/include/asm/mips-boards/launch.h b/arch/mips/include/asm/mips-boards/launch.h index f93aa5ee2e2e..3481ed4c117b 100644 --- a/arch/mips/include/asm/mips-boards/launch.h +++ b/arch/mips/include/asm/mips-boards/launch.h @@ -3,6 +3,9 @@ * */ +#ifndef _ASM_MIPS_BOARDS_LAUNCH_H +#define _ASM_MIPS_BOARDS_LAUNCH_H + #ifndef _ASSEMBLER_ struct cpulaunch { @@ -34,3 +37,5 @@ struct cpulaunch { /* Polling period in count cycles for secondary CPU's */ #define LAUNCHPERIOD 10000 + +#endif /* _ASM_MIPS_BOARDS_LAUNCH_H */ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 5e0096657251..9220909526f9 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -381,7 +381,7 @@ 440 n32 process_madvise sys_process_madvise 441 n32 epoll_pwait2 compat_sys_epoll_pwait2 442 n32 mount_setattr sys_mount_setattr -443 n32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n32 landlock_create_ruleset sys_landlock_create_ruleset 445 n32 landlock_add_rule sys_landlock_add_rule 446 n32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 9974f5f8e49b..9cd1c34f31b5 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -357,7 +357,7 @@ 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 442 n64 mount_setattr sys_mount_setattr -443 n64 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 39d6e71e57b6..d560c467a8c6 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -430,7 +430,7 @@ 440 o32 process_madvise sys_process_madvise 441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 o32 mount_setattr sys_mount_setattr -443 o32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 o32 landlock_create_ruleset sys_landlock_create_ruleset 445 o32 landlock_add_rule sys_landlock_add_rule 446 o32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c index de03838b343b..a9b72eacfc0b 100644 --- a/arch/mips/lib/mips-atomic.c +++ b/arch/mips/lib/mips-atomic.c @@ -37,7 +37,7 @@ */ notrace void arch_local_irq_disable(void) { - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -53,7 +53,7 @@ notrace void arch_local_irq_disable(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_disable); @@ -61,7 +61,7 @@ notrace unsigned long arch_local_irq_save(void) { unsigned long flags; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -78,7 +78,7 @@ notrace unsigned long arch_local_irq_save(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); return flags; } @@ -88,7 +88,7 @@ notrace void arch_local_irq_restore(unsigned long flags) { unsigned long __tmp1; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -106,7 +106,7 @@ notrace void arch_local_irq_restore(unsigned long flags) : "0" (flags) : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_restore); diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index a7bf0c80371c..830ab91e574f 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -158,31 +158,29 @@ unsigned long _page_cachable_default; EXPORT_SYMBOL(_page_cachable_default); #define PM(p) __pgprot(_page_cachable_default | (p)) -#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p)) static inline void setup_protection_map(void) { protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[1] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[2] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[3] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = PVA(_PAGE_PRESENT); - protection_map[5] = PVA(_PAGE_PRESENT); - protection_map[6] = PVA(_PAGE_PRESENT); - protection_map[7] = PVA(_PAGE_PRESENT); + protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = PM(_PAGE_PRESENT); + protection_map[5] = PM(_PAGE_PRESENT); + protection_map[6] = PM(_PAGE_PRESENT); + protection_map[7] = PM(_PAGE_PRESENT); protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[9] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = PVA(_PAGE_PRESENT); - protection_map[13] = PVA(_PAGE_PRESENT); - protection_map[14] = PVA(_PAGE_PRESENT); - protection_map[15] = PVA(_PAGE_PRESENT); + protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = PM(_PAGE_PRESENT); + protection_map[13] = PM(_PAGE_PRESENT); + protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); } -#undef _PVA #undef PM void cpu_cache_init(void) diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 0c5de07da097..0135376c5de5 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -8,6 +8,7 @@ #include <linux/io.h> #include <linux/clk.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/sizes.h> #include <linux/of_fdt.h> @@ -25,6 +26,7 @@ __iomem void *rt_sysc_membase; __iomem void *rt_memc_membase; +EXPORT_SYMBOL_GPL(rt_sysc_membase); __iomem void *plat_of_remap_node(const char *node) { diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h new file mode 100644 index 000000000000..7538294721be --- /dev/null +++ b/arch/openrisc/include/asm/barrier.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#define mb() asm volatile ("l.msync" ::: "memory") + +#include <asm-generic/barrier.h> + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 2416a9f91533..c6f9e7b9f7cb 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -278,6 +278,8 @@ void calibrate_delay(void) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); + + of_node_put(cpu); } void __init setup_arch(char **cmdline_p) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index d5641198b90c..cfef61a7b6c2 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -75,7 +75,6 @@ static void __init map_ram(void) /* These mark extents of read-only kernel pages... * ...from vmlinux.lds.S */ - struct memblock_region *region; v = PAGE_OFFSET; @@ -121,7 +120,7 @@ static void __init map_ram(void) } printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - region->base, region->base + region->size); + start, end); } } @@ -129,7 +128,6 @@ void __init paging_init(void) { extern void tlb_init(void); - unsigned long end; int i; printk(KERN_INFO "Setting up paging and PTEs.\n"); @@ -145,8 +143,6 @@ void __init paging_init(void) */ current_pgd[smp_processor_id()] = init_mm.pgd; - end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); - map_ram(); zone_sizes_init(); diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 5ac80b83d745..aabc37f8cae3 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -440,7 +440,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index c2717f31925a..ccda0a91abf0 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -122,7 +122,15 @@ }; /include/ "pq3-i2c-0.dtsi" + i2c@3000 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-i2c-1.dtsi" + i2c@3100 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-duart-0.dtsi" /include/ "pq3-espi-0.dtsi" spi0: spi@7000 { diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 872e4485dc3f..ddc018d42252 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -371,7 +371,23 @@ }; /include/ "qoriq-i2c-0.dtsi" + i2c@118000 { + fsl,i2c-erratum-a004447; + }; + + i2c@118100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-i2c-1.dtsi" + i2c@119000 { + fsl,i2c-erratum-a004447; + }; + + i2c@119100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-duart-0.dtsi" /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-gpio-0.dtsi" diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 443050906018..e3b29eda8074 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -448,6 +448,9 @@ */ long plpar_hcall_norets(unsigned long opcode, ...); +/* Variant which does not do hcall tracing */ +long plpar_hcall_norets_notrace(unsigned long opcode, ...); + /** * plpar_hcall: - Make a pseries hypervisor call * @opcode: The hypervisor call to make. diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 44cde2e129b8..59f704408d65 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { - if (user_mode(regs)) - kuep_unlock(); } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) @@ -222,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_soft_mask = IRQS_ALL_DISABLED; local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) && + regs->nip < (unsigned long)__end_interrupts) { + // Kernel code running below __end_interrupts is + // implicitly soft-masked. + regs->softe = IRQS_ALL_DISABLED; + } + /* Don't do any per-CPU operations until interrupt state is fixed */ if (nmi_disables_ftrace(regs)) { diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 2d5c6bec2b4f..93ce3ec25387 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -50,7 +50,7 @@ l_yes: 1098: nop; \ .pushsection __jump_table, "aw"; \ .long 1098b - ., LABEL - .; \ - FTR_ENTRY_LONG KEY; \ + FTR_ENTRY_LONG KEY - .; \ .popsection #endif diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1e83359f286b..7f2e90db2050 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -51,6 +51,7 @@ /* PPC-specific vcpu->requests bit members */ #define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0) #define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1) +#define KVM_REQ_PENDING_TIMER KVM_ARCH_REQ(2) #include <linux/mmu_notifier.h> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index 5d1726bb28e7..bcb7b5f917be 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu) return be32_to_cpu(yield_count); } +/* + * Spinlock code confers and prods, so don't trace the hcalls because the + * tracing code takes spinlocks which can cause recursion deadlocks. + * + * These calls are made while the lock is not held: the lock slowpath yields if + * it can not acquire the lock, and unlock slow path might prod if a waiter has + * yielded). So this may not be a problem for simple spin locks because the + * tracing does not technically recurse on the lock, but we avoid it anyway. + * + * However the queued spin lock contended path is more strictly ordered: the + * H_CONFER hcall is made after the task has queued itself on the lock, so then + * recursing on that lock will cause the task to then queue up again behind the + * first instance (or worse: queued spinlocks use tricks that assume a context + * never waits on more than one spinlock, so such recursion may cause random + * corruption in the lock code). + */ static inline void yield_to_preempted(int cpu, u32 yield_count) { - plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); + plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); } static inline void prod_cpu(int cpu) { - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu)); } static inline void yield_to_any(void) { - plpar_hcall_norets(H_CONFER, -1, 0); + plpar_hcall_norets_notrace(H_CONFER, -1, 0); } #else static inline bool is_shared_processor(void) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index ece84a430701..83e0f701ebc6 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint) static inline long cede_processor(void) { - return plpar_hcall_norets(H_CEDE); + /* + * We cannot call tracepoints inside RCU idle regions which + * means we must not trace H_CEDE. + */ + return plpar_hcall_norets_notrace(H_CEDE); } static inline long extended_cede_processor(unsigned long latency_hint) diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 33fa5dd8ee6a..714a35f0d425 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) pgd_t *pgdir = init_mm.pgd; return __find_linux_pte(pgdir, ea, NULL, hshift); } + +/* + * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a + * physical address, without taking locks. This can be used in real-mode. + */ +static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) +{ + pte_t *ptep; + phys_addr_t pa; + int hugepage_shift; + + /* + * init_mm does not free page tables, and does not do THP. It may + * have huge pages from huge vmalloc / ioremap etc. + */ + ptep = find_init_mm_pte(addr, &hugepage_shift); + if (WARN_ON(!ptep)) + return 0; + + pa = PFN_PHYS(pte_pfn(*ptep)); + + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + + pa |= addr & ((1ul << hugepage_shift) - 1); + + return pa; +} + /* * This is what we should always use. Any other lockless page table lookup needs * careful audit against THP split. diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 9c9ab2746168..b476a685f066 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PTRACE_H #define _ASM_POWERPC_PTRACE_H +#include <linux/err.h> #include <uapi/asm/ptrace.h> #include <asm/asm-const.h> @@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); long do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#define kernel_stack_pointer(regs) ((regs)->gpr[1]) -static inline int is_syscall_success(struct pt_regs *regs) -{ - return !(regs->ccr & 0x10000000); -} - -static inline long regs_return_value(struct pt_regs *regs) -{ - if (is_syscall_success(regs)) - return regs->gpr[3]; - else - return -regs->gpr[3]; -} - -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->gpr[3] = rc; -} - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else @@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs) regs->trap |= 0x1; } +#define kernel_stack_pointer(regs) ((regs)->gpr[1]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return !IS_ERR_VALUE((unsigned long)regs->gpr[3]); + else + return !(regs->ccr & 0x10000000); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return regs->gpr[3]; + + if (is_syscall_success(regs)) + return regs->gpr[3]; + else + return -regs->gpr[3]; +} + +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gpr[3] = rc; +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index fd1b518eed17..ba0f88f3a30d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - /* - * If the system call failed, - * regs->gpr[3] contains a positive ERRORCODE. - */ - return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + if (trap_is_scv(regs)) { + unsigned long error = regs->gpr[3]; + + return IS_ERR_VALUE(error) ? error : 0; + } else { + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + } } static inline long syscall_get_return_value(struct task_struct *task, @@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - /* - * In the general case it's not obvious that we must deal with CCR - * here, as the syscall exit path will also do that for us. However - * there are some places, eg. the signal code, which check ccr to - * decide if the value in r3 is actually an error. - */ - if (error) { - regs->ccr |= 0x10000000L; - regs->gpr[3] = error; + if (trap_is_scv(regs)) { + regs->gpr[3] = (long) error ?: val; } else { - regs->ccr &= ~0x10000000L; - regs->gpr[3] = val; + /* + * In the general case it's not obvious that we must deal with + * CCR here, as the syscall exit path will also do that for us. + * However there are some places, eg. the signal code, which + * check ccr to decide if the value in r3 is actually an error. + */ + if (error) { + regs->ccr |= 0x10000000L; + regs->gpr[3] = error; + } else { + regs->ccr &= ~0x10000000L; + regs->gpr[3] = val; + } } } diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a09e4240c5b1..22c79ab40006 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -157,7 +157,7 @@ do { \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ EX_TABLE(2b, %l2) \ - : "=r" (x) \ + : "=&r" (x) \ : "m" (*addr) \ : \ : label) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f24cd53ff26e..3bbdcc86d01b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) */ static inline unsigned long eeh_token_to_phys(unsigned long token) { - pte_t *ptep; - unsigned long pa; - int hugepage_shift; - - /* - * We won't find hugepages here(this is iomem). Hence we are not - * worried about _PAGE_SPLITTING/collapse. Also we will not hit - * page table free, because of init_mm. - */ - ptep = find_init_mm_pte(token, &hugepage_shift); - if (!ptep) - return token; - - pa = pte_pfn(*ptep); - - /* On radix we can do hugepage mappings for io, so handle that */ - if (!hugepage_shift) - hugepage_shift = PAGE_SHIFT; - - pa <<= PAGE_SHIFT; - pa |= token & ((1ul << hugepage_shift) - 1); - return pa; + return ppc_find_vmap_phys(token); } /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7c3654b0d0f4..f1ae710274bc 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -340,6 +340,12 @@ ret_from_mc_except: andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ bne masked_interrupt_book3e_##n +/* + * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is + * called, because that does SAVE_NVGPRS which must see the original register + * values, otherwise the scratch values might be restored when exiting the + * interrupt. + */ #define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ std r15,PACA_EXGEN+EX_R15(r13) @@ -535,6 +541,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x300) b storage_fault_common @@ -544,6 +554,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x400) b storage_fault_common @@ -557,6 +571,10 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR + std r14,_DAR(r1) + std r15,_DSISR(r1) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x600) b alignment_more /* no room, go out of line */ @@ -565,10 +583,10 @@ __end_interrupts: NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - EXCEPTION_COMMON(0x700) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) + EXCEPTION_COMMON(0x700) + addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) b interrupt_return @@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * normal exception */ mfspr r14,SPRN_DBSR - EXCEPTION_COMMON_CRIT(0xd00) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) + EXCEPTION_COMMON_CRIT(0xd00) + addi r3,r1,STACK_FRAME_OVERHEAD bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -796,11 +814,11 @@ kernel_dbg_exc: * normal exception */ mfspr r14,SPRN_DBSR - EXCEPTION_COMMON_DBG(0xd08) std r14,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) + EXCEPTION_COMMON_DBG(0xd08) + addi r3,r1,STACK_FRAME_OVERHEAD bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0: * original values stashed away in the PACA */ storage_fault_common: - std r14,_DAR(r1) - std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - ld r14,PACA_EXGEN+EX_R14(r13) - ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault b interrupt_return @@ -944,11 +958,7 @@ storage_fault_common: * continues here. */ alignment_more: - std r14,_DAR(r1) - std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - ld r14,PACA_EXGEN+EX_R14(r13) - ld r15,PACA_EXGEN+EX_R15(r13) bl alignment_exception REST_NVGPRS(r1) b interrupt_return diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e4559f8914eb..e0938ba298f2 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5, syscall_fn f; kuep_lock(); -#ifdef CONFIG_PPC32 - kuap_save_and_lock(regs); -#endif regs->orig_gpr3 = r3; @@ -427,6 +424,7 @@ again: /* Restore user access locks last */ kuap_user_restore(regs); + kuep_unlock(); return ret; } diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 51bbaae94ccc..c877f074d174 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) #ifdef CONFIG_PPC_INDIRECT_MMIO struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { - unsigned hugepage_shift; struct iowa_bus *bus; int token; @@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) bus = &iowa_busses[token - 1]; else { unsigned long vaddr, paddr; - pte_t *ptep; vaddr = (unsigned long)PCI_FIX_ADDR(addr); if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) return NULL; - /* - * We won't find huge pages here (iomem). Also can't hit - * a page table free due to init_mm - */ - ptep = find_init_mm_pte(vaddr, &hugepage_shift); - if (ptep == NULL) - paddr = 0; - else { - WARN_ON(hugepage_shift); - paddr = pte_pfn(*ptep) << PAGE_SHIFT; - } + + paddr = ppc_find_vmap_phys(vaddr); + bus = iowa_pci_find(vaddr, paddr); if (bus == NULL) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 57d6b85e9b96..2af89a5e379f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; - size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - size_io = IOMMU_PAGE_ALIGN(size_io, tbl); - nio_pages = size_io >> tbl->it_page_shift; - io_order = get_iommu_order(size_io, tbl); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); - unsigned int nio_pages = size_io >> tbl->it_page_shift; + unsigned int nio_pages; + size = PAGE_ALIGN(size); + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 01ab2163659e..e8c2a6373157 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p) int ret = 0; struct kprobe *prev; struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); - struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; - } else if (ppc_inst_prefixed(prefix)) { + } else if ((unsigned long)p->addr & ~PAGE_MASK && + ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 8b2c1a8553a0..cfc03e016ff2 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console) static int __init ioremap_legacy_serial_console(void) { - struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console]; - struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console]; + struct plat_serial8250_port *port; + struct legacy_serial_info *info; void __iomem *vaddr; if (legacy_serial_console < 0) return 0; + info = &legacy_serial_infos[legacy_serial_console]; + port = &legacy_serial_ports[legacy_serial_console]; + if (!info->early_addr) return 0; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b779d25761cf..e42b85e4f1aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); - early_ioremap_setup(); - /* Initialize the hash table or TLB handling */ early_init_mmu(); + early_ioremap_setup(); + /* * After firmware and early platform setup code has set things up, * we note the SPR values for configurable control/performance diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index f4aafa337c2e..1f07317964e4 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #else -#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) +#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0) -#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0) static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index dca66481d0c2..f9e1f5428b9e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); user_write_access_end(); + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } - - /* Save the siginfo outside of the unsafe block. */ - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - goto badframe; - /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2e68fbb57cc6..8f052ff4058c 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -522,7 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d9193cd73be..c63e263312a4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) kvm_unmap_radix(kvm, range->slot, gfn); } else { for (gfn = range->start; gfn < range->end; gfn++) - kvm_unmap_rmapp(kvm, range->slot, range->start); + kvm_unmap_rmapp(kvm, range->slot, gfn); } return false; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 28a80d240b76..bc0813644666 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3936,7 +3936,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) break; } cur = ktime_get(); - } while (single_task_running() && ktime_before(cur, stop)); + } while (kvm_vcpu_can_poll(cur, stop)); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; @@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBRR, ebb_regs[1]); mtspr(SPRN_BESCR, ebb_regs[2]); mtspr(SPRN_TAR, user_tar); - mtspr(SPRN_FSCR, current->thread.fscr); } mtspr(SPRN_VRSAVE, user_vrsave); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 7af7c70f1468..7a0f12404e0e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -23,20 +23,9 @@ #include <asm/pte-walk.h> /* Translate address of a vmalloc'd thing to a linear map address */ -static void *real_vmalloc_addr(void *x) +static void *real_vmalloc_addr(void *addr) { - unsigned long addr = (unsigned long) x; - pte_t *p; - /* - * assume we don't have huge pages in vmalloc space... - * So don't worry about THP collapse/split. Called - * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. - */ - p = find_init_mm_pte(addr, NULL); - if (!p || !pte_present(*p)) - return NULL; - addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); - return __va(addr); + return __va(ppc_find_vmap_phys((unsigned long)addr)); } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5e634db4809b..004f0d4e665f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_UAMOR (SFS-88) #define STACK_SLOT_DAWR1 (SFS-96) #define STACK_SLOT_DAWRX1 (SFS-104) +#define STACK_SLOT_FSCR (SFS-112) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -686,6 +687,8 @@ BEGIN_FTR_SECTION std r6, STACK_SLOT_DAWR0(r1) std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) + mfspr r5, SPRN_FSCR + std r5, STACK_SLOT_FSCR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION mfspr r6, SPRN_DAWR1 @@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE ld r7, STACK_SLOT_HFSCR(r1) mtspr SPRN_HFSCR, r7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_FSCR(r1) + mtspr SPRN_FSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 1fd31b4b0e13..fe26f2fa0f3f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> +#include <linux/stop_machine.h> #include <asm/cputable.h> #include <asm/code-patching.h> #include <asm/page.h> @@ -149,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types & STF_BARRIER_FALLBACK) + // See comment in do_entry_flush_fixups() RE order of patching + if (types & STF_BARRIER_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); patch_branch((struct ppc_inst *)(dest + 1), - (unsigned long)&stf_barrier_fallback, - BRANCH_SET_LINK); - else - patch_instruction((struct ppc_inst *)(dest + 1), - ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); + } else { + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, @@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} void do_stf_barrier_fixups(enum stf_barrier_type types) { - do_stf_entry_barrier_fixups(types); - do_stf_exit_barrier_fixups(types); + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are executing + * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs + * spin in the stop machine core with interrupts hard disabled. + */ + stop_machine(__do_stf_barrier_fixups, &types, NULL); } void do_uaccess_flush_fixups(enum l1d_flush_type types) @@ -284,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) : "unknown"); } -void do_entry_flush_fixups(enum l1d_flush_type types) +static int __do_entry_flush_fixups(void *data) { + enum l1d_flush_type types = *(enum l1d_flush_type *)data; unsigned int instrs[3], *dest; long *start, *end; int i; @@ -309,6 +325,31 @@ void do_entry_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + /* + * If we're patching in or out the fallback flush we need to be careful about the + * order in which we patch instructions. That's because it's possible we could + * take a page fault after patching one instruction, so the sequence of + * instructions must be safe even in a half patched state. + * + * To make that work, when patching in the fallback flush we patch in this order: + * - the mflr (dest) + * - the mtlr (dest + 2) + * - the branch (dest + 1) + * + * That ensures the sequence is safe to execute at any point. In contrast if we + * patch the mtlr last, it's possible we could return from the branch and not + * restore LR, leading to a crash later. + * + * When patching out the fallback flush (either with nops or another flush type), + * we patch in this order: + * - the branch (dest + 1) + * - the mtlr (dest + 2) + * - the mflr (dest) + * + * Note we are protected by stop_machine() from other CPUs executing the code in a + * semi-patched state. + */ + start = PTRRELOC(&__start___entry_flush_fixup); end = PTRRELOC(&__stop___entry_flush_fixup); for (i = 0; start < end; start++, i++) { @@ -316,15 +357,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types == L1D_FLUSH_FALLBACK) - patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback, - BRANCH_SET_LINK); - else + if (types == L1D_FLUSH_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); + } else { patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } start = PTRRELOC(&__start___scv_entry_flush_fixup); @@ -334,15 +376,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - - if (types == L1D_FLUSH_FALLBACK) - patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback, - BRANCH_SET_LINK); - else + if (types == L1D_FLUSH_FALLBACK) { + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); + } else { patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + } } @@ -354,6 +397,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types) : "ori type" : (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); } void do_rfi_flush_fixups(enum l1d_flush_type types) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 043bbeaf407c..a6b36a40897a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -20,6 +20,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include <asm/kasan.h> +#include <asm/sparsemem.h> #include <asm/svm.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 16d4d1b6a1ff..51622411a7cc 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) bool use_siar = regs_use_siar(regs); unsigned long siar = mfspr(SPRN_SIAR); - if (ppmu->flags & PPMU_P10_DD1) { + if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { if (siar) return siar; else diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 2136e42833af..8a2b8d64265b 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1); \ #define HCALL_BRANCH(LABEL) #endif +_GLOBAL_TOC(plpar_hcall_norets_notrace) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HVSC /* invoke the hypervisor */ + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 1f3152ad7213..dab356e3ff87 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1829,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void) #endif /* - * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. + * Keep track of hcall tracing depth and prevent recursion. Warn if any is + * detected because it may indicate a problem. This will not catch all + * problems with tracing code making hcalls, because the tracing might have + * been invoked from a non-hcall, so the first hcall could recurse into it + * without warning here, but this better than nothing. + * + * Hcalls with specific problems being traced should use the _notrace + * plpar_hcall variants. */ static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); -void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { unsigned long flags; unsigned int *depth; - /* - * We cannot call tracepoints inside RCU idle regions which - * means we must not trace H_CEDE. - */ - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (WARN_ON_ONCE(*depth)) goto out; (*depth)++; @@ -1864,19 +1862,16 @@ out: local_irq_restore(flags); } -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) +notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) { unsigned long flags; unsigned int *depth; - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (*depth) /* Don't warn again on the way out */ goto out; (*depth)++; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a8ad8eb76120..18ec0f9bb8d5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -34,6 +34,7 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_SUPPORTS_HUGETLBFS if MMU + select ARCH_USE_MEMTEST select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT @@ -60,11 +61,11 @@ config RISCV select GENERIC_TIME_VSYSCALL if MMU && 64BIT select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && 64BIT select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT - select HAVE_ARCH_KGDB + select HAVE_ARCH_KGDB if !XIP_KERNEL select HAVE_ARCH_KGDB_QXFER_PKT select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER @@ -79,9 +80,9 @@ config RISCV select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO if MMU && 64BIT select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_KPROBES - select HAVE_KPROBES_ON_FTRACE - select HAVE_KRETPROBES + select HAVE_KPROBES if !XIP_KERNEL + select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL + select HAVE_KRETPROBES if !XIP_KERNEL select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -230,11 +231,11 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 - select HAVE_DYNAMIC_FTRACE if MMU && $(cc-option,-fpatchable-function-entry=8) + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL select SWIOTLB if MMU endchoice diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index ed963761fbd2..30676ebb16eb 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -14,6 +14,7 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC + select RISCV_ERRATA_ALTERNATIVE select ERRATA_SIFIVE help This enables support for SiFive SoC platform hardware. diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 3eb9590a0775..99ecd8bcfd77 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) CC_FLAGS_FTRACE := -fpatchable-function-entry=8 endif -ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) +ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS_MODULE += -mcmodel=medany endif @@ -38,6 +38,15 @@ else KBUILD_LDFLAGS += -melf32lriscv endif +ifeq ($(CONFIG_LD_IS_LLD),y) + KBUILD_CFLAGS += -mno-relax + KBUILD_AFLAGS += -mno-relax +ifneq ($(LLVM_IAS),1) + KBUILD_CFLAGS += -Wa,-mno-relax + KBUILD_AFLAGS += -Wa,-mno-relax +endif +endif + # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile index 622b12771fd3..855c1502d912 100644 --- a/arch/riscv/boot/dts/microchip/Makefile +++ b/arch/riscv/boot/dts/microchip/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += microchip-mpfs-icicle-kit.dtb +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile index 74c47fe9fc22..d90e4eb0ade8 100644 --- a/arch/riscv/boot/dts/sifive/Makefile +++ b/arch/riscv/boot/dts/sifive/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb \ hifive-unmatched-a00.dtb +obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi index 8eef82e4199f..abbb960f90a0 100644 --- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi @@ -273,7 +273,7 @@ cache-size = <2097152>; cache-unified; interrupt-parent = <&plic0>; - interrupts = <19 20 21 22>; + interrupts = <19 21 22 20>; reg = <0x0 0x2010000 0x0 0x1000>; }; gpio: gpio@10060000 { diff --git a/arch/riscv/errata/sifive/Makefile b/arch/riscv/errata/sifive/Makefile index bdd5fc843b8e..2fde48db0619 100644 --- a/arch/riscv/errata/sifive/Makefile +++ b/arch/riscv/errata/sifive/Makefile @@ -1,2 +1,2 @@ -obj-y += errata_cip_453.o +obj-$(CONFIG_ERRATA_SIFIVE_CIP_453) += errata_cip_453.o obj-y += errata.o diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 88c08705f64a..67406c376389 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -51,7 +51,7 @@ REG_ASM " " newlen "\n" \ ".word " errata_id "\n" -#define ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) \ +#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \ ".if " __stringify(enable) " == 1\n" \ ".pushsection .alternative, \"a\"\n" \ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ @@ -69,7 +69,7 @@ "886 :\n" \ old_c "\n" \ "887 :\n" \ - ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) + ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 1e954101906a..e4e291d40759 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -42,8 +42,8 @@ struct kimage_arch { unsigned long fdt_addr; }; -const extern unsigned char riscv_kexec_relocate[]; -const extern unsigned int riscv_kexec_relocate_size; +extern const unsigned char riscv_kexec_relocate[]; +extern const unsigned int riscv_kexec_relocate_size; typedef void (*riscv_kexec_method)(unsigned long first_ind_entry, unsigned long jump_addr, diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 9469f464e71a..380cd3a7e548 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -30,9 +30,8 @@ #define BPF_JIT_REGION_SIZE (SZ_128M) #ifdef CONFIG_64BIT -/* KASLR should leave at least 128MB for BPF after the kernel */ -#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) -#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_START (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (MODULES_END) #else #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) #define BPF_JIT_REGION_END (VMALLOC_END) diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index cc048143fba5..9e99e1db156b 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -14,8 +14,9 @@ #include <asm/set_memory.h> /* For set_memory_x() */ #include <linux/compiler.h> /* For unreachable() */ #include <linux/cpu.h> /* For cpu_down() */ +#include <linux/reboot.h> -/** +/* * kexec_image_info - Print received image details */ static void @@ -39,7 +40,7 @@ kexec_image_info(const struct kimage *image) } } -/** +/* * machine_kexec_prepare - Initialize kexec * * This function is called from do_kexec_load, when the user has @@ -100,7 +101,7 @@ machine_kexec_prepare(struct kimage *image) } -/** +/* * machine_kexec_cleanup - Cleanup any leftovers from * machine_kexec_prepare * @@ -135,7 +136,7 @@ void machine_shutdown(void) #endif } -/** +/* * machine_crash_shutdown - Prepare to kexec after a kernel crash * * This function is called by crash_kexec just before machine_kexec @@ -151,7 +152,7 @@ machine_crash_shutdown(struct pt_regs *regs) pr_info("Starting crashdump kernel...\n"); } -/** +/* * machine_kexec - Jump to the loaded kimage * * This function is called by kernel_kexec which is called by the diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 10b965c34536..15cc65ac7ca6 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -84,6 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } +#ifdef CONFIG_MMU void *alloc_insn_page(void) { return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, @@ -91,6 +92,7 @@ void *alloc_insn_page(void) VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } +#endif /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 03901d3a8b02..9a1b7a0603b2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -231,13 +231,13 @@ static void __init init_resources(void) /* Clean-up any unused pre-allocated resources */ mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res); - memblock_free((phys_addr_t) mem_res, mem_res_sz); + memblock_free(__pa(mem_res), mem_res_sz); return; error: /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); - memblock_free((phys_addr_t) mem_res, mem_res_sz); + memblock_free(__pa(mem_res), mem_res_sz); } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 2b3e0cb90d78..bde85fc53357 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,10 +27,10 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(0); - sp = sp_in_global; - pc = (unsigned long)walk_stackframe; + } else if (task == current) { + fp = (unsigned long)__builtin_frame_address(1); + sp = (unsigned long)__builtin_frame_address(0); + pc = (unsigned long)__builtin_return_address(0); } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -106,15 +106,15 @@ static bool print_trace_address(void *arg, unsigned long pc) return true; } -void dump_backtrace(struct pt_regs *regs, struct task_struct *task, +noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task, const char *loglvl) { - pr_cont("%sCall Trace:\n", loglvl); walk_stackframe(task, regs, print_trace_address, (void *)loglvl); } void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { + pr_cont("%sCall Trace:\n", loglvl); dump_backtrace(NULL, task, loglvl); } @@ -139,7 +139,7 @@ unsigned long get_wchan(struct task_struct *task) #ifdef CONFIG_STACKTRACE -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0721b9798595..7bc88d8aab97 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -86,8 +86,13 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } } +#if defined (CONFIG_XIP_KERNEL) && defined (CONFIG_RISCV_ERRATA_ALTERNATIVE) +#define __trap_section __section(".xip.traps") +#else +#define __trap_section +#endif #define DO_ERROR_INFO(name, signo, code, str) \ -asmlinkage __visible void name(struct pt_regs *regs) \ +asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ { \ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ } @@ -111,7 +116,7 @@ DO_ERROR_INFO(do_trap_store_misaligned, int handle_misaligned_load(struct pt_regs *regs); int handle_misaligned_store(struct pt_regs *regs); -asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) +asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs) { if (!handle_misaligned_load(regs)) return; @@ -119,7 +124,7 @@ asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) "Oops - load address misaligned"); } -asmlinkage void do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs) { if (!handle_misaligned_store(regs)) return; @@ -146,7 +151,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } -asmlinkage __visible void do_trap_break(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) { #ifdef CONFIG_KPROBES if (kprobe_single_step_handler(regs)) diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 4b29b9917f99..a3ff09c4c3f9 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -99,9 +99,22 @@ SECTIONS } PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); + . = ALIGN(8); + .alternative : { + __alt_start = .; + *(.alternative) + __alt_end = .; + } __init_end = .; + . = ALIGN(16); + .xip.traps : { + __xip_traps_start = .; + *(.xip.traps) + __xip_traps_end = .; + } + + . = ALIGN(PAGE_SIZE); .sdata : { __global_pointer$ = . + 0x800; *(.sdata*) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 4faf8bd157ea..4c4c92ce0bb8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -746,14 +746,18 @@ void __init protect_kernel_text_data(void) unsigned long init_data_start = (unsigned long)__init_data_begin; unsigned long rodata_start = (unsigned long)__start_rodata; unsigned long data_start = (unsigned long)_data; - unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) + unsigned long end_va = kernel_virt_addr + load_sz; +#else + unsigned long end_va = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); +#endif set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT); set_memory_ro(init_text_start, (init_data_start - init_text_start) >> PAGE_SHIFT); set_memory_nx(init_data_start, (rodata_start - init_data_start) >> PAGE_SHIFT); /* rodata section is marked readonly in mark_rodata_ro */ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); - set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT); + set_memory_nx(data_start, (end_va - data_start) >> PAGE_SHIFT); } void mark_rodata_ro(void) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 9daacae93e33..d7189c8714a9 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -169,7 +169,7 @@ static void __init kasan_shallow_populate(void *start, void *end) void __init kasan_init(void) { - phys_addr_t _start, _end; + phys_addr_t p_start, p_end; u64 i; /* @@ -189,9 +189,9 @@ void __init kasan_init(void) (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); /* Populate the linear mapping */ - for_each_mem_range(i, &_start, &_end) { - void *start = (void *)__va(_start); - void *end = (void *)__va(_end); + for_each_mem_range(i, &p_start, &p_end) { + void *start = (void *)__va(p_start); + void *end = (void *)__va(p_end); if (start >= end) break; @@ -201,7 +201,7 @@ void __init kasan_init(void) /* Populate kernel, BPF, modules mapping */ kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), - kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END)); + kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G)); for (i = 0; i < PTRS_PER_PTE; i++) set_pte(&kasan_early_shadow_pte[i], diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 12de7a9c85b3..9cc71ca9a88f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -651,9 +651,9 @@ ENDPROC(stack_overflow) .Lcleanup_sie_mcck: larl %r13,.Lsie_entry slgr %r9,%r13 - larl %r13,.Lsie_skip + lghi %r13,.Lsie_skip - .Lsie_entry clgr %r9,%r13 - jh .Lcleanup_sie_int + jhe .Lcleanup_sie_int oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST .Lcleanup_sie_int: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 7e4a2aba366d..0690263df1dd 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index f47a0dc55445..0b91499ebdcf 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index f5beecdac693..e76b22157099 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { } BUILD_TRAP_HANDLER(nmi) { - unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; arch_ftrace_nmi_enter(); diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b9e1c0e735b7..e34cc30ef22c 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -488,7 +488,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c77c5d8a7b3e..cb5e8d39cac1 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) endif -ifdef CONFIG_LTO_CLANG -KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ - -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) -endif - # Workaround for a gcc prelease that unfortunately was shipped in a suse release KBUILD_CFLAGS += -Wno-sign-compare # @@ -202,7 +197,13 @@ ifdef CONFIG_RETPOLINE endif endif -KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) +KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) + +ifdef CONFIG_LTO_CLANG +ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) +KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif +endif ifdef CONFIG_X86_NEED_RELOCS LDFLAGS_vmlinux := --emit-relocs --discard-none diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6e5522aebbbd..431bf7f846c3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -30,6 +30,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ KBUILD_CFLAGS := -m$(BITS) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE +KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone @@ -48,10 +49,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h KBUILD_CFLAGS += $(CLANG_FLAGS) -# sev-es.c indirectly inludes inat-table.h which is generated during +# sev.c indirectly inludes inat-table.h which is generated during # compilation and stored in $(objtree). Add the directory to the includes so # that the compiler finds it even with out-of-tree builds (make O=/some/path). -CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/ +CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -93,7 +94,7 @@ ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o - vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dde042f64cca..743f13ea25c1 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -172,7 +172,7 @@ void __puthex(unsigned long value) } } -#if CONFIG_X86_NEED_RELOCS +#ifdef CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) { diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index e5612f035498..31139256859f 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -79,7 +79,7 @@ struct mem_vector { u64 size; }; -#if CONFIG_RANDOMIZE_BASE +#ifdef CONFIG_RANDOMIZE_BASE /* kaslr.c */ void choose_random_location(unsigned long input, unsigned long input_size, diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev.c index 82041bd380e5..670e998fe930 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev.c @@ -13,7 +13,7 @@ #include "misc.h" #include <asm/pgtable_types.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/trapnr.h> #include <asm/trap_pf.h> #include <asm/msr-index.h> @@ -117,7 +117,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-es-shared.c" +#include "../../kernel/sev-shared.c" static bool early_setup_sev_es(void) { diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 28a1423ce32e..4bbc267fb36b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -447,7 +447,7 @@ 440 i386 process_madvise sys_process_madvise 441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 i386 mount_setattr sys_mount_setattr -443 i386 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 i386 landlock_create_ruleset sys_landlock_create_ruleset 445 i386 landlock_add_rule sys_landlock_add_rule 446 i386 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ecd551b08d05..ce18119ea0d0 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,7 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e509325c2c3..8f71dd72ef95 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -396,10 +396,12 @@ int x86_reserve_hardware(void) if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) + if (!reserve_pmc_hardware()) { err = -EBUSY; - else + } else { reserve_ds_buffers(); + reserve_lbr_buffers(); + } } if (!err) atomic_inc(&pmc_refcount); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2521d03de5e0..e28892270c58 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6253,7 +6253,7 @@ __init int intel_pmu_init(void) * Check all LBT MSR here. * Disable LBR access if any LBR MSRs can not be accessed. */ - if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL)) x86_pmu.lbr_nr = 0; for (i = 0; i < x86_pmu.lbr_nr; i++) { if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 76dbab6ac9fb..4409d2cccfda 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel) void intel_pmu_lbr_add(struct perf_event *event) { - struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) @@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event) perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); - - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - kmem_cache && !cpuc->lbr_xsave && - (cpuc->lbr_users != cpuc->lbr_pebs_users)) - cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL); } void release_lbr_buffers(void) @@ -722,6 +716,26 @@ void release_lbr_buffers(void) } } +void reserve_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL, + cpu_to_node(cpu)); + } +} + void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 63f097289a84..3a75a2c601c2 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1406,6 +1406,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool die_id = i; else die_id = topology_phys_to_logical_pkg(i); + if (die_id < 0) + die_id = -ENODEV; map->pbus_to_dieid[bus] = die_id; break; } @@ -1452,14 +1454,14 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool i = -1; if (reverse) { for (bus = 255; bus >= 0; bus--) { - if (map->pbus_to_dieid[bus] >= 0) + if (map->pbus_to_dieid[bus] != -1) i = map->pbus_to_dieid[bus]; else map->pbus_to_dieid[bus] = i; } } else { for (bus = 0; bus <= 255; bus++) { - if (map->pbus_to_dieid[bus] >= 0) + if (map->pbus_to_dieid[bus] != -1) i = map->pbus_to_dieid[bus]; else map->pbus_to_dieid[bus] = i; @@ -5097,9 +5099,10 @@ static struct intel_uncore_type icx_uncore_m2m = { .perf_ctr = SNR_M2M_PCI_PMON_CTR0, .event_ctl = SNR_M2M_PCI_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, .ops = &snr_m2m_uncore_pci_ops, - .format_group = &skx_uncore_format_group, + .format_group = &snr_m2m_uncore_format_group, }; static struct attribute *icx_upi_uncore_formats_attr[] = { diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 27fa85e7d4fd..ad87cb36f7c8 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1244,6 +1244,8 @@ void reserve_ds_buffers(void); void release_lbr_buffers(void); +void reserve_lbr_buffers(void); + extern struct event_constraint bts_constraint; extern struct event_constraint vlbr_constraint; @@ -1393,6 +1395,10 @@ static inline void release_lbr_buffers(void) { } +static inline void reserve_lbr_buffers(void) +{ +} + static inline int intel_pmu_init(void) { return 0; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 412b51e059c8..48067af94678 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index b7dd944dc867..8f28fafa98b3 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,11 +56,8 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_IOMMU_SUPPORT -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif +/* Force disable because it's broken beyond repair */ +#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index ed33a14188f6..23bef08a8388 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); */ #define PASID_DISABLED 0 -#ifdef CONFIG_IOMMU_SUPPORT -/* Update current's PASID MSR/state by mm's PASID. */ -void update_pasid(void); -#else static inline void update_pasid(void) { } -#endif + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 8d33ad80704f..fdee23ea4e17 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -578,19 +578,19 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * PKRU state is switched eagerly because it needs to be valid before we * return to userland e.g. for a copy_to_user() operation. */ - if (current->mm) { + if (!(current->flags & PF_KTHREAD)) { + /* + * If the PKRU bit in xsave.header.xfeatures is not set, + * then the PKRU component was in init state, which means + * XRSTOR will set PKRU to 0. If the bit is not set then + * get_xsave_addr() will return NULL because the PKRU value + * in memory is not valid. This means pkru_val has to be + * set to 0 and not to init_pkru_value. + */ pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); - if (pk) - pkru_val = pk->pkru; + pkru_val = pk ? pk->pkru : 0; } __write_pkru(pkru_val); - - /* - * Expensive PASID MSR write will be avoided in update_pasid() because - * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated - * unless it's different from mm->pasid to reduce overhead. - */ - update_pasid(); } #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 323641097f63..e7bef91cee04 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) KVM_X86_OP_NULL(update_pi_irte) +KVM_X86_OP_NULL(start_assignment) KVM_X86_OP_NULL(apicv_post_state_restore) KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt) KVM_X86_OP_NULL(set_hv_timer) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cbbcee0a84f9..9c7ced0e3171 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -113,6 +113,7 @@ #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define UNMAPPED_GVA (~(gpa_t)0) +#define INVALID_GPA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G @@ -199,6 +200,7 @@ enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 +#define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) @@ -212,7 +214,7 @@ enum x86_intercept_stage; * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. */ #define DR6_ACTIVE_LOW 0xffff0ff0 -#define DR6_VOLATILE 0x0001e00f +#define DR6_VOLATILE 0x0001e80f #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) #define DR7_BP_EN_MASK 0x000000ff @@ -407,7 +409,7 @@ struct kvm_mmu { u32 pkru_mask; u64 *pae_root; - u64 *lm_root; + u64 *pml4_root; /* * check zero bits on shadow page table entries, these @@ -1350,6 +1352,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); + void (*start_assignment)(struct kvm *kvm); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); @@ -1417,6 +1420,7 @@ struct kvm_arch_async_pf { bool direct_map; }; +extern u32 __read_mostly kvm_nr_uret_msrs; extern u64 __read_mostly host_efer; extern bool __read_mostly allow_smaller_maxphyaddr; extern struct kvm_x86_ops kvm_x86_ops; @@ -1775,9 +1779,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); -void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_add_user_return_msr(u32 msr); +int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +static inline bool kvm_is_supported_user_return_msr(u32 msr) +{ + return kvm_find_user_return_msr(msr) >= 0; +} + u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 338119852512..69299878b200 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -7,8 +7,6 @@ #include <linux/interrupt.h> #include <uapi/asm/kvm_para.h> -extern void kvmclock_init(void); - #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else @@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, } #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait_schedule(u32 token); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_apf_flags(void); -void kvm_disable_steal_time(void); bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void) return 0; } -static inline void kvm_disable_steal_time(void) -{ - return; -} - static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) { return false; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 742d89a00721..211ba3375ee9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -537,9 +537,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 154321d29050..556b2b17c3e2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -787,8 +787,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow); #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); +extern u32 amd_get_highest_perf(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } +static inline u32 amd_get_highest_perf(void) { return 0; } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 000000000000..629c3df243f0 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. + * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) + +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) + +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + (GHCB_MSR_CPUID_REQ | \ + (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ + (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \ + ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS)) + +#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 +#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +#endif diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev.h index cf1d957c7091..fa5cd05d3b5b 100644 --- a/arch/x86/include/asm/sev-es.h +++ b/arch/x86/include/asm/sev.h @@ -10,34 +10,12 @@ #include <linux/types.h> #include <asm/insn.h> +#include <asm/sev-common.h> -#define GHCB_SEV_INFO 0x001UL -#define GHCB_SEV_INFO_REQ 0x002UL -#define GHCB_INFO(v) ((v) & 0xfffUL) -#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) -#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) -#define GHCB_PROTO_OUR 0x0001UL -#define GHCB_SEV_CPUID_REQ 0x004UL -#define GHCB_CPUID_REQ_EAX 0 -#define GHCB_CPUID_REQ_EBX 1 -#define GHCB_CPUID_REQ_ECX 2 -#define GHCB_CPUID_REQ_EDX 3 -#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ - (((unsigned long)reg & 3) << 30) | \ - (((unsigned long)fn) << 32)) +#define GHCB_PROTO_OUR 0x0001UL +#define GHCB_PROTOCOL_MAX 1ULL +#define GHCB_DEFAULT_USAGE 0ULL -#define GHCB_PROTOCOL_MAX 0x0001UL -#define GHCB_DEFAULT_USAGE 0x0000UL - -#define GHCB_SEV_CPUID_RESP 0x005UL -#define GHCB_SEV_TERMINATE 0x100UL -#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ - (((((u64)reason_set) & 0x7) << 12) | \ - ((((u64)reason_val) & 0xff) << 16)) -#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 -#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 - -#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) #define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } enum es_result { diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h index ddbdefd5b94f..91a7b6687c3b 100644 --- a/arch/x86/include/asm/thermal.h +++ b/arch/x86/include/asm/thermal.h @@ -3,11 +3,13 @@ #define _ASM_X86_THERMAL_H #ifdef CONFIG_X86_THERMAL_VECTOR +void therm_lvt_init(void); void intel_init_thermal(struct cpuinfo_x86 *c); bool x86_thermal_enabled(void); void intel_thermal_interrupt(void); #else -static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +static inline void therm_lvt_init(void) { } +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } #endif #endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h index 119ac8612d89..136e5e57cfe1 100644 --- a/arch/x86/include/asm/vdso/clocksource.h +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -7,4 +7,6 @@ VDSO_CLOCKMODE_PVCLOCK, \ VDSO_CLOCKMODE_HVCLOCK +#define HAVE_VDSO_CLOCKMODE_HVCLOCK + #endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5a3022c8af82..0662f644aad9 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0704c2a94272..0f66682ac02a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg -CFLAGS_REMOVE_sev-es.o = -pg +CFLAGS_REMOVE_sev.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n -KASAN_SANITIZE_sev-es.o := n +KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6974b5174495..6fe5b44fcbc9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -183,41 +183,69 @@ done: } /* + * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) + * + * @instr: instruction byte stream + * @instrlen: length of the above + * @off: offset within @instr where the first NOP has been detected + * + * Return: number of NOPs found (and replaced). + */ +static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) +{ + unsigned long flags; + int i = off, nnops; + + while (i < instrlen) { + if (instr[i] != 0x90) + break; + + i++; + } + + nnops = i - off; + + if (nnops <= 1) + return nnops; + + local_irq_save(flags); + add_nops(instr + off, nnops); + local_irq_restore(flags); + + DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); + + return nnops; +} + +/* * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification. */ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { - unsigned long flags; struct insn insn; - int nop, i = 0; + int i = 0; /* - * Jump over the non-NOP insns, the remaining bytes must be single-byte - * NOPs, optimize them. + * Jump over the non-NOP insns and optimize single-byte NOPs into bigger + * ones. */ for (;;) { if (insn_decode_kernel(&insn, &instr[i])) return; + /* + * See if this and any potentially following NOPs can be + * optimized. + */ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) - break; - - if ((i += insn.length) >= a->instrlen) - return; - } + i += optimize_nops_range(instr, a->instrlen, i); + else + i += insn.length; - for (nop = i; i < a->instrlen; i++) { - if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i])) + if (i >= a->instrlen) return; } - - local_irq_save(flags); - add_nops(instr + nop, i - nop); - local_irq_restore(flags); - - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", - instr, nop, a->instrlen); } /* diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4a39fb429f15..d262811ce14b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode) end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); + lapic_update_legacy_vectors(); } #ifdef CONFIG_UP_LATE_INIT diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6dbdc7c22bb7..fb67ed5e7e6a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace) irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); } +void __init lapic_update_legacy_vectors(void) +{ + unsigned int i; + + if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0) + return; + + /* + * If the IO/APIC is disabled via config, kernel command line or + * lack of enumeration then all legacy interrupts are routed + * through the PIC. Make sure that they are marked as legacy + * vectors. PIC_CASCADE_IRQ has already been marked in + * lapic_assign_system_vectors(). + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + lapic_assign_legacy_vector(i, true); + } +} + void __init lapic_assign_system_vectors(void) { unsigned int i, vector = 0; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d11384dc9ab..c06ac56eae4d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) */ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { /* Check if memory encryption is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) goto clear_all; /* @@ -1165,3 +1165,19 @@ void set_dr_addr_mask(unsigned long mask, int dr) break; } } + +u32 amd_get_highest_perf(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) || + (c->x86_model >= 0x70 && c->x86_model < 0x80))) + return 166; + + if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) || + (c->x86_model >= 0x40 && c->x86_model < 0x70))) + return 166; + + return 255; +} +EXPORT_SYMBOL_GPL(amd_get_highest_perf); diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0c3b372318b7..b5f43049fa5f 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void) if (boot_cpu_data.x86 < 0xf) return 0; /* In case some hypervisor doesn't pass SYSCFG through: */ - if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0) return 0; /* * Memory between 4GB and top of mem is forced WB by this magic bit. diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b90f3f437765..558108296f3c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - rdmsr(MSR_K8_SYSCFG, lo, hi); + rdmsr(MSR_AMD64_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; - mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi); } } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 3ef5868ac588..7aecb2fc3186 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -63,7 +63,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BPU_PERFCTR0; } - fallthrough; + break; case X86_VENDOR_ZHAOXIN: case X86_VENDOR_CENTAUR: return msr - MSR_ARCH_PERFMON_PERFCTR0; @@ -96,7 +96,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BSU_ESCR0; } - fallthrough; + break; case X86_VENDOR_ZHAOXIN: case X86_VENDOR_CENTAUR: return msr - MSR_ARCH_PERFMON_EVENTSEL0; diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c index 6ad165a5c0cc..64511c4a5200 100644 --- a/arch/x86/kernel/cpu/sgx/virt.c +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -212,6 +212,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file) list_splice_tail(&secs_pages, &zombie_secs_pages); mutex_unlock(&zombie_secs_pages_lock); + xa_destroy(&vepc->page_array); kfree(vepc); return 0; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a4ec65317a7f..ec3ae3054792 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) return 0; } - if (!access_ok(buf, size)) - return -EACCES; + if (!access_ok(buf, size)) { + ret = -EACCES; + goto out; + } - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + if (!static_cpu_has(X86_FEATURE_FPU)) { + ret = fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); + goto out; + } if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; @@ -369,6 +373,25 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); return 0; } + + /* + * The above did an FPU restore operation, restricted to + * the user portion of the registers, and failed, but the + * microcode might have modified the FPU registers + * nevertheless. + * + * If the FPU registers do not belong to current, then + * invalidate the FPU register state otherwise the task might + * preempt current and return to user space with corrupted + * FPU registers. + * + * In case current owns the FPU registers then no further + * action is required. The fixup below will handle it + * correctly. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + __cpu_invalidate_fpregs_state(); + fpregs_unlock(); } else { /* @@ -377,7 +400,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ ret = __copy_from_user(&env, buf, sizeof(env)); if (ret) - goto err_out; + goto out; envp = &env; } @@ -405,16 +428,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (use_xsave() && !fx_only) { u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; - if (using_compacted_format()) { - ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); - } else { - ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); - - if (!ret && state_size > offsetof(struct xregs_state, header)) - ret = validate_user_xstate_header(&fpu->state.xsave.header); - } + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); if (ret) - goto err_out; + goto out; sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, fx_only); @@ -434,7 +450,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); if (ret) { ret = -EFAULT; - goto err_out; + goto out; } sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, @@ -452,7 +468,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else { ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); if (ret) - goto err_out; + goto out; fpregs_lock(); ret = copy_kernel_to_fregs_err(&fpu->state.fsave); @@ -463,7 +479,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_deactivate(fpu); fpregs_unlock(); -err_out: +out: if (ret) fpu__clear_user_states(fpu); return ret; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a85c64000218..d0eef963aad1 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1402,60 +1402,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, return 0; } #endif /* CONFIG_PROC_PID_ARCH_STATUS */ - -#ifdef CONFIG_IOMMU_SUPPORT -void update_pasid(void) -{ - u64 pasid_state; - u32 pasid; - - if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) - return; - - if (!current->mm) - return; - - pasid = READ_ONCE(current->mm->pasid); - /* Set the valid bit in the PASID MSR/state only for valid pasid. */ - pasid_state = pasid == PASID_DISABLED ? - pasid : pasid | MSR_IA32_PASID_VALID; - - /* - * No need to hold fregs_lock() since the task's fpstate won't - * be changed by others (e.g. ptrace) while the task is being - * switched to or is in IPI. - */ - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - /* The MSR is active and can be directly updated. */ - wrmsrl(MSR_IA32_PASID, pasid_state); - } else { - struct fpu *fpu = ¤t->thread.fpu; - struct ia32_pasid_state *ppasid_state; - struct xregs_state *xsave; - - /* - * The CPU's xstate registers are not currently active. Just - * update the PASID state in the memory buffer here. The - * PASID MSR will be loaded when returning to user mode. - */ - xsave = &fpu->state.xsave; - xsave->header.xfeatures |= XFEATURE_MASK_PASID; - ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID); - /* - * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state - * won't be NULL and no need to check its value. - * - * Only update the task's PASID state when it's different - * from the mm's pasid. - */ - if (ppasid_state->pasid != pasid_state) { - /* - * Invalid fpregs so that state restoring will pick up - * the PASID state. - */ - __fpu_invalidate_fpregs_state(fpu); - ppasid_state->pasid = pasid_state; - } - } -} -#endif /* CONFIG_IOMMU_SUPPORT */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 18be44163a50..de01903c3735 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,7 +39,7 @@ #include <asm/realmode.h> #include <asm/extable.h> #include <asm/trapnr.h> -#include <asm/sev-es.h> +#include <asm/sev.h> /* * Manage page tables very early on. diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d307c22e5c18..a26643dc6bd6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -26,6 +26,7 @@ #include <linux/kprobes.h> #include <linux/nmi.h> #include <linux/swait.h> +#include <linux/syscore_ops.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -37,6 +38,7 @@ #include <asm/tlb.h> #include <asm/cpuidle_haltpoll.h> #include <asm/ptrace.h> +#include <asm/reboot.h> #include <asm/svm.h> DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); - pr_info("KVM setup async PF for cpu %d\n", smp_processor_id()); + pr_info("setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { @@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(apf_reason.enabled, 0); - pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id()); + pr_info("disable async PF for cpu %d\n", smp_processor_id()); } -static void kvm_pv_guest_cpu_reboot(void *unused) +static void kvm_disable_steal_time(void) { - /* - * We disable PV EOI before we load a new kernel by kexec, - * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. - * New kernel can re-enable when it boots. - */ - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - kvm_disable_steal_time(); -} + if (!has_steal_clock) + return; -static int kvm_pv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - if (code == SYS_RESTART) - on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); - return NOTIFY_DONE; + wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } -static struct notifier_block kvm_pv_reboot_nb = { - .notifier_call = kvm_pv_reboot_notify, -}; - static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu) return steal; } -void kvm_disable_steal_time(void) -{ - if (!has_steal_clock) - return; - - wrmsr(MSR_KVM_STEAL_TIME, 0, 0); -} - static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); @@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void) } } +static void kvm_guest_cpu_offline(bool shutdown) +{ + kvm_disable_steal_time(); + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); + if (!shutdown) + apf_task_wake_all(); + kvmclock_disable(); +} + +static int kvm_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_init(); + local_irq_restore(flags); + return 0; +} + #ifdef CONFIG_SMP static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); @@ -635,31 +633,64 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); } -static void kvm_guest_cpu_offline(void) +static int kvm_cpu_down_prepare(unsigned int cpu) { - kvm_disable_steal_time(); - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - apf_task_wake_all(); + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_offline(false); + local_irq_restore(flags); + return 0; } -static int kvm_cpu_online(unsigned int cpu) +#endif + +static int kvm_suspend(void) { - local_irq_disable(); - kvm_guest_cpu_init(); - local_irq_enable(); + kvm_guest_cpu_offline(false); + return 0; } -static int kvm_cpu_down_prepare(unsigned int cpu) +static void kvm_resume(void) { - local_irq_disable(); - kvm_guest_cpu_offline(); - local_irq_enable(); - return 0; + kvm_cpu_online(raw_smp_processor_id()); +} + +static struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +}; + +static void kvm_pv_guest_cpu_reboot(void *unused) +{ + kvm_guest_cpu_offline(true); +} + +static int kvm_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); + return NOTIFY_DONE; } +static struct notifier_block kvm_pv_reboot_nb = { + .notifier_call = kvm_pv_reboot_notify, +}; + +/* + * After a PV feature is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. + */ +#ifdef CONFIG_KEXEC_CORE +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + kvm_guest_cpu_offline(true); + native_machine_crash_shutdown(regs); +} #endif static void __init kvm_guest_init(void) @@ -704,6 +735,12 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif +#ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + + register_syscore_ops(&kvm_syscore_ops); + /* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d37ed4e1d033..ad273e5861c1 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -20,7 +20,6 @@ #include <asm/hypervisor.h> #include <asm/mem_encrypt.h> #include <asm/x86_init.h> -#include <asm/reboot.h> #include <asm/kvmclock.h> static int kvmclock __initdata = 1; @@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void) } #endif -/* - * After the clock is registered, the host will keep writing to the - * registered memory location. If the guest happens to shutdown, this memory - * won't be valid. In cases like kexec, in which you install a new kernel, this - * means a random memory location will be kept being written. So before any - * kind of shutdown from our side, we unregister the clock by writing anything - * that does not have the 'enable' bit set in the msr - */ -#ifdef CONFIG_KEXEC_CORE -static void kvm_crash_shutdown(struct pt_regs *regs) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_crash_shutdown(regs); -} -#endif - -static void kvm_shutdown(void) +void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_shutdown(); } static void __init kvmclock_init_mem(void) @@ -351,10 +331,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; - machine_ops.shutdown = kvm_shutdown; -#ifdef CONFIG_KEXEC_CORE - machine_ops.crash_shutdown = kvm_crash_shutdown; -#endif kvm_get_preset_lpj(); /* diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index b5cb49e57df8..c94dec6a1834 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void) return; /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is not enabled? */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2ef961cf4cfc..4bce802d25fb 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -33,7 +33,7 @@ #include <asm/reboot.h> #include <asm/cache.h> #include <asm/nospec-branch.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 72920af0b3c0..1e720626069a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -44,6 +44,7 @@ #include <asm/pci-direct.h> #include <asm/prom.h> #include <asm/proto.h> +#include <asm/thermal.h> #include <asm/unwind.h> #include <asm/vsyscall.h> #include <linux/vmalloc.h> @@ -637,11 +638,11 @@ static void __init trim_snb_memory(void) * them from accessing certain memory ranges, namely anything below * 1M and in the pages listed in bad_pages[] above. * - * To avoid these pages being ever accessed by SNB gfx devices - * reserve all memory below the 1 MB mark and bad_pages that have - * not already been reserved at boot time. + * To avoid these pages being ever accessed by SNB gfx devices reserve + * bad_pages that have not already been reserved at boot time. + * All memory below the 1 MB mark is anyway reserved later during + * setup_arch(), so there is no need to reserve it here. */ - memblock_reserve(0, 1<<20); for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { if (memblock_reserve(bad_pages[i], PAGE_SIZE)) @@ -733,14 +734,14 @@ static void __init early_reserve_memory(void) * The first 4Kb of memory is a BIOS owned area, but generally it is * not listed as such in the E820 table. * - * Reserve the first memory page and typically some additional - * memory (64KiB by default) since some BIOSes are known to corrupt - * low memory. See the Kconfig help text for X86_RESERVE_LOW. + * Reserve the first 64K of memory since some BIOSes are known to + * corrupt low memory. After the real mode trampoline is allocated the + * rest of the memory below 640k is reserved. * * In addition, make sure page 0 is always reserved because on * systems with L1TF its contents can be leaked to user processes. */ - memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); + memblock_reserve(0, SZ_64K); early_reserve_initrd(); @@ -751,6 +752,7 @@ static void __init early_reserve_memory(void) reserve_ibft_region(); reserve_bios_regions(); + trim_snb_memory(); } /* @@ -1081,14 +1083,20 @@ void __init setup_arch(char **cmdline_p) (max_pfn_mapped<<PAGE_SHIFT) - 1); #endif - reserve_real_mode(); - /* - * Reserving memory causing GPU hangs on Sandy Bridge integrated - * graphics devices should be done after we allocated memory under - * 1M for the real mode trampoline. + * Find free memory for the real mode trampoline and place it + * there. + * If there is not enough free memory under 1M, on EFI-enabled + * systems there will be additional attempt to reclaim the memory + * for the real mode trampoline at efi_free_boot_services(). + * + * Unconditionally reserve the entire first 1M of RAM because + * BIOSes are know to corrupt low memory and several + * hundred kilobytes are not worth complex detection what memory gets + * clobbered. Moreover, on machines with SandyBridge graphics or in + * setups that use crashkernel the entire 1M is reserved anyway. */ - trim_snb_memory(); + reserve_real_mode(); init_mem_mapping(); @@ -1226,6 +1234,14 @@ void __init setup_arch(char **cmdline_p) x86_init.timers.wallclock_init(); + /* + * This needs to run before setup_local_APIC() which soft-disables the + * local APIC temporarily and that masks the thermal LVT interrupt, + * leading to softlockups on machines which have configured SMI + * interrupt delivery. + */ + therm_lvt_init(); + mcheck_init(); register_refined_jiffies(CLOCK_TICK_RATE); diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-shared.c index 0aa9f13efd57..9f90f460a28c 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void) static void __noreturn sev_es_terminate(unsigned int reason) { - u64 val = GHCB_SEV_TERMINATE; + u64 val = GHCB_MSR_TERM_REQ; /* * Tell the hypervisor what went wrong - only reason-set 0 is * currently supported. */ - val |= GHCB_SEV_TERMINATE_REASON(0, reason); + val |= GHCB_SEV_TERM_REASON(0, reason); /* Request Guest Termination from Hypvervisor */ sev_es_wr_ghcb_msr(val); @@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void) u64 val; /* Do the GHCB protocol version negotiation */ - sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); + sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_INFO(val) != GHCB_SEV_INFO) + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) return false; - if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || - GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR) return false; return true; @@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void) static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) { + ghcb->save.sw_exit_code = 0; memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } @@ -153,28 +154,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->ax = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->bx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->cx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->dx = val >> 32; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev.c index 73873b007838..651b81cd648e 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev.c @@ -22,7 +22,7 @@ #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/insn-eval.h> #include <asm/fpu/internal.h> #include <asm/processor.h> @@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) if (unlikely(data->ghcb_active)) { /* GHCB is already in use - save its contents */ - if (unlikely(data->backup_ghcb_active)) - return NULL; + if (unlikely(data->backup_ghcb_active)) { + /* + * Backup-GHCB is also already in use. There is no way + * to continue here so just kill the machine. To make + * panic() work, mark GHCBs inactive so that messages + * can be printed out. + */ + data->ghcb_active = false; + data->backup_ghcb_active = false; + + panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); + } /* Mark backup_ghcb active before writing to it */ data->backup_ghcb_active = true; @@ -221,24 +231,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) return ghcb; } -static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (state->ghcb) { - /* Restore GHCB from Backup */ - *ghcb = *state->ghcb; - data->backup_ghcb_active = false; - state->ghcb = NULL; - } else { - data->ghcb_active = false; - } -} - /* Needed in vc_early_forward_exception */ void do_early_exception(struct pt_regs *regs, int trapnr); @@ -323,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(target, size)) { - memcpy(dst, buf, size); - return ES_OK; - } - + /* + * This function uses __put_user() independent of whether kernel or user + * memory is accessed. This works fine because __put_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __put_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_to_user() here because + * vc_write_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ switch (size) { case 1: memcpy(&d1, buf, 1); - if (put_user(d1, target)) + if (__put_user(d1, target)) goto fault; break; case 2: memcpy(&d2, buf, 2); - if (put_user(d2, target)) + if (__put_user(d2, target)) goto fault; break; case 4: memcpy(&d4, buf, 4); - if (put_user(d4, target)) + if (__put_user(d4, target)) goto fault; break; case 8: memcpy(&d8, buf, 8); - if (put_user(d8, target)) + if (__put_user(d8, target)) goto fault; break; default: @@ -378,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(s, size)) { - memcpy(buf, src, size); - return ES_OK; - } - + /* + * This function uses __get_user() independent of whether kernel or user + * memory is accessed. This works fine because __get_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __get_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_from_user() here because + * vc_read_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ switch (size) { case 1: - if (get_user(d1, s)) + if (__get_user(d1, s)) goto fault; memcpy(buf, &d1, 1); break; case 2: - if (get_user(d2, s)) + if (__get_user(d2, s)) goto fault; memcpy(buf, &d2, 2); break; case 4: - if (get_user(d4, s)) + if (__get_user(d4, s)) goto fault; memcpy(buf, &d4, 4); break; case 8: - if (get_user(d8, s)) + if (__get_user(d8, s)) goto fault; memcpy(buf, &d8, 8); break; @@ -459,7 +477,30 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt } /* Include code shared with pre-decompression boot stage */ -#include "sev-es-shared.c" +#include "sev-shared.c" + +static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (state->ghcb) { + /* Restore GHCB from Backup */ + *ghcb = *state->ghcb; + data->backup_ghcb_active = false; + state->ghcb = NULL; + } else { + /* + * Invalidate the GHCB so a VMGEXIT instruction issued + * from userspace won't appear to be valid. + */ + vc_ghcb_invalidate(ghcb); + data->ghcb_active = false; + } +} void noinstr __sev_es_nmi_complete(void) { @@ -1255,6 +1296,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) case X86_TRAP_UD: exc_invalid_op(ctxt->regs); break; + case X86_TRAP_PF: + write_cr2(ctxt->fi.cr2); + exc_page_fault(ctxt->regs, error_code); + break; case X86_TRAP_AC: exc_alignment_check(ctxt->regs, error_code); break; @@ -1284,7 +1329,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) */ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) { - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; @@ -1310,16 +1354,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) */ ghcb = sev_es_get_ghcb(&state); - if (!ghcb) { - /* - * Mark GHCBs inactive so that panic() is able to print the - * message. - */ - data->ghcb_active = false; - data->backup_ghcb_active = false; - - panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); - } vc_ghcb_invalidate(ghcb); result = vc_init_em_ctxt(&ctxt, regs, error_code); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 0e5d0a7e203b..06743ec054d2 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); @@ -138,8 +141,10 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18); - BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14); CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0ad5214f598a..7770245cc7fa 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2043,7 +2043,7 @@ static bool amd_set_max_freq_ratio(void) return false; } - highest_perf = perf_caps.highest_perf; + highest_perf = amd_get_highest_perf(); nominal_perf = perf_caps.nominal_perf; if (!highest_perf || !nominal_perf) { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 19606a341888..b4da665bb892 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void) F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) + F(SGX_LC) | F(BUS_LOCK_DETECT) ); /* Set LA57 based on hardware capability. */ if (cpuid_ecx(7) & F(LA57)) @@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void) F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | F(PMM) | F(PMM_EN) ); + + /* + * Hide RDTSCP and RDPID if either feature is reported as supported but + * probing MSR_TSC_AUX failed. This is purely a sanity check and + * should never happen, but the guest will likely crash if RDTSCP or + * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in + * the past. For example, the sanity check may fire if this instance of + * KVM is running as L1 on top of an older, broken KVM. + */ + if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) || + kvm_cpu_cap_has(X86_FEATURE_RDPID)) && + !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) { + kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + kvm_cpu_cap_clear(X86_FEATURE_RDPID); + } } EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); @@ -637,8 +652,10 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) case 7: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; entry->eax = 0; - entry->ecx = F(RDPID); + if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) + entry->ecx = F(RDPID); ++array->nent; + break; default: break; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 77e1c89a95a7..5e5de05a8fbf 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4502,7 +4502,7 @@ static const struct opcode group8[] = { * from the register case of group9. */ static const struct gprefix pfx_0f_c7_7 = { - N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid), }; @@ -5111,7 +5111,7 @@ done: return rc; } -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type) { int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; @@ -5322,7 +5322,8 @@ done_prefixes: ctxt->execute = opcode.u.execute; - if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD))) + if (unlikely(emulation_type & EMULTYPE_TRAP_UD) && + likely(!(ctxt->d & EmulateOnUD))) return EMULATION_FAILED; if (unlikely(ctxt->d & diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f98370a39936..f00830e5202f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1172,6 +1172,7 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm) { struct kvm_hv *hv = to_kvm_hv(kvm); u64 gfn; + int idx; if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || @@ -1190,9 +1191,16 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm) gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; hv->tsc_ref.tsc_sequence = 0; + + /* + * Take the srcu lock as memslots will be accessed to check the gfn + * cache generation against the memslots generation. + */ + idx = srcu_read_lock(&kvm->srcu); if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; + srcu_read_unlock(&kvm->srcu, idx); out_unlock: mutex_unlock(&hv->hv_lock); diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 0d359115429a..3e870bf9ca4d 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -314,7 +314,6 @@ struct x86_emulate_ctxt { int interruptibility; bool perm_ok; /* do not check permissions if true */ - bool ud; /* inject an #UD if host doesn't support insn */ bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; @@ -468,6 +467,7 @@ enum x86_intercept { x86_intercept_clgi, x86_intercept_skinit, x86_intercept_rdtscp, + x86_intercept_rdpid, x86_intercept_icebp, x86_intercept_wbinvd, x86_intercept_monitor, @@ -490,7 +490,7 @@ enum x86_intercept { #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #endif -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type); bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_FAILED -1 #define EMULATION_OK 0 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 152591f9243a..17fa4ab1b834 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, if (!apic_x2apic_mode(apic)) valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); + if (alignment + len > 4) + return 1; + if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) return 1; @@ -1494,6 +1497,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) static void cancel_hv_timer(struct kvm_lapic *apic); +static void cancel_apic_timer(struct kvm_lapic *apic) +{ + hrtimer_cancel(&apic->lapic_timer.timer); + preempt_disable(); + if (apic->lapic_timer.hv_timer_in_use) + cancel_hv_timer(apic); + preempt_enable(); +} + static void apic_update_lvtt(struct kvm_lapic *apic) { u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & @@ -1502,11 +1514,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic) if (apic->lapic_timer.timer_mode != timer_mode) { if (apic_lvtt_tscdeadline(apic) != (timer_mode == APIC_LVT_TIMER_TSCDEADLINE)) { - hrtimer_cancel(&apic->lapic_timer.timer); - preempt_disable(); - if (apic->lapic_timer.hv_timer_in_use) - cancel_hv_timer(apic); - preempt_enable(); + cancel_apic_timer(apic); kvm_lapic_set_reg(apic, APIC_TMICT, 0); apic->lapic_timer.period = 0; apic->lapic_timer.tscdeadline = 0; @@ -1598,11 +1606,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; + if (lapic_timer_advance_dynamic) { + adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); + /* + * If the timer fired early, reread the TSC to account for the + * overhead of the above adjustment to avoid waiting longer + * than is necessary. + */ + if (guest_tsc < tsc_deadline) + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + } + if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); - - if (lapic_timer_advance_dynamic) - adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) @@ -1661,7 +1677,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) } atomic_inc(&apic->lapic_timer.pending); - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_make_request(KVM_REQ_UNBLOCK, vcpu); if (from_timer_fn) kvm_vcpu_kick(vcpu); } @@ -1913,8 +1929,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) if (!apic->lapic_timer.hv_timer_in_use) goto out; WARN_ON(rcuwait_active(&vcpu->wait)); - cancel_hv_timer(apic); apic_timer_expired(apic, false); + cancel_hv_timer(apic); if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); @@ -2084,7 +2100,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) if (apic_lvtt_tscdeadline(apic)) break; - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); kvm_lapic_set_reg(apic, APIC_TMICT, val); start_apic_timer(apic); break; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4b3ee244ebe0..8d5876dfc6b7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; - if (WARN_ON_ONCE(!mmu->lm_root)) { + if (WARN_ON_ONCE(!mmu->pml4_root)) { r = -EIO; goto out_unlock; } - mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask; + mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; } for (i = 0; i < 4; ++i) { @@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) - mmu->root_hpa = __pa(mmu->lm_root); + mmu->root_hpa = __pa(mmu->pml4_root); else mmu->root_hpa = __pa(mmu->pae_root); @@ -3350,7 +3350,7 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - u64 *lm_root, *pae_root; + u64 *pml4_root, *pae_root; /* * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP @@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL)) return -EIO; - if (mmu->pae_root && mmu->lm_root) + if (mmu->pae_root && mmu->pml4_root) return 0; /* * The special roots should always be allocated in concert. Yell and * bail if KVM ends up in a state where only one of the roots is valid. */ - if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root)) + if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root)) return -EIO; /* @@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (!pae_root) return -ENOMEM; - lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!lm_root) { + pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + if (!pml4_root) { free_page((unsigned long)pae_root); return -ENOMEM; } mmu->pae_root = pae_root; - mmu->lm_root = lm_root; + mmu->pml4_root = pml4_root; return 0; } @@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu) context->inject_page_fault = kvm_inject_page_fault; } +static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false); + + /* + * Nested MMUs are used only for walking L2's gva->gpa, they never have + * shadow pages of their own and so "direct" has no meaning. Set it + * to "true" to try to detect bogus usage of the nested MMU. + */ + role.base.direct = true; + + if (!is_paging(vcpu)) + role.base.level = 0; + else if (is_long_mode(vcpu)) + role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL : + PT64_ROOT_4LEVEL; + else if (is_pae(vcpu)) + role.base.level = PT32E_ROOT_LEVEL; + else + role.base.level = PT32_ROOT_LEVEL; + + return role; +} + static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) { - union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); + union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu); struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; if (new_role.as_u64 == g_context->mmu_role.as_u64) @@ -5261,7 +5285,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu) if (!tdp_enabled && mmu->pae_root) set_memory_encrypted((unsigned long)mmu->pae_root, 1); free_page((unsigned long)mmu->pae_root); - free_page((unsigned long)mmu->lm_root); + free_page((unsigned long)mmu->pml4_root); } static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 70b7e44e3035..823a5919f9fa 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -90,8 +90,8 @@ struct guest_walker { gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; bool pte_writable[PT_MAX_FULL_LEVELS]; - unsigned pt_access; - unsigned pte_access; + unsigned int pt_access[PT_MAX_FULL_LEVELS]; + unsigned int pte_access; gfn_t gfn; struct x86_exception fault; }; @@ -418,13 +418,15 @@ retry_walk: } walker->ptes[walker->level - 1] = pte; + + /* Convert to ACC_*_MASK flags for struct guest_walker. */ + walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask); } while (!is_last_gpte(mmu, walker->level, pte)); pte_pkey = FNAME(gpte_pkeys)(vcpu, pte); accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; /* Convert to ACC_*_MASK flags for struct guest_walker. */ - walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask); walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask); errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); if (unlikely(errcode)) @@ -463,7 +465,8 @@ retry_walk: } pgprintk("%s: pte %llx pte_access %x pt_access %x\n", - __func__, (u64)pte, walker->pte_access, walker->pt_access); + __func__, (u64)pte, walker->pte_access, + walker->pt_access[walker->level - 1]); return 1; error: @@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled; struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; - unsigned direct_access, access = gw->pt_access; + unsigned int direct_access, access; int top_level, level, req_level, ret; gfn_t base_gfn = gw->gfn; @@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, sp = NULL; if (!is_shadow_present_pte(*it.sptep)) { table_gfn = gw->table_gfn[it.level - 2]; + access = gw->pt_access[it.level - 2]; sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, false, access); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 88f69a6cc492..237317b1eddd 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, } /** - * handle_changed_spte - handle bookkeeping associated with an SPTE change + * __handle_changed_spte - handle bookkeeping associated with an SPTE change * @kvm: kvm instance * @as_id: the address space of the paging structure the SPTE was a part of * @gfn: the base GFN that was mapped by the SPTE @@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); + if (is_large_pte(old_spte) != is_large_pte(new_spte)) { + if (is_large_pte(old_spte)) + atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages); + else + atomic64_add(1, (atomic64_t*)&kvm->stat.lpages); + } + /* * The only times a SPTE should be changed from a non-present to * non-present state is when an MMIO entry is installed/modified/ @@ -1009,6 +1016,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, } if (!is_shadow_present_pte(iter.old_spte)) { + /* + * If SPTE has been forzen by another thread, just + * give up and retry, avoiding unnecessary page table + * allocation and free. + */ + if (is_removed_spte(iter.old_spte)) + break; + sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); child_pt = sp->spt; @@ -1177,9 +1192,9 @@ bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) } /* - * Remove write access from all the SPTEs mapping GFNs [start, end). If - * skip_4k is set, SPTEs that map 4k pages, will not be write-protected. - * Returns true if an SPTE has been changed and the TLBs need to be flushed. + * Remove write access from all SPTEs at or above min_level that map GFNs + * [start, end). Returns true if an SPTE has been changed and the TLBs need to + * be flushed. */ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, int min_level) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 712b4e0de481..5e7e920113f3 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -28,10 +28,8 @@ #include "svm.h" /* enable / disable AVIC */ -int avic; -#ifdef CONFIG_X86_LOCAL_APIC -module_param(avic, int, S_IRUGO); -#endif +bool avic; +module_param(avic, bool, S_IRUGO); #define SVM_AVIC_DOORBELL 0xc001011b @@ -223,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, return &avic_physical_id_table[index]; } -/** +/* * Note: * AVIC hardware walks the nested page table to check permissions, * but does not use the SPA address specified in the leaf page @@ -766,7 +764,7 @@ out: return ret; } -/** +/* * Note: * The HW cannot support posting multicast/broadcast * interrupts to a vCPU. So, we still use legacy interrupt @@ -1007,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -/** +/* * This function is called during VCPU halt/unhalt. */ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 540d43ba2cf4..5e8d8443154e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); svm_switch_vmcb(svm, &svm->vmcb01); - WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN); /* * On vmexit the GIF is set to false and @@ -872,6 +871,15 @@ void svm_free_nested(struct vcpu_svm *svm) __free_page(virt_to_page(svm->nested.vmcb02.ptr)); svm->nested.vmcb02.ptr = NULL; + /* + * When last_vmcb12_gpa matches the current vmcb12 gpa, + * some vmcb12 fields are not loaded if they are marked clean + * in the vmcb12, since in this case they are up to date already. + * + * When the vmcb02 is freed, this optimization becomes invalid. + */ + svm->nested.last_vmcb12_gpa = INVALID_GPA; + svm->nested.initialized = false; } @@ -884,9 +892,11 @@ void svm_leave_nested(struct vcpu_svm *svm) if (is_guest_mode(vcpu)) { svm->nested.nested_run_pending = 0; + svm->nested.vmcb12_gpa = INVALID_GPA; + leave_guest_mode(vcpu); - svm_switch_vmcb(svm, &svm->nested.vmcb02); + svm_switch_vmcb(svm, &svm->vmcb01); nested_svm_uninit_mmu_context(vcpu); vmcb_mark_all_dirty(svm->vmcb); @@ -1298,12 +1308,17 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, * L2 registers if needed are moved from the current VMCB to VMCB02. */ + if (is_guest_mode(vcpu)) + svm_leave_nested(svm); + else + svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; + + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + svm->nested.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - if (svm->current_vmcb == &svm->vmcb01) - svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; svm->vmcb01.ptr->save.es = save->es; svm->vmcb01.ptr->save.cs = save->cs; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 1356ee095cd5..8d36f0c73071 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev) sev->misc_cg = NULL; } -static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +static void sev_decommission(unsigned int handle) { struct sev_data_decommission decommission; + + if (!handle) + return; + + decommission.handle = handle; + sev_guest_decommission(&decommission, NULL); +} + +static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +{ struct sev_data_deactivate deactivate; if (!handle) @@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_guest_deactivate(&deactivate, NULL); up_read(&sev_deactivate_lock); - /* decommission handle */ - decommission.handle = handle; - sev_guest_decommission(&decommission, NULL); + sev_decommission(handle); } static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* Bind ASID to this guest */ ret = sev_bind_asid(kvm, start.handle, error); - if (ret) + if (ret) { + sev_decommission(start.handle); goto e_free_session; + } /* return handle to userspace */ params.handle = start.handle; @@ -763,7 +773,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, } static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, - unsigned long __user dst_uaddr, + void __user *dst_uaddr, unsigned long dst_paddr, int size, int *err) { @@ -787,8 +797,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, if (tpage) { offset = paddr & 15; - if (copy_to_user((void __user *)(uintptr_t)dst_uaddr, - page_address(tpage) + offset, size)) + if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size)) ret = -EFAULT; } @@ -800,9 +809,9 @@ e_free: } static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, - unsigned long __user vaddr, + void __user *vaddr, unsigned long dst_paddr, - unsigned long __user dst_vaddr, + void __user *dst_vaddr, int size, int *error) { struct page *src_tpage = NULL; @@ -810,13 +819,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, int ret, len = size; /* If source buffer is not aligned then use an intermediate buffer */ - if (!IS_ALIGNED(vaddr, 16)) { + if (!IS_ALIGNED((unsigned long)vaddr, 16)) { src_tpage = alloc_page(GFP_KERNEL); if (!src_tpage) return -ENOMEM; - if (copy_from_user(page_address(src_tpage), - (void __user *)(uintptr_t)vaddr, size)) { + if (copy_from_user(page_address(src_tpage), vaddr, size)) { __free_page(src_tpage); return -EFAULT; } @@ -830,7 +838,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, * - copy the source buffer in an intermediate buffer * - use the intermediate buffer as source buffer */ - if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { + if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; dst_tpage = alloc_page(GFP_KERNEL); @@ -855,7 +863,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, page_address(src_tpage), size); else { if (copy_from_user(page_address(dst_tpage) + dst_offset, - (void __user *)(uintptr_t)vaddr, size)) { + vaddr, size)) { ret = -EFAULT; goto e_free; } @@ -935,15 +943,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (dec) ret = __sev_dbg_decrypt_user(kvm, __sme_page_pa(src_p[0]) + s_off, - dst_vaddr, + (void __user *)dst_vaddr, __sme_page_pa(dst_p[0]) + d_off, len, &argp->error); else ret = __sev_dbg_encrypt_user(kvm, __sme_page_pa(src_p[0]) + s_off, - vaddr, + (void __user *)vaddr, __sme_page_pa(dst_p[0]) + d_off, - dst_vaddr, + (void __user *)dst_vaddr, len, &argp->error); sev_unpin_memory(kvm, src_p, n); @@ -1105,10 +1113,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp, struct sev_data_send_start data; int ret; + memset(&data, 0, sizeof(data)); data.handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); - if (ret < 0) - return ret; params->session_len = data.session_len; if (copy_to_user((void __user *)(uintptr_t)argp->data, params, @@ -1217,10 +1224,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp, struct sev_data_send_update_data data; int ret; + memset(&data, 0, sizeof(data)); data.handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error); - if (ret < 0) - return ret; params->hdr_len = data.hdr_len; params->trans_len = data.trans_len; @@ -1764,7 +1770,8 @@ e_mirror_unlock: e_source_unlock: mutex_unlock(&source_kvm->lock); e_source_put: - fput(source_kvm_file); + if (source_kvm_file) + fput(source_kvm_file); return ret; } @@ -2198,7 +2205,7 @@ vmgexit_err: return -EINVAL; } -static void pre_sev_es_run(struct vcpu_svm *svm) +void sev_es_unmap_ghcb(struct vcpu_svm *svm) { if (!svm->ghcb) return; @@ -2234,9 +2241,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); - /* Perform any SEV-ES pre-run actions */ - pre_sev_es_run(svm); - /* Assign the asid allocated with this SEV guest */ svm->asid = asid; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b649f92287a2..e088086f3de6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to * defer the restoration of TSC_AUX until the CPU returns to userspace. */ -#define TSC_AUX_URET_SLOT 0 +static int tsc_aux_uret_slot __read_mostly = -1; static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; @@ -447,6 +447,11 @@ static int has_svm(void) return 0; } + if (pgtable_l5_enabled()) { + pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n"); + return 0; + } + return 1; } @@ -858,8 +863,8 @@ static __init void svm_adjust_mmio_mask(void) return; /* If memory encryption is not enabled, use existing mask */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; enc_bit = cpuid_ebx(0x8000001f) & 0x3f; @@ -959,8 +964,7 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } - if (boot_cpu_has(X86_FEATURE_RDTSCP)) - kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX); + tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); /* Check for pause filtering support */ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { @@ -1006,9 +1010,7 @@ static __init int svm_hardware_setup(void) } if (avic) { - if (!npt_enabled || - !boot_cpu_has(X86_FEATURE_AVIC) || - !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) { + if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) { avic = false; } else { pr_info("AVIC enabled\n"); @@ -1100,7 +1102,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) return svm->vmcb->control.tsc_offset; } -static void svm_check_invpcid(struct vcpu_svm *svm) +/* Evaluate instruction intercepts that depend on guest CPUID features. */ +static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, + struct vcpu_svm *svm) { /* * Intercept INVPCID if shadow paging is enabled to sync/free shadow @@ -1113,6 +1117,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm) else svm_clr_intercept(svm, INTERCEPT_INVPCID); } + + if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) { + if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + svm_clr_intercept(svm, INTERCEPT_RDTSCP); + else + svm_set_intercept(svm, INTERCEPT_RDTSCP); + } } static void init_vmcb(struct kvm_vcpu *vcpu) @@ -1235,8 +1246,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->current_vmcb->asid_generation = 0; svm->asid = 0; - svm->nested.vmcb12_gpa = 0; - svm->nested.last_vmcb12_gpa = 0; + svm->nested.vmcb12_gpa = INVALID_GPA; + svm->nested.last_vmcb12_gpa = INVALID_GPA; vcpu->arch.hflags = 0; if (!kvm_pause_in_guest(vcpu->kvm)) { @@ -1248,7 +1259,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_clr_intercept(svm, INTERCEPT_PAUSE); } - svm_check_invpcid(svm); + svm_recalc_instruction_intercepts(vcpu, svm); /* * If the host supports V_SPEC_CTRL then disable the interception @@ -1424,6 +1435,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + if (sev_es_guest(vcpu->kvm)) + sev_es_unmap_ghcb(svm); + if (svm->guest_state_loaded) return; @@ -1445,8 +1459,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) } } - if (static_cpu_has(X86_FEATURE_RDTSCP)) - kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull); + if (likely(tsc_aux_uret_slot >= 0)) + kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); svm->guest_state_loaded = true; } @@ -2655,11 +2669,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data |= (u64)svm->sysenter_esp_hi << 32; break; case MSR_TSC_AUX: - if (!boot_cpu_has(X86_FEATURE_RDTSCP)) - return 1; - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; msr_info->data = svm->tsc_aux; break; /* @@ -2876,30 +2885,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0; break; case MSR_TSC_AUX: - if (!boot_cpu_has(X86_FEATURE_RDTSCP)) - return 1; - - if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - - /* - * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has - * incomplete and conflicting architectural behavior. Current - * AMD CPUs completely ignore bits 63:32, i.e. they aren't - * reserved and always read as zeros. Emulate AMD CPU behavior - * to avoid explosions if the vCPU is migrated from an AMD host - * to an Intel host. - */ - data = (u32)data; - /* * TSC_AUX is usually changed only during boot and never read * directly. Intercept TSC_AUX instead of exposing it to the * guest via direct_access_msrs, and switch it via user return. */ preempt_disable(); - r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull); + r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull); preempt_enable(); if (r) return 1; @@ -3084,6 +3076,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, + [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op, [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd, [SVM_EXIT_MONITOR] = kvm_emulate_monitor, [SVM_EXIT_MWAIT] = kvm_emulate_mwait, @@ -3972,8 +3965,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && guest_cpuid_has(vcpu, X86_FEATURE_NRIPS); - /* Check again if INVPCID interception if required */ - svm_check_invpcid(svm); + svm_recalc_instruction_intercepts(vcpu, svm); /* For sev guests, the memory encryption bit is not reserved in CR3. */ if (sev_guest(vcpu->kvm)) { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 84b3133c2251..2908c6ab5bb4 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -20,6 +20,7 @@ #include <linux/bits.h> #include <asm/svm.h> +#include <asm/sev-common.h> #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) @@ -479,7 +480,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL -extern int avic; +extern bool avic; static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data) { @@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ -#define GHCB_VERSION_MAX 1ULL -#define GHCB_VERSION_MIN 1ULL - -#define GHCB_MSR_INFO_POS 0 -#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) - -#define GHCB_MSR_SEV_INFO_RESP 0x001 -#define GHCB_MSR_SEV_INFO_REQ 0x002 -#define GHCB_MSR_VER_MAX_POS 48 -#define GHCB_MSR_VER_MAX_MASK 0xffff -#define GHCB_MSR_VER_MIN_POS 32 -#define GHCB_MSR_VER_MIN_MASK 0xffff -#define GHCB_MSR_CBIT_POS 24 -#define GHCB_MSR_CBIT_MASK 0xff -#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ - ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ - (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ - (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ - GHCB_MSR_SEV_INFO_RESP) - -#define GHCB_MSR_CPUID_REQ 0x004 -#define GHCB_MSR_CPUID_RESP 0x005 -#define GHCB_MSR_CPUID_FUNC_POS 32 -#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff -#define GHCB_MSR_CPUID_VALUE_POS 32 -#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff -#define GHCB_MSR_CPUID_REG_POS 30 -#define GHCB_MSR_CPUID_REG_MASK 0x3 - -#define GHCB_MSR_TERM_REQ 0x100 -#define GHCB_MSR_TERM_REASON_SET_POS 12 -#define GHCB_MSR_TERM_REASON_SET_MASK 0xf -#define GHCB_MSR_TERM_REASON_POS 16 -#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + extern unsigned int max_sev_asid; @@ -581,6 +551,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu); +void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a61c015870e3..4f839148948b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed, TP_ARGS(msg, err), TP_STRUCT__entry( - __field(const char *, msg) + __string(msg, msg) __field(u32, err) ), TP_fast_assign( - __entry->msg = msg; + __assign_str(msg, msg); __entry->err = err; ), - TP_printk("%s%s", __entry->msg, !__entry->err ? "" : + TP_printk("%s%s", __get_str(msg), !__entry->err ? "" : __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) ); diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d1d77985e889..aa0e7872fcc9 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -90,8 +90,7 @@ static inline bool cpu_has_vmx_preemption_timer(void) static inline bool cpu_has_vmx_posted_intr(void) { - return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && - vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; } static inline bool cpu_has_load_ia32_efer(void) @@ -398,6 +397,9 @@ static inline u64 vmx_supported_debugctl(void) { u64 debugctl = 0; + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) debugctl |= DEBUGCTLMSR_LBR_MASK; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index bced76637823..6058a65a6ede 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) nested_vmx_handle_enlightened_vmptrld(vcpu, false); if (evmptrld_status == EVMPTRLD_VMFAIL || - evmptrld_status == EVMPTRLD_ERROR) { - pr_debug_ratelimited("%s: enlightened vmptrld failed\n", - __func__); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + evmptrld_status == EVMPTRLD_ERROR) return false; - } } return true; @@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) { - if (!nested_get_evmcs_page(vcpu)) + if (!nested_get_evmcs_page(vcpu)) { + pr_debug_ratelimited("%s: enlightened vmptrld failed\n", + __func__); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return false; + } if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) return false; @@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* Similarly, triple faults in L2 should never escape. */ WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); - kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { + /* + * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map + * Enlightened VMCS after migration and we still need to + * do that when something is forcing L2->L1 exit prior to + * the first L2 run. + */ + (void)nested_get_evmcs_page(vcpu); + } /* Service the TLB flush request for L2 before switching to L1. */ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 459748680daf..5f81ef092bd4 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -238,6 +238,20 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu) /* + * Bail out of the block loop if the VM has an assigned + * device, but the blocking vCPU didn't reconfigure the + * PI.NV to the wakeup vector, i.e. the assigned device + * came along after the initial check in pi_pre_block(). + */ +void vmx_pi_start_assignment(struct kvm *kvm) +{ + if (!irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK); +} + +/* * pi_update_irte - set IRTE for Posted-Interrupts * * @kvm: kvm diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 0bdc41391c5b..7f7b2326caf5 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -95,5 +95,6 @@ void __init pi_init_cpu(int cpu); bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu); int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); +void vmx_pi_start_assignment(struct kvm *kvm); #endif /* __KVM_X86_VMX_POSTED_INTR_H */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d000cddbd734..c2a779b688e6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -455,21 +455,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; -/* - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm - * will emulate SYSCALL in legacy mode if the vendor string in guest - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To - * support this emulation, IA32_STAR must always be included in - * vmx_uret_msrs_list[], even in i386 builds. - */ -static const u32 vmx_uret_msrs_list[] = { -#ifdef CONFIG_X86_64 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, -#endif - MSR_EFER, MSR_TSC_AUX, MSR_STAR, - MSR_IA32_TSX_CTRL, -}; - #if IS_ENABLED(CONFIG_HYPERV) static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -697,21 +682,11 @@ static bool is_valid_passthrough_msr(u32 msr) return r; } -static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) -{ - int i; - - for (i = 0; i < vmx->nr_uret_msrs; ++i) - if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr) - return i; - return -1; -} - struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) { int i; - i = __vmx_find_uret_msr(vmx, msr); + i = kvm_find_user_return_msr(msr); if (i >= 0) return &vmx->guest_uret_msrs[i]; return NULL; @@ -720,13 +695,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, struct vmx_uret_msr *msr, u64 data) { + unsigned int slot = msr - vmx->guest_uret_msrs; int ret = 0; u64 old_msr_data = msr->data; msr->data = data; - if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) { + if (msr->load_into_hardware) { preempt_disable(); - ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask); + ret = kvm_set_user_return_msr(slot, msr->data, msr->mask); preempt_enable(); if (ret) msr->data = old_msr_data; @@ -1078,7 +1054,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx) return false; } - i = __vmx_find_uret_msr(vmx, MSR_EFER); + i = kvm_find_user_return_msr(MSR_EFER); if (i < 0) return false; @@ -1240,11 +1216,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ if (!vmx->guest_uret_msrs_loaded) { vmx->guest_uret_msrs_loaded = true; - for (i = 0; i < vmx->nr_active_uret_msrs; ++i) - kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot, + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + if (!vmx->guest_uret_msrs[i].load_into_hardware) + continue; + + kvm_set_user_return_msr(i, vmx->guest_uret_msrs[i].data, vmx->guest_uret_msrs[i].mask); - + } } if (vmx->nested.need_vmcs12_to_shadow_sync) @@ -1751,19 +1730,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) vmx_clear_hlt(vcpu); } -static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr) +static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr, + bool load_into_hardware) { - struct vmx_uret_msr tmp; - int from, to; + struct vmx_uret_msr *uret_msr; - from = __vmx_find_uret_msr(vmx, msr); - if (from < 0) + uret_msr = vmx_find_uret_msr(vmx, msr); + if (!uret_msr) return; - to = vmx->nr_active_uret_msrs++; - tmp = vmx->guest_uret_msrs[to]; - vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from]; - vmx->guest_uret_msrs[from] = tmp; + uret_msr->load_into_hardware = load_into_hardware; } /* @@ -1773,29 +1749,42 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr) */ static void setup_msrs(struct vcpu_vmx *vmx) { - vmx->guest_uret_msrs_loaded = false; - vmx->nr_active_uret_msrs = 0; #ifdef CONFIG_X86_64 + bool load_syscall_msrs; + /* * The SYSCALL MSRs are only needed on long mode guests, and only * when EFER.SCE is set. */ - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { - vmx_setup_uret_msr(vmx, MSR_STAR); - vmx_setup_uret_msr(vmx, MSR_LSTAR); - vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK); - } + load_syscall_msrs = is_long_mode(&vmx->vcpu) && + (vmx->vcpu.arch.efer & EFER_SCE); + + vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs); + vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs); + vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs); #endif - if (update_transition_efer(vmx)) - vmx_setup_uret_msr(vmx, MSR_EFER); + vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx)); - if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) - vmx_setup_uret_msr(vmx, MSR_TSC_AUX); + vmx_setup_uret_msr(vmx, MSR_TSC_AUX, + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)); - vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL); + /* + * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new + * kernel and old userspace. If those guests run on a tsx=off host, do + * allow guests to use TSX_CTRL, but don't change the value in hardware + * so that TSX remains always disabled. + */ + vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM)); if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); + + /* + * The set of MSRs to load may have changed, reload MSRs before the + * next VM-Enter. + */ + vmx->guest_uret_msrs_loaded = false; } static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -1993,11 +1982,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - goto find_uret_msr; case MSR_IA32_DEBUGCTLMSR: msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); break; @@ -2031,6 +2015,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) if (!intel_pmu_lbr_is_enabled(vcpu)) debugctl &= ~DEBUGCTLMSR_LBR_MASK; + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + return debugctl; } @@ -2313,14 +2300,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else vmx->pt_desc.guest.addr_a[index / 2] = data; break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) - return 1; - /* Check reserved bit, higher 32 bits should be zero */ - if ((data >> 32) != 0) - return 1; - goto find_uret_msr; case MSR_IA32_PERF_CAPABILITIES: if (data && !vcpu_to_pmu(vcpu)->version) return 1; @@ -4369,7 +4348,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) xsaves_enabled, false); } - vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP); + /* + * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either + * feature is exposed to the guest. This creates a virtualization hole + * if both are supported in hardware but only one is exposed to the + * guest, but letting the guest execute RDTSCP or RDPID when either one + * is advertised is preferable to emulating the advertised instruction + * in KVM on #UD, and obviously better than incorrectly injecting #UD. + */ + if (cpu_has_vmx_rdtscp()) { + bool rdpid_or_rdtscp_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(vcpu, X86_FEATURE_RDPID); + + vmx_adjust_secondary_exec_control(vmx, &exec_control, + SECONDARY_EXEC_ENABLE_RDTSCP, + rdpid_or_rdtscp_enabled, false); + } vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID); vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND); @@ -4848,7 +4843,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_run *kvm_run = vcpu->run; u32 intr_info, ex_no, error_code; - unsigned long cr2, rip, dr6; + unsigned long cr2, dr6; u32 vect_info; vect_info = vmx->idt_vectoring_info; @@ -4938,8 +4933,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_run->exit_reason = KVM_EXIT_DEBUG; - rip = kvm_rip_read(vcpu); - kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; + kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); kvm_run->debug.arch.exception = ex_no; break; case AC_VECTOR: @@ -6253,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) switch (kvm_get_apic_mode(vcpu)) { case LAPIC_MODE_INVALID: WARN_ONCE(true, "Invalid local APIC state"); + break; case LAPIC_MODE_DISABLED: break; case LAPIC_MODE_XAPIC: @@ -6855,6 +6850,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { + struct vmx_uret_msr *tsx_ctrl; struct vcpu_vmx *vmx; int i, cpu, err; @@ -6877,43 +6873,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) goto free_vpid; } - BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); - - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { - u32 index = vmx_uret_msrs_list[i]; - u32 data_low, data_high; - int j = vmx->nr_uret_msrs; - - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) - continue; - - vmx->guest_uret_msrs[j].slot = i; - vmx->guest_uret_msrs[j].data = 0; - switch (index) { - case MSR_IA32_TSX_CTRL: - /* - * TSX_CTRL_CPUID_CLEAR is handled in the CPUID - * interception. Keep the host value unchanged to avoid - * changing CPUID bits under the host kernel's feet. - * - * hle=0, rtm=0, tsx_ctrl=1 can be found with some - * combinations of new kernel and old userspace. If - * those guests run on a tsx=off host, do allow guests - * to use TSX_CTRL, but do not change the value on the - * host so that TSX remains always disabled. - */ - if (boot_cpu_has(X86_FEATURE_RTM)) - vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - else - vmx->guest_uret_msrs[j].mask = 0; - break; - default: - vmx->guest_uret_msrs[j].mask = -1ull; - break; - } - ++vmx->nr_uret_msrs; + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + vmx->guest_uret_msrs[i].data = 0; + vmx->guest_uret_msrs[i].mask = -1ull; + } + if (boot_cpu_has(X86_FEATURE_RTM)) { + /* + * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception. + * Keep the host value unchanged to avoid changing CPUID bits + * under the host kernel's feet. + */ + tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); + if (tsx_ctrl) + vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; } err = alloc_loaded_vmcs(&vmx->vmcs01); @@ -7344,9 +7316,11 @@ static __init void vmx_set_cpu_caps(void) if (!cpu_has_vmx_xsaves()) kvm_cpu_cap_clear(X86_FEATURE_XSAVES); - /* CPUID 0x80000001 */ - if (!cpu_has_vmx_rdtscp()) + /* CPUID 0x80000001 and 0x7 (RDPID) */ + if (!cpu_has_vmx_rdtscp()) { kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + kvm_cpu_cap_clear(X86_FEATURE_RDPID); + } if (cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); @@ -7402,8 +7376,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. + * Note, RDPID is hidden behind ENABLE_RDTSCP. */ - case x86_intercept_rdtscp: + case x86_intercept_rdpid: if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) { exception->vector = UD_VECTOR; exception->error_code_valid = false; @@ -7746,6 +7721,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .nested_ops = &vmx_nested_ops, .update_pi_irte = pi_update_irte, + .start_assignment = vmx_pi_start_assignment, #ifdef CONFIG_X86_64 .set_hv_timer = vmx_set_hv_timer, @@ -7769,17 +7745,42 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, }; +static __init void vmx_setup_user_return_msrs(void) +{ + + /* + * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm + * will emulate SYSCALL in legacy mode if the vendor string in guest + * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To + * support this emulation, MSR_STAR is included in the list for i386, + * but is never loaded into hardware. MSR_CSTAR is also never loaded + * into hardware and is here purely for emulation purposes. + */ + const u32 vmx_uret_msrs_list[] = { + #ifdef CONFIG_X86_64 + MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, + #endif + MSR_EFER, MSR_TSC_AUX, MSR_STAR, + MSR_IA32_TSX_CTRL, + }; + int i; + + BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); + + for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) + kvm_add_user_return_msr(vmx_uret_msrs_list[i]); +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; struct desc_ptr dt; - int r, i, ept_lpage_level; + int r, ept_lpage_level; store_idt(&dt); host_idt_base = dt.address; - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) - kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]); + vmx_setup_user_return_msrs(); if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) return -EIO; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 008cb87ff088..16e4e457ba23 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -36,7 +36,7 @@ struct vmx_msrs { }; struct vmx_uret_msr { - unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */ + bool load_into_hardware; u64 data; u64 mask; }; @@ -245,8 +245,16 @@ struct vcpu_vmx { u32 idt_vectoring_info; ulong rflags; + /* + * User return MSRs are always emulated when enabled in the guest, but + * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside + * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to + * be loaded into hardware if those conditions aren't met. + * nr_active_uret_msrs tracks the number of MSRs that need to be loaded + * into hardware when running the guest. guest_uret_msrs[] is resorted + * whenever the number of "active" uret MSRs is modified. + */ struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS]; - int nr_uret_msrs; int nr_active_uret_msrs; bool guest_uret_msrs_loaded; #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6eda2834fc05..e0f4a46649d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); */ #define KVM_MAX_NR_USER_RETURN_MSRS 16 -struct kvm_user_return_msrs_global { - int nr; - u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS]; -}; - struct kvm_user_return_msrs { struct user_return_notifier urn; bool registered; @@ -198,7 +193,9 @@ struct kvm_user_return_msrs { } values[KVM_MAX_NR_USER_RETURN_MSRS]; }; -static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global; +u32 __read_mostly kvm_nr_uret_msrs; +EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs); +static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS]; static struct kvm_user_return_msrs __percpu *user_return_msrs; #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ @@ -330,23 +327,53 @@ static void kvm_on_user_return(struct user_return_notifier *urn) user_return_notifier_unregister(urn); } local_irq_restore(flags); - for (slot = 0; slot < user_return_msrs_global.nr; ++slot) { + for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) { values = &msrs->values[slot]; if (values->host != values->curr) { - wrmsrl(user_return_msrs_global.msrs[slot], values->host); + wrmsrl(kvm_uret_msrs_list[slot], values->host); values->curr = values->host; } } } -void kvm_define_user_return_msr(unsigned slot, u32 msr) +static int kvm_probe_user_return_msr(u32 msr) +{ + u64 val; + int ret; + + preempt_disable(); + ret = rdmsrl_safe(msr, &val); + if (ret) + goto out; + ret = wrmsrl_safe(msr, val); +out: + preempt_enable(); + return ret; +} + +int kvm_add_user_return_msr(u32 msr) { - BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS); - user_return_msrs_global.msrs[slot] = msr; - if (slot >= user_return_msrs_global.nr) - user_return_msrs_global.nr = slot + 1; + BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS); + + if (kvm_probe_user_return_msr(msr)) + return -1; + + kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr; + return kvm_nr_uret_msrs++; +} +EXPORT_SYMBOL_GPL(kvm_add_user_return_msr); + +int kvm_find_user_return_msr(u32 msr) +{ + int i; + + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + if (kvm_uret_msrs_list[i] == msr) + return i; + } + return -1; } -EXPORT_SYMBOL_GPL(kvm_define_user_return_msr); +EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); static void kvm_user_return_msr_cpu_online(void) { @@ -355,8 +382,8 @@ static void kvm_user_return_msr_cpu_online(void) u64 value; int i; - for (i = 0; i < user_return_msrs_global.nr; ++i) { - rdmsrl_safe(user_return_msrs_global.msrs[i], &value); + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + rdmsrl_safe(kvm_uret_msrs_list[i], &value); msrs->values[i].host = value; msrs->values[i].curr = value; } @@ -371,7 +398,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) value = (value & mask) | (msrs->values[slot].host & ~mask); if (value == msrs->values[slot].curr) return 0; - err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value); + err = wrmsrl_safe(kvm_uret_msrs_list[slot], value); if (err) return 1; @@ -1149,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + fixed |= DR6_BUS_LOCK; return fixed; } @@ -1615,6 +1645,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * invokes 64-bit SYSENTER. */ data = get_canonical(data, vcpu_virt_addr_bits(vcpu)); + break; + case MSR_TSC_AUX: + if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) + return 1; + + if (!host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + return 1; + + /* + * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has + * incomplete and conflicting architectural behavior. Current + * AMD CPUs completely ignore bits 63:32, i.e. they aren't + * reserved and always read as zeros. Enforce Intel's reserved + * bits check if and only if the guest CPU is Intel, and clear + * the bits in all other cases. This ensures cross-vendor + * migration will provide consistent behavior for the guest. + */ + if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0) + return 1; + + data = (u32)data; + break; } msr.data = data; @@ -1651,6 +1705,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; + switch (index) { + case MSR_TSC_AUX: + if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) + return 1; + + if (!host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + return 1; + break; + } + msr.index = index; msr.host_initiated = host_initiated; @@ -3006,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; + + if (!tdp_enabled) { + /* + * A TLB flush on behalf of the guest is equivalent to + * INVPCID(all), toggling CR4.PGE, etc., which requires + * a forced sync of the shadow page tables. Unload the + * entire MMU here and the subsequent load will sync the + * shadow page tables, and also flush the TLB. + */ + kvm_mmu_unload(vcpu); + return; + } + static_call(kvm_x86_tlb_flush_guest)(vcpu); } @@ -3035,10 +3114,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu) * expensive IPIs. */ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) { + u8 st_preempted = xchg(&st->preempted, 0); + trace_kvm_pv_tlb_flush(vcpu->vcpu_id, - st->preempted & KVM_VCPU_FLUSH_TLB); - if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) + st_preempted & KVM_VCPU_FLUSH_TLB); + if (st_preempted & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb_guest(vcpu); + } else { + st->preempted = 0; } vcpu->arch.st.preempted = 0; @@ -3402,7 +3485,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: - case MSR_K8_SYSCFG: + case MSR_AMD64_SYSCFG: case MSR_K8_TSEG_ADDR: case MSR_K8_TSEG_MASK: case MSR_VM_HSAVE_PA: @@ -5468,14 +5551,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, struct kvm_msr_filter_range *user_range) { - struct msr_bitmap_range range; unsigned long *bitmap = NULL; size_t bitmap_size; - int r; if (!user_range->nmsrs) return 0; + if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) + return -EINVAL; + + if (!user_range->flags) + return -EINVAL; + bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long); if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE) return -EINVAL; @@ -5484,31 +5571,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, if (IS_ERR(bitmap)) return PTR_ERR(bitmap); - range = (struct msr_bitmap_range) { + msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) { .flags = user_range->flags, .base = user_range->base, .nmsrs = user_range->nmsrs, .bitmap = bitmap, }; - if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) { - r = -EINVAL; - goto err; - } - - if (!range.flags) { - r = -EINVAL; - goto err; - } - - /* Everything ok, add this range identifier. */ - msr_filter->ranges[msr_filter->count] = range; msr_filter->count++; - return 0; -err: - kfree(bitmap); - return r; } static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) @@ -5937,7 +6008,8 @@ static void kvm_init_msr_list(void) continue; break; case MSR_TSC_AUX: - if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) + if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) && + !kvm_cpu_cap_has(X86_FEATURE_RDPID)) continue; break; case MSR_IA32_UMWAIT_CONTROL: @@ -7034,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) { - emul_to_vcpu(ctxt)->arch.hflags = emul_flags; + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + + vcpu->arch.hflags = emul_flags; + kvm_mmu_reset_context(vcpu); } static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, @@ -7171,6 +7246,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); + ctxt->interruptibility = 0; + ctxt->have_exception = false; + ctxt->exception.vector = -1; + ctxt->perm_ok = false; + init_decode_cache(ctxt); vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } @@ -7506,14 +7586,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, kvm_vcpu_check_breakpoint(vcpu, &r)) return r; - ctxt->interruptibility = 0; - ctxt->have_exception = false; - ctxt->exception.vector = -1; - ctxt->perm_ok = false; - - ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; - - r = x86_decode_insn(ctxt, insn, insn_len); + r = x86_decode_insn(ctxt, insn, insn_len, emulation_type); trace_kvm_emulate_insn_start(vcpu); ++vcpu->stat.insn_emulation; @@ -8040,6 +8113,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work) static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); /* + * Indirection to move queue_work() out of the tk_core.seq write held + * region to prevent possible deadlocks against time accessors which + * are invoked with work related locks held. + */ +static void pvclock_irq_work_fn(struct irq_work *w) +{ + queue_work(system_long_wq, &pvclock_gtod_work); +} + +static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn); + +/* * Notification about pvclock gtod data update. */ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, @@ -8050,13 +8135,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, update_pvclock_gtod(tk); - /* disable master clock if host does not trust, or does not - * use, TSC based clocksource. + /* + * Disable master clock if host does not trust, or does not use, + * TSC based clocksource. Delegate queue_work() to irq_work as + * this is invoked with tk_core.seq write held. */ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && atomic_read(&kvm_guest_has_master_clock) != 0) - queue_work(system_long_wq, &pvclock_gtod_work); - + irq_work_queue(&pvclock_irq_work); return 0; } @@ -8118,6 +8204,7 @@ int kvm_arch_init(void *opaque) printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n"); goto out_free_x86_emulator_cache; } + kvm_nr_uret_msrs = 0; r = kvm_mmu_module_init(); if (r) @@ -8168,10 +8255,13 @@ void kvm_arch_exit(void) cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + irq_work_sync(&pvclock_irq_work); + cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; kvm_mmu_module_exit(); free_percpu(user_return_msrs); + kmem_cache_destroy(x86_emulator_cache); kmem_cache_destroy(x86_fpu_cache); #ifdef CONFIG_KVM_XEN static_key_deferred_flush(&kvm_xen_enabled); @@ -8289,6 +8379,9 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) vcpu->stat.directed_yield_attempted++; + if (single_task_running()) + goto no_yield; + rcu_read_lock(); map = rcu_dereference(vcpu->kvm->arch.apic_map); @@ -9425,7 +9518,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (r <= 0) break; - kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_clear_request(KVM_REQ_UNBLOCK, vcpu); if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); @@ -10044,8 +10137,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, kvm_update_dr7(vcpu); if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); + vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu); /* * Trigger an rflags update that will inject or remove the trace @@ -11428,7 +11520,8 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) void kvm_arch_start_assignment(struct kvm *kvm) { - atomic_inc(&kvm->arch.assigned_device_count); + if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1) + static_call_cond(kvm_x86_start_assignment)(kvm); } EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b93d6cd08a7f..121921b2927c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -5,7 +5,7 @@ #include <xen/xen.h> #include <asm/fpu/internal.h> -#include <asm/sev-es.h> +#include <asm/sev.h> #include <asm/traps.h> #include <asm/kdebug.h> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1c548ad00752..6bda7f67d737 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -836,8 +836,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (si_code == SEGV_PKUERR) force_sig_pkuerr((void __user *)address, pkey); - - force_sig_fault(SIGSEGV, si_code, (void __user *)address); + else + force_sig_fault(SIGSEGV, si_code, (void __user *)address); local_irq_disable(); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 12c686c65ea9..60ade7dd71bd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -118,7 +118,9 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des if (!IS_ENABLED(CONFIG_EFI)) return; - if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA || + (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA && + efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME)) desc->flags |= IORES_MAP_ENCRYPTED; } diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 04aba7e80a36..470b20208430 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -504,10 +504,6 @@ void __init sme_enable(struct boot_params *bp) #define AMD_SME_BIT BIT(0) #define AMD_SEV_BIT BIT(1) - /* Check the SEV MSR whether SEV or SME is enabled */ - sev_status = __rdmsr(MSR_AMD64_SEV); - feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; - /* * Check for the SME/SEV feature: * CPUID Fn8000_001F[EAX] @@ -519,17 +515,22 @@ void __init sme_enable(struct boot_params *bp) eax = 0x8000001f; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); - if (!(eax & feature_mask)) + /* Check whether SEV or SME is supported */ + if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT))) return; me_mask = 1UL << (ebx & 0x3f); + /* Check the SEV MSR whether SEV or SME is enabled */ + sev_status = __rdmsr(MSR_AMD64_SEV); + feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + /* Check if memory encryption is enabled */ if (feature_mask == AMD_SME_BIT) { /* * No SME if Hypervisor bit is set. This check is here to * prevent a guest from trying to enable SME. For running as a - * KVM guest the MSR_K8_SYSCFG will be sufficient, but there + * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there * might be other hypervisors which emulate that MSR as non-zero * or even pass it through to the guest. * A malicious hypervisor can still trick a guest into this @@ -542,8 +543,8 @@ void __init sme_enable(struct boot_params *bp) return; /* For SME, check the SYSCFG MSR */ - msr = __rdmsr(MSR_K8_SYSCFG); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + msr = __rdmsr(MSR_AMD64_SYSCFG); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; } else { /* SEV state cannot be controlled by a command line option */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 5eb4dc2b97da..e94da744386f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) /* make sure all non-reserved blocks are inside the limits */ bi->start = max(bi->start, low); - bi->end = min(bi->end, high); + + /* preserve info for non-RAM areas above 'max_pfn': */ + if (bi->end > high) { + numa_add_memblk_to(bi->nid, high, bi->end, + &numa_reserved_meminfo); + bi->end = high; + } /* and there's no empty block */ if (bi->start >= bi->end) diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index ae744b6a0785..dd40d3fea74e 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -284,7 +284,7 @@ static int __init early_root_info_init(void) /* need to take out [4G, TOM2) for RAM*/ /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is enabled? */ if (val & (1<<21)) { diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 02dc64625e64..2edd86649468 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +#define RS690_LOWER_TOP_OF_DRAM2 0x30 +#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1 +#define RS690_UPPER_TOP_OF_DRAM2 0x31 +#define RS690_HTIU_NB_INDEX 0xA8 +#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100 +#define RS690_HTIU_NB_DATA 0xAC + +/* + * Some BIOS implementations support RAM above 4GB, but do not configure the + * PCI host to respond to bus master accesses for these addresses. These + * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA + * works as expected for addresses below 4GB. + * + * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57) + * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf + */ +static void rs690_fix_64bit_dma(struct pci_dev *pdev) +{ + u32 val = 0; + phys_addr_t top_of_dram = __pa(high_memory - 1) + 1; + + if (top_of_dram <= (1ULL << 32)) + return; + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2); + pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val); + + if (val) + return; + + pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, + top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); + #endif diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index df7b5477fc4f..7515e78ef898 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,7 +47,7 @@ #include <asm/realmode.h> #include <asm/time.h> #include <asm/pgalloc.h> -#include <asm/sev-es.h> +#include <asm/sev.h> /* * We allocate runtime services regions top-down, starting from -4G, i.e. diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 7850111008a8..b15ebfe40a73 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -450,6 +450,18 @@ void __init efi_free_boot_services(void) size -= rm_size; } + /* + * Don't free memory under 1M for two reasons: + * - BIOS might clobber it + * - Crash kernel needs it to be reserved + */ + if (start + size < SZ_1M) + continue; + if (start < SZ_1M) { + size -= (SZ_1M - start); + start = SZ_1M; + } + memblock_free_late(start, size); } diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 1be71ef5e4c4..6534c92d0f83 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -9,7 +9,7 @@ #include <asm/realmode.h> #include <asm/tlbflush.h> #include <asm/crash.h> -#include <asm/sev-es.h> +#include <asm/sev.h> struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; @@ -29,14 +29,16 @@ void __init reserve_real_mode(void) /* Has to be under 1M so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (!mem) { + if (!mem) pr_info("No sub-1M memory is available for the trampoline\n"); - return; - } + else + set_real_mode_mem(mem); - memblock_reserve(mem, size); - set_real_mode_mem(mem); - crash_reserve_low_1M(); + /* + * Unconditionally reserve the entire fisrt 1M, see comment in + * setup_arch(). + */ + memblock_reserve(0, SZ_1M); } static void sme_sev_setup_real_mode(struct trampoline_header *th) diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 84c5d1b33d10..cc8391f86cdb 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -123,9 +123,9 @@ SYM_CODE_START(startup_32) */ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone - movl $MSR_K8_SYSCFG, %ecx + movl $MSR_AMD64_SYSCFG, %ecx rdmsr - bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax + bts $MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax jc .Ldone /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 17503fed2017..e87699aa2dc8 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1273,16 +1273,16 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Get mfn list */ xen_build_dynamic_phys_to_machine(); + /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); + x86_configure_nx(); + /* * Set up kernel GDT and segment registers, mainly so that * -fstack-protector code can be executed. */ xen_setup_gdt(0); - /* Work out if we support NX */ - get_cpu_cap(&boot_cpu_data); - x86_configure_nx(); - /* Determine virtual and physical address sizes */ get_cpu_address_sizes(&boot_cpu_data); diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 9d76d433d3d6..fd2f30227d96 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -413,7 +413,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self |