diff options
Diffstat (limited to 'arch')
152 files changed, 2716 insertions, 804 deletions
diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 6dde51c2aed3..e4775bbceecc 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -118,7 +118,7 @@ }; pinctrl_fwqspid_default: fwqspid_default { - function = "FWQSPID"; + function = "FWSPID"; groups = "FWQSPID"; }; diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index dff18fc9a906..21294f775a20 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -290,6 +290,7 @@ hvs: hvs@7e400000 { compatible = "brcm,bcm2711-hvs"; + reg = <0x7e400000 0x8000>; interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi index 5e55198e4576..54cd37336be7 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi +++ b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi @@ -158,6 +158,24 @@ status = "disabled"; }; +/* Unusable as clockevent because if unreliable oscillator, allow to idle */ +&timer1_target { + /delete-property/ti,no-reset-on-init; + /delete-property/ti,no-idle; + timer@0 { + /delete-property/ti,timer-alwon; + }; +}; + +/* Preferred timer for clockevent */ +&timer12_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + /* Always clocked by secure_32k_fck */ + }; +}; + &twl_gpio { ti,use-leds; /* diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index c2995a280729..162d0726b008 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -14,36 +14,3 @@ display2 = &tv0; }; }; - -/* Unusable as clocksource because of unreliable oscillator */ -&counter32k { - status = "disabled"; -}; - -/* Unusable as clockevent because if unreliable oscillator, allow to idle */ -&timer1_target { - /delete-property/ti,no-reset-on-init; - /delete-property/ti,no-idle; - timer@0 { - /delete-property/ti,timer-alwon; - }; -}; - -/* Preferred always-on timer for clocksource */ -&timer12_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - /* Always clocked by secure_32k_fck */ - }; -}; - -/* Preferred timer for clockevent */ -&timer2_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - assigned-clocks = <&gpt2_fck>; - assigned-clock-parents = <&sys_ck>; - }; -}; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 8eed9e3a92e9..5868eb512f69 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -718,8 +718,8 @@ interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>; assigned-clocks = <&cru SCLK_HDMI_PHY>; assigned-clock-parents = <&hdmi_phy>; - clocks = <&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>; - clock-names = "isfr", "iahb", "cec"; + clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; + clock-names = "iahb", "isfr", "cec"; pinctrl-names = "default"; pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>; resets = <&cru SRST_HDMI_P>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index aaaa61875701..45a9d9b908d2 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -971,7 +971,7 @@ status = "disabled"; }; - crypto: cypto-controller@ff8a0000 { + crypto: crypto@ff8a0000 { compatible = "rockchip,rk3288-crypto"; reg = <0x0 0xff8a0000 0x0 0x4000>; interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts index d35fb79d2f51..4db43324dafa 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts @@ -5,7 +5,13 @@ / { /* Version of Nyan Big with 1080p panel */ - panel { - compatible = "auo,b133htn01"; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "auo,b133htn01"; + }; + }; + }; }; }; diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts index 1d2aac2cb6d0..fdc1d64dfff9 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big.dts @@ -13,12 +13,15 @@ "google,nyan-big-rev1", "google,nyan-big-rev0", "google,nyan-big", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "auo,b133xtn01"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "auo,b133xtn01"; + backlight = <&backlight>; + }; + }; + }; }; mmc@700b0400 { /* SD Card on this bus */ diff --git a/arch/arm/boot/dts/tegra124-nyan-blaze.dts b/arch/arm/boot/dts/tegra124-nyan-blaze.dts index 677babde6460..abdf4456826f 100644 --- a/arch/arm/boot/dts/tegra124-nyan-blaze.dts +++ b/arch/arm/boot/dts/tegra124-nyan-blaze.dts @@ -15,12 +15,15 @@ "google,nyan-blaze-rev0", "google,nyan-blaze", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "samsung,ltn140at29-301"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "samsung,ltn140at29-301"; + backlight = <&backlight>; + }; + }; + }; }; sound { diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index 232c90604df9..6a9592ceb5f2 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -48,6 +48,13 @@ dpaux@545c0000 { vdd-supply = <&vdd_3v3_panel>; status = "okay"; + + aux-bus { + panel: panel { + compatible = "lg,lp129qe"; + backlight = <&backlight>; + }; + }; }; }; @@ -1080,13 +1087,6 @@ }; }; - panel: panel { - compatible = "lg,lp129qe"; - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; - }; - vdd_mux: regulator-mux { compatible = "regulator-fixed"; regulator-name = "+VDD_MUX"; diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6fe67963ba5a..aee73ef5b3dc 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -107,6 +107,16 @@ .endm #endif +#if __LINUX_ARM_ARCH__ < 7 + .macro dsb, args + mcr p15, 0, r0, c7, c10, 4 + .endm + + .macro isb, args + mcr p15, 0, r0, c7, c5, 4 + .endm +#endif + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) .if \save diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h new file mode 100644 index 000000000000..85f9e538fb32 --- /dev/null +++ b/arch/arm/include/asm/spectre.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +enum { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum { + __SPECTRE_V2_METHOD_BPIALL, + __SPECTRE_V2_METHOD_ICIALLU, + __SPECTRE_V2_METHOD_SMC, + __SPECTRE_V2_METHOD_HVC, + __SPECTRE_V2_METHOD_LOOP8, +}; + +enum { + SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL), + SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU), + SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC), + SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC), + SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8), +}; + +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES +void spectre_v2_update_state(unsigned int state, unsigned int methods); +#else +static inline void spectre_v2_update_state(unsigned int state, + unsigned int methods) +{} +#endif + +int spectre_bhb_update_vectors(unsigned int method); + +#endif diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 4a91428c324d..fad45c884e98 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -26,6 +26,19 @@ #define ARM_MMU_DISCARD(x) x #endif +/* + * ld.lld does not support NOCROSSREFS: + * https://github.com/ClangBuiltLinux/linux/issues/1609 + */ +#ifdef CONFIG_LD_IS_LLD +#define NOCROSSREFS +#endif + +/* Set start/end symbol names to the LMA for the section */ +#define ARM_LMA(sym, section) \ + sym##_start = LOADADDR(section); \ + sym##_end = LOADADDR(section) + SIZEOF(section) + #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ @@ -110,19 +123,31 @@ * only thing that matters is their relative offsets */ #define ARM_VECTORS \ - __vectors_start = .; \ - .vectors 0xffff0000 : AT(__vectors_start) { \ - *(.vectors) \ + __vectors_lma = .; \ + OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \ + .vectors { \ + *(.vectors) \ + } \ + .vectors.bhb.loop8 { \ + *(.vectors.bhb.loop8) \ + } \ + .vectors.bhb.bpiall { \ + *(.vectors.bhb.bpiall) \ + } \ } \ - . = __vectors_start + SIZEOF(.vectors); \ - __vectors_end = .; \ + ARM_LMA(__vectors, .vectors); \ + ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8); \ + ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall); \ + . = __vectors_lma + SIZEOF(.vectors) + \ + SIZEOF(.vectors.bhb.loop8) + \ + SIZEOF(.vectors.bhb.bpiall); \ \ - __stubs_start = .; \ - .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \ + __stubs_lma = .; \ + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \ *(.stubs) \ } \ - . = __stubs_start + SIZEOF(.stubs); \ - __stubs_end = .; \ + ARM_LMA(__stubs, .stubs); \ + . = __stubs_lma + SIZEOF(.stubs); \ \ PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors)); diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ae295a3bcfef..6ef3b535b7bf 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -106,4 +106,6 @@ endif obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 5cd057859fe9..ee3f7a599181 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1002,12 +1002,11 @@ vector_\name: sub lr, lr, #\correction .endif - @ - @ Save r0, lr_<exception> (parent PC) and spsr_<exception> - @ (parent CPSR) - @ + @ Save r0, lr_<exception> (parent PC) stmia sp, {r0, lr} @ save r0, lr - mrs lr, spsr + + @ Save spsr_<exception> (parent CPSR) +2: mrs lr, spsr str lr, [sp, #8] @ save spsr @ @@ -1028,6 +1027,44 @@ vector_\name: movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name) +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .subsection 1 + .align 5 +vector_bhb_loop8_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_<exception> (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mov r0, #8 +3: b . + 4 + subs r0, r0, #1 + bne 3b + dsb + isb + b 2b +ENDPROC(vector_bhb_loop8_\name) + +vector_bhb_bpiall_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_<exception> (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mcr p15, 0, r0, c7, c5, 6 @ BPIALL + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". + b 2b +ENDPROC(vector_bhb_bpiall_\name) + .previous +#endif + .align 2 @ handler addresses follow this label 1: @@ -1036,6 +1073,10 @@ ENDPROC(vector_\name) .section .stubs, "ax", %progbits @ This must be the first word .word vector_swi +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .word vector_bhb_loop8_swi + .word vector_bhb_bpiall_swi +#endif vector_rst: ARM( swi SYS_ERROR0 ) @@ -1150,8 +1191,10 @@ vector_addrexcptn: * FIQ "NMI" handler *----------------------------------------------------------------------------- * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86 - * systems. + * systems. This must be the last vector stub, so lets place it in its own + * subsection. */ + .subsection 2 vector_stub fiq, FIQ_MODE, 4 .long __fiq_usr @ 0 (USR_26 / USR_32) @@ -1184,6 +1227,30 @@ vector_addrexcptn: W(b) vector_irq W(b) vector_fiq +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .section .vectors.bhb.loop8, "ax", %progbits +.L__vectors_bhb_loop8_start: + W(b) vector_rst + W(b) vector_bhb_loop8_und + W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004 + W(b) vector_bhb_loop8_pabt + W(b) vector_bhb_loop8_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_loop8_irq + W(b) vector_bhb_loop8_fiq + + .section .vectors.bhb.bpiall, "ax", %progbits +.L__vectors_bhb_bpiall_start: + W(b) vector_rst + W(b) vector_bhb_bpiall_und + W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008 + W(b) vector_bhb_bpiall_pabt + W(b) vector_bhb_bpiall_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_bpiall_irq + W(b) vector_bhb_bpiall_fiq +#endif + .data .align 2 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ac86c34682bb..dbc1913ee30b 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -154,12 +154,36 @@ ENDPROC(ret_from_fork) */ .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +ENTRY(vector_bhb_loop8_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mov r8, #8 +1: b 2f +2: subs r8, r8, #1 + bne 1b + dsb + isb + b 3f +ENDPROC(vector_bhb_loop8_swi) + + .align 5 +ENTRY(vector_bhb_bpiall_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mcr p15, 0, r8, c7, c5, 6 @ BPIALL + isb + b 3f +ENDPROC(vector_bhb_bpiall_swi) +#endif + .align 5 ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M v7m_exception_entry #else sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 +3: ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 7bd30c0a4280..22f937e6f3ff 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) return 0; } -static struct undef_hook kgdb_brkpt_hook = { +static struct undef_hook kgdb_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; -static struct undef_hook kgdb_compiled_brkpt_hook = { +static struct undef_hook kgdb_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_BREAKINST & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; +static struct undef_hook kgdb_compiled_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_COMPILED_BREAK & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_compiled_brk_fn +}; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -210,8 +226,10 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_undef_hook(&kgdb_brkpt_hook); - register_undef_hook(&kgdb_compiled_brkpt_hook); + register_undef_hook(&kgdb_brkpt_arm_hook); + register_undef_hook(&kgdb_brkpt_thumb_hook); + register_undef_hook(&kgdb_compiled_brkpt_arm_hook); + register_undef_hook(&kgdb_compiled_brkpt_thumb_hook); return 0; } @@ -224,8 +242,10 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_undef_hook(&kgdb_brkpt_hook); - unregister_undef_hook(&kgdb_compiled_brkpt_hook); + unregister_undef_hook(&kgdb_brkpt_arm_hook); + unregister_undef_hook(&kgdb_brkpt_thumb_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c new file mode 100644 index 000000000000..0dcefc36fb7a --- /dev/null +++ b/arch/arm/kernel/spectre.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/bpf.h> +#include <linux/cpu.h> +#include <linux/device.h> + +#include <asm/spectre.h> + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +static unsigned int spectre_v2_state; +static unsigned int spectre_v2_methods; + +void spectre_v2_update_state(unsigned int state, unsigned int method) +{ + if (state > spectre_v2_state) + spectre_v2_state = state; + spectre_v2_methods |= method; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const char *method; + + if (spectre_v2_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "%s\n", "Not affected"); + + if (spectre_v2_state != SPECTRE_MITIGATED) + return sprintf(buf, "%s\n", "Vulnerable"); + + if (_unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + switch (spectre_v2_methods) { + case SPECTRE_V2_METHOD_BPIALL: + method = "Branch predictor hardening"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + method = "I-cache invalidation"; + break; + + case SPECTRE_V2_METHOD_SMC: + case SPECTRE_V2_METHOD_HVC: + method = "Firmware call"; + break; + + case SPECTRE_V2_METHOD_LOOP8: + method = "History overwrite"; + break; + + default: + method = "Multiple mitigations"; + break; + } + + return sprintf(buf, "Mitigation: %s\n", method); +} diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index da04ed85855a..cae4a748811f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -30,6 +30,7 @@ #include <linux/atomic.h> #include <asm/cacheflush.h> #include <asm/exception.h> +#include <asm/spectre.h> #include <asm/unistd.h> #include <asm/traps.h> #include <asm/ptrace.h> @@ -789,10 +790,59 @@ static inline void __init kuser_init(void *vectors) } #endif +#ifndef CONFIG_CPU_V7M +static void copy_from_lma(void *vma, void *lma_start, void *lma_end) +{ + memcpy(vma, lma_start, lma_end - lma_start); +} + +static void flush_vectors(void *vma, size_t offset, size_t size) +{ + unsigned long start = (unsigned long)vma + offset; + unsigned long end = start + size; + + flush_icache_range(start, end); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +int spectre_bhb_update_vectors(unsigned int method) +{ + extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[]; + extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[]; + void *vec_start, *vec_end; + + if (system_state >= SYSTEM_FREEING_INITMEM) { + pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n", + smp_processor_id()); + return SPECTRE_VULNERABLE; + } + + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + vec_start = __vectors_bhb_loop8_start; + vec_end = __vectors_bhb_loop8_end; + break; + + case SPECTRE_V2_METHOD_BPIALL: + vec_start = __vectors_bhb_bpiall_start; + vec_end = __vectors_bhb_bpiall_end; + break; + + default: + pr_err("CPU%u: unknown Spectre BHB state %d\n", + smp_processor_id(), method); + return SPECTRE_VULNERABLE; + } + + copy_from_lma(vectors_page, vec_start, vec_end); + flush_vectors(vectors_page, 0, vec_end - vec_start); + + return SPECTRE_MITIGATED; +} +#endif + void __init early_trap_init(void *vectors_base) { -#ifndef CONFIG_CPU_V7M - unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; unsigned i; @@ -813,17 +863,20 @@ void __init early_trap_init(void *vectors_base) * into the vector page, mapped at 0xffff0000, and ensure these * are visible to the instruction stream. */ - memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); + copy_from_lma(vectors_base, __vectors_start, __vectors_end); + copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end); kuser_init(vectors_base); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); + flush_vectors(vectors_base, 0, PAGE_SIZE * 2); +} #else /* ifndef CONFIG_CPU_V7M */ +void __init early_trap_init(void *vectors_base) +{ /* * on V7-M there is no need to copy the vector table to a dedicated * memory area. The address is configurable and so a table in the kernel * image can be used. */ -#endif } +#endif diff --git a/arch/arm/mach-mstar/Kconfig b/arch/arm/mach-mstar/Kconfig index cd300eeedc20..0bf4d312bcfd 100644 --- a/arch/arm/mach-mstar/Kconfig +++ b/arch/arm/mach-mstar/Kconfig @@ -3,6 +3,7 @@ menuconfig ARCH_MSTARV7 depends on ARCH_MULTI_V7 select ARM_GIC select ARM_HEAVY_MB + select HAVE_ARM_ARCH_TIMER select MST_IRQ select MSTAR_MSC313_MPLL help diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 58afba346729..9724c16e9076 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -830,6 +830,7 @@ config CPU_BPREDICT_DISABLE config CPU_SPECTRE bool + select GENERIC_CPU_VULNERABILITIES config HARDEN_BRANCH_PREDICTOR bool "Harden the branch predictor against aliasing attacks" if EXPERT @@ -850,6 +851,16 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. +config HARDEN_BRANCH_HISTORY + bool "Harden Spectre style attacks against branch history" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. When + taking an exception, a sequence of branches overwrites the branch + history, or branch history is invalidated. + config TLS_REG_EMUL bool select NEED_KUSER_HELPERS diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 274e4f73fd33..5e2be37a198e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -212,12 +212,14 @@ early_param("ecc", early_ecc); static int __init early_cachepolicy(char *p) { pr_warn("cachepolicy kernel parameter not supported without cp15\n"); + return 0; } early_param("cachepolicy", early_cachepolicy); static int __init noalign_setup(char *__unused) { pr_warn("noalign kernel parameter not supported without cp15\n"); + return 1; } __setup("noalign", noalign_setup); diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 114c05ab4dd9..06dbfb968182 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -6,8 +6,35 @@ #include <asm/cp15.h> #include <asm/cputype.h> #include <asm/proc-fns.h> +#include <asm/spectre.h> #include <asm/system_misc.h> +#ifdef CONFIG_ARM_PSCI +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + + default: + return SPECTRE_VULNERABLE; + } +} +#else +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + return SPECTRE_VULNERABLE; +} +#endif + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); @@ -36,13 +63,61 @@ static void __maybe_unused call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void cpu_v7_spectre_init(void) +static unsigned int spectre_v2_install_workaround(unsigned int method) { const char *spectre_v2_method = NULL; int cpu = smp_processor_id(); if (per_cpu(harden_branch_predictor_fn, cpu)) - return; + return SPECTRE_MITIGATED; + + switch (method) { + case SPECTRE_V2_METHOD_BPIALL: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_bpiall; + spectre_v2_method = "BPIALL"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_iciallu; + spectre_v2_method = "ICIALLU"; + break; + + case SPECTRE_V2_METHOD_HVC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_hvc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; + spectre_v2_method = "hypervisor"; + break; + + case SPECTRE_V2_METHOD_SMC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_smc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; + spectre_v2_method = "firmware"; + break; + } + + if (spectre_v2_method) + pr_info("CPU%u: Spectre v2: using %s workaround\n", + smp_processor_id(), spectre_v2_method); + + return SPECTRE_MITIGATED; +} +#else +static unsigned int spectre_v2_install_workaround(unsigned int method) +{ + pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_v2_init(void) +{ + unsigned int state, method = 0; switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A8: @@ -51,69 +126,133 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_bpiall; - spectre_v2_method = "BPIALL"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; break; case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_iciallu; - spectre_v2_method = "ICIALLU"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_ICIALLU; break; -#ifdef CONFIG_ARM_PSCI case ARM_CPU_PART_BRAHMA_B53: /* Requires no workaround */ + state = SPECTRE_UNAFFECTED; break; + default: /* Other ARM CPUs require no workaround */ - if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) + if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) { + state = SPECTRE_UNAFFECTED; break; + } + fallthrough; - /* Cortex A57/A72 require firmware workaround */ - case ARM_CPU_PART_CORTEX_A57: - case ARM_CPU_PART_CORTEX_A72: { - struct arm_smccc_res res; - arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - return; + /* Cortex A57/A72 require firmware workaround */ + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + break; switch (arm_smccc_1_1_get_conduit()) { case SMCCC_CONDUIT_HVC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_hvc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_hvc_switch_mm; - spectre_v2_method = "hypervisor"; + method = SPECTRE_V2_METHOD_HVC; break; case SMCCC_CONDUIT_SMC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_smc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_smc_switch_mm; - spectre_v2_method = "firmware"; + method = SPECTRE_V2_METHOD_SMC; break; default: + state = SPECTRE_VULNERABLE; break; } } -#endif + + if (state == SPECTRE_MITIGATED) + state = spectre_v2_install_workaround(method); + + spectre_v2_update_state(state, method); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +static int spectre_bhb_method; + +static const char *spectre_bhb_method_name(int method) +{ + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + return "loop"; + + case SPECTRE_V2_METHOD_BPIALL: + return "BPIALL"; + + default: + return "unknown"; } +} - if (spectre_v2_method) - pr_info("CPU%u: Spectre v2: using %s workaround\n", - smp_processor_id(), spectre_v2_method); +static int spectre_bhb_install_workaround(int method) +{ + if (spectre_bhb_method != method) { + if (spectre_bhb_method) { + pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; + } + + if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE) + return SPECTRE_VULNERABLE; + + spectre_bhb_method = method; + } + + pr_info("CPU%u: Spectre BHB: using %s workaround\n", + smp_processor_id(), spectre_bhb_method_name(method)); + + return SPECTRE_MITIGATED; } #else -static void cpu_v7_spectre_init(void) +static int spectre_bhb_install_workaround(int method) { + return SPECTRE_VULNERABLE; } #endif +static void cpu_v7_spectre_bhb_init(void) +{ + unsigned int state, method = 0; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_BRAHMA_B15: + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_LOOP8; + break; + + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; + break; + + default: + state = SPECTRE_UNAFFECTED; + break; + } + + if (state == SPECTRE_MITIGATED) + state = spectre_bhb_install_workaround(method); + + spectre_v2_update_state(state, method); +} + static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, u32 mask, const char *msg) { @@ -142,16 +281,17 @@ static bool check_spectre_auxcr(bool *warned, u32 bit) void cpu_v7_ca8_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_bugs_init(void) { - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 09b885cc4db5..c842878f8133 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1252,9 +1252,6 @@ config HW_PERF_EVENTS def_bool y depends on ARM_PMU -config ARCH_HAS_FILTER_PGPROT - def_bool y - # Supported by clang >= 7.0 config CC_HAVE_SHADOW_CALL_STACK def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) @@ -1383,6 +1380,15 @@ config UNMAP_KERNEL_AT_EL0 If unsure, say Y. +config MITIGATE_SPECTRE_BRANCH_HISTORY + bool "Mitigate Spectre style attacks against branch history" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. + When taking an exception from user-space, a sequence of branches + or a firmware call overwrites the branch history. + config RODATA_FULL_DEFAULT_ENABLED bool "Apply r/o permissions of VM areas also to their linear aliases" default y diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 6288e104a089..a2635b14da30 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -543,8 +543,7 @@ <0x02000000 0x00 0x50000000 0x00 0x50000000 0x0 0x08000000>, <0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>; /* Standard AXI Translation entries as programmed by EDK2 */ - dma-ranges = <0x02000000 0x0 0x2c1c0000 0x0 0x2c1c0000 0x0 0x00040000>, - <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>, + dma-ranges = <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>, <0x43000000 0x8 0x00000000 0x8 0x00000000 0x2 0x00000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index f77f90ed416f..0c7a72c51a31 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -707,7 +707,6 @@ clocks = <&clk IMX8MM_CLK_VPU_DEC_ROOT>; assigned-clocks = <&clk IMX8MM_CLK_VPU_BUS>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_800M>; - resets = <&src IMX8MQ_RESET_VPU_RESET>; }; pgc_vpu_g1: power-domain@7 { diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index a987ff7156bd..09f7364dd1d0 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -132,7 +132,7 @@ scmi_sensor: protocol@15 { reg = <0x15>; - #thermal-sensor-cells = <0>; + #thermal-sensor-cells = <1>; }; }; }; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 0dd2d2ee765a..f4270cf18996 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -502,7 +502,7 @@ }; usb0: usb@ffb00000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb00000 0x40000>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0>; @@ -515,7 +515,7 @@ }; usb1: usb@ffb40000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb40000 0x40000>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index 04da07ae4420..1cee26479bfe 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -18,6 +18,7 @@ aliases { spi0 = &spi0; + ethernet0 = ð0; ethernet1 = ð1; mmc0 = &sdhci0; mmc1 = &sdhci1; @@ -138,7 +139,9 @@ /* * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and - * 2 size cells and also expects that the second range starts at 16 MB offset. If these + * 2 size cells and also expects that the second range starts at 16 MB offset. Also it + * expects that first range uses same address for PCI (child) and CPU (parent) cells (so + * no remapping) and that this address is the lowest from all specified ranges. If these * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB. @@ -147,6 +150,9 @@ * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7 * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33 + * Bug related to requirement of same child and parent addresses for first range is fixed + * in U-Boot version 2022.04 by following commit: + * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17 */ #address-cells = <3>; #size-cells = <2>; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index 673f4906eef9..fb78ef613b29 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -499,7 +499,7 @@ * (totaling 127 MiB) for MEM. */ ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x07f00000 /* Port 0 MEM */ - 0x81000000 0 0xefff0000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ + 0x81000000 0 0x00000000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc 0>, <0 0 0 2 &pcie_intc 1>, diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 2d48c3715fc6..aaa00da5351d 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1584,7 +1584,7 @@ #iommu-cells = <1>; nvidia,memory-controller = <&mc>; - status = "okay"; + status = "disabled"; }; smmu: iommu@12000000 { diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index 58845a14805f..e2b9ec134cb1 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -807,3 +807,8 @@ qcom,snoc-host-cap-8bit-quirk; }; + +&crypto { + /* FIXME: qce_start triggers an SError */ + status= "disable"; +}; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 53b39e718fb6..4b19744bcfb3 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -35,6 +35,24 @@ clock-frequency = <32000>; #clock-cells = <0>; }; + + ufs_phy_rx_symbol_0_clk: ufs-phy-rx-symbol-0 { + compatible = "fixed-clock"; + clock-frequency = <1000>; + #clock-cells = <0>; + }; + + ufs_phy_rx_symbol_1_clk: ufs-phy-rx-symbol-1 { + compatible = "fixed-clock"; + clock-frequency = <1000>; + #clock-cells = <0>; + }; + + ufs_phy_tx_symbol_0_clk: ufs-phy-tx-symbol-0 { + compatible = "fixed-clock"; + clock-frequency = <1000>; + #clock-cells = <0>; + }; }; cpus { @@ -603,9 +621,9 @@ <0>, <0>, <0>, - <0>, - <0>, - <0>, + <&ufs_phy_rx_symbol_0_clk>, + <&ufs_phy_rx_symbol_1_clk>, + <&ufs_phy_tx_symbol_0_clk>, <0>, <0>; }; @@ -1923,8 +1941,8 @@ <75000000 300000000>, <0 0>, <0 0>, - <75000000 300000000>, - <75000000 300000000>; + <0 0>, + <0 0>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 10c25ad2d0c7..02b97e838c47 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -726,7 +726,7 @@ compatible = "qcom,sm8450-smmu-500", "arm,mmu-500"; reg = <0 0x15000000 0 0x100000>; #iommu-cells = <2>; - #global-interrupts = <2>; + #global-interrupts = <1>; interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>, @@ -813,6 +813,7 @@ <GIC_SPI 412 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 707 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 690 IRQ_TYPE_LEVEL_HIGH>, @@ -1072,9 +1073,10 @@ <&gcc GCC_USB30_PRIM_MASTER_CLK>, <&gcc GCC_AGGRE_USB3_PRIM_AXI_CLK>, <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>, - <&gcc GCC_USB30_PRIM_SLEEP_CLK>; + <&gcc GCC_USB30_PRIM_SLEEP_CLK>, + <&gcc GCC_USB3_0_CLKREF_EN>; clock-names = "cfg_noc", "core", "iface", "mock_utmi", - "sleep"; + "sleep", "xo"; assigned-clocks = <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>, <&gcc GCC_USB30_PRIM_MASTER_CLK>; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index f972704dfe7a..56dfbb2e2fa6 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -711,7 +711,7 @@ clock-names = "pclk", "timer"; }; - dmac: dmac@ff240000 { + dmac: dma-controller@ff240000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xff240000 0x0 0x4000>; interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 39db0b85b4da..b822533dc7f1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -489,7 +489,7 @@ status = "disabled"; }; - dmac: dmac@ff1f0000 { + dmac: dma-controller@ff1f0000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xff1f0000 0x0 0x4000>; interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 45a5ae5d2027..162f08bca0d4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -286,7 +286,7 @@ sound: sound { compatible = "rockchip,rk3399-gru-sound"; - rockchip,cpu = <&i2s0 &i2s2>; + rockchip,cpu = <&i2s0 &spdif>; }; }; @@ -437,10 +437,6 @@ ap_i2c_audio: &i2c8 { status = "okay"; }; -&i2s2 { - status = "okay"; -}; - &io_domains { status = "okay"; @@ -537,6 +533,17 @@ ap_i2c_audio: &i2c8 { vqmmc-supply = <&ppvar_sd_card_io>; }; +&spdif { + status = "okay"; + + /* + * SPDIF is routed internally to DP; we either don't use these pins, or + * mux them to something else. + */ + /delete-property/ pinctrl-0; + /delete-property/ pinctrl-names; +}; + &spi1 { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 292bb7e80cf3..3ae5d727e367 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -232,6 +232,7 @@ &usbdrd_dwc3_0 { dr_mode = "otg"; + extcon = <&extcon_usb3>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index fb67db4619ea..08fa00364b42 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -25,6 +25,13 @@ }; }; + extcon_usb3: extcon-usb3 { + compatible = "linux,extcon-usb-gpio"; + id-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&usb3_id>; + }; + clkin_gmac: external-gmac-clock { compatible = "fixed-clock"; clock-frequency = <125000000>; @@ -422,9 +429,22 @@ <4 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>; }; }; + + usb3 { + usb3_id: usb3-id { + rockchip,pins = + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; }; &sdhci { + /* + * Signal integrity isn't great at 200MHz but 100MHz has proven stable + * enough. + */ + max-frequency = <100000000>; + bus-width = <8>; mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index d3cdf6f42a30..080457a68e3c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1881,10 +1881,10 @@ interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH 0>; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, - <&cru PLL_VPLL>, + <&cru SCLK_HDMI_CEC>, <&cru PCLK_VIO_GRF>, - <&cru SCLK_HDMI_CEC>; - clock-names = "iahb", "isfr", "vpll", "grf", "cec"; + <&cru PLL_VPLL>; + clock-names = "iahb", "isfr", "cec", "grf", "vpll"; power-domains = <&power RK3399_PD_HDCP>; reg-io-width = <4>; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 166399b7f13f..d9eb92d59099 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -285,8 +285,6 @@ vcc_ddr: DCDC_REG3 { regulator-always-on; regulator-boot-on; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; regulator-initial-mode = <0x2>; regulator-name = "vcc_ddr"; regulator-state-mem { diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index 2fd313a295f8..d91df1cde736 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -32,13 +32,11 @@ clocks = <&cru SCLK_GMAC0>, <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_MAC0_REFOUT>, <&cru ACLK_GMAC0>, <&cru PCLK_GMAC0>, - <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>, - <&cru PCLK_XPCS>; + <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>; clock-names = "stmmaceth", "mac_clk_rx", "mac_clk_tx", "clk_mac_refout", "aclk_mac", "pclk_mac", - "clk_mac_speed", "ptp_ref", - "pclk_xpcs"; + "clk_mac_speed", "ptp_ref"; resets = <&cru SRST_A_GMAC0>; reset-names = "stmmaceth"; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index a68033a23975..8ccce54ee8e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -651,7 +651,7 @@ status = "disabled"; }; - dmac0: dmac@fe530000 { + dmac0: dma-controller@fe530000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xfe530000 0x0 0x4000>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, @@ -662,7 +662,7 @@ #dma-cells = <1>; }; - dmac1: dmac@fe550000 { + dmac1: dma-controller@fe550000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xfe550000 0x0 0x4000>; interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index e8bd0af0141c..6ebdc0f834a7 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -109,6 +109,13 @@ .endm /* + * Clear Branch History instruction + */ + .macro clearbhb + hint #22 + .endm + +/* * Speculation barrier */ .macro sb @@ -850,4 +857,50 @@ alternative_endif #endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */ + .macro __mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_iter + mov \tmp, #32 // Patched to correct the immediate +alternative_cb_end +.Lspectre_bhb_loop\@: + b . + 4 + subs \tmp, \tmp, #1 + b.ne .Lspectre_bhb_loop\@ + sb +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + .macro mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_mitigation_enable + b .L_spectre_bhb_loop_done\@ // Patched to NOP +alternative_cb_end + __mitigate_spectre_bhb_loop \tmp +.L_spectre_bhb_loop_done\@: +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + /* Save/restores x0-x3 to the stack */ + .macro __mitigate_spectre_bhb_fw +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 +alternative_cb smccc_patch_fw_mitigation_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + .macro mitigate_spectre_bhb_clear_insn +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_clearbhb + /* Patched to NOP when not supported */ + clearbhb + isb +alternative_cb_end +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ef6be92b1921..a77b5f49b3a6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -637,6 +637,35 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } + +static inline bool supports_csv2p3(int scope) +{ + u64 pfr0; + u8 csv2_val; + + if (scope == SCOPE_LOCAL_CPU) + pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + else + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + csv2_val = cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_CSV2_SHIFT); + return csv2_val == 3; +} + +static inline bool supports_clearbhb(int scope) +{ + u64 isar2; + + if (scope == SCOPE_LOCAL_CPU) + isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); + else + isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); + + return cpuid_feature_extract_unsigned_field(isar2, + ID_AA64ISAR2_CLEARBHB_SHIFT); +} + const struct cpumask *system_32bit_el0_cpumask(void); DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 999b9149f856..bfbf0c4c7c5e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,10 +73,14 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B #define APM_CPU_PART_POTENZA 0x000 @@ -117,10 +121,14 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 3198acb2aad8..7f3c87f7a0ce 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -106,7 +106,7 @@ msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 1f // and check that it sticks + tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 4335800201c9..daff882883f9 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,9 +62,11 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT3, + FIX_ENTRY_TRAMP_TEXT2, + FIX_ENTRY_TRAMP_TEXT1, FIX_ENTRY_TRAMP_DATA, - FIX_ENTRY_TRAMP_TEXT, -#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 6b776c8667b2..b02f0c328c8e 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -65,6 +65,7 @@ enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_PSB = 0x11 << 5, AARCH64_INSN_HINT_TSB = 0x12 << 5, AARCH64_INSN_HINT_CSDB = 0x14 << 5, + AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5, AARCH64_INSN_HINT_BTI = 0x20 << 5, AARCH64_INSN_HINT_BTIC = 0x22 << 5, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5bc01e62c08a..031e3a2537fc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -714,6 +714,11 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } +static inline bool kvm_system_needs_idmapped_vectors(void) +{ + return cpus_have_const_cap(ARM64_SPECTRE_V3A); +} + void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); static inline void kvm_arch_hardware_unsetup(void) {} diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index e4704a403237..a857bcacf0fe 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -5,6 +5,7 @@ #ifndef __ASM_MTE_KASAN_H #define __ASM_MTE_KASAN_H +#include <asm/compiler.h> #include <asm/mte-def.h> #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 7032f04c8ac6..b1e1b74d993c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -92,7 +92,7 @@ extern bool arm64_use_ng_mappings; #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */ #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -101,7 +101,7 @@ extern bool arm64_use_ng_mappings; #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */ #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c4ba047a82d2..94e147e5456c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1017,17 +1017,6 @@ static inline bool arch_wants_old_prefaulted_pte(void) } #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) -{ - if (cpus_have_const_cap(ARM64_HAS_EPAN)) - return prot; - - if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY)) - return prot; - - return PAGE_READONLY_EXEC; -} - static inline bool pud_sect_supported(void) { return PAGE_SIZE == SZ_4K; diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 1bce62fa908a..56f7b1d4d54b 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -5,7 +5,7 @@ #ifndef __ASM_RWONCE_H #define __ASM_RWONCE_H -#ifdef CONFIG_LTO +#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__) #include <linux/compiler_types.h> #include <asm/alternative-macros.h> @@ -66,7 +66,7 @@ }) #endif /* !BUILD_VDSO */ -#endif /* CONFIG_LTO */ +#endif /* CONFIG_LTO && !__ASSEMBLY__ */ #include <asm-generic/rwonce.h> diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 152cb35bf9df..40971ac1303f 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -23,4 +23,9 @@ extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[]; +static inline size_t entry_tramp_text_size(void) +{ + return __entry_tramp_text_end - __entry_tramp_text_start; +} + #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f62ca39da6c5..86e0cc9b9c68 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -93,5 +93,9 @@ void spectre_v4_enable_task_mitigation(struct task_struct *tsk); enum mitigation_state arm64_get_meltdown_state(void); +enum mitigation_state arm64_get_spectre_bhb_state(void); +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 spectre_bhb_loop_affected(int scope); +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004ae..932d45b17877 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -773,6 +773,7 @@ #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64isar2 */ +#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 #define ID_AA64ISAR2_RPRES_SHIFT 4 #define ID_AA64ISAR2_WFXT_SHIFT 0 @@ -904,6 +905,7 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ECBHB_SHIFT 60 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h new file mode 100644 index 000000000000..f64613a96d53 --- /dev/null +++ b/arch/arm64/include/asm/vectors.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_VECTORS_H +#define __ASM_VECTORS_H + +#include <linux/bug.h> +#include <linux/percpu.h> + +#include <asm/fixmap.h> + +extern char vectors[]; +extern char tramp_vectors[]; +extern char __bp_harden_el1_vectors[]; + +/* + * Note: the order of this enum corresponds to two arrays in entry.S: + * tramp_vecs and __bp_harden_el1_vectors. By default the canonical + * 'full fat' vectors are used directly. + */ +enum arm64_bp_harden_el1_vectors { +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + /* + * Perform the BHB loop mitigation, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_LOOP, + + /* + * Make the SMC call for firmware mitigation, before branching to the + * canonical vectors. + */ + EL1_VECTOR_BHB_FW, + + /* + * Use the ClearBHB instruction, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_CLEAR_INSN, +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + + /* + * Remap the kernel before branching to the canonical vectors. + */ + EL1_VECTOR_KPTI, +}; + +#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +#define EL1_VECTOR_BHB_LOOP -1 +#define EL1_VECTOR_BHB_FW -1 +#define EL1_VECTOR_BHB_CLEAR_INSN -1 +#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + +/* The vectors to use on return from EL0. e.g. to remap the kernel */ +DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_VALIAS 0 +#endif + +static inline const char * +arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) +{ + if (arm64_kernel_unmapped_at_el0()) + return (char *)TRAMP_VALIAS + SZ_2K * slot; + + WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); + + return __bp_harden_el1_vectors + SZ_2K * slot; +} + +#endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..323e251ed37b 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b217941713a8..a401180e8d66 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -502,6 +502,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_spectre_v4, .cpu_enable = spectre_v4_enable_mitigation, }, + { + .desc = "Spectre-BHB", + .capability = ARM64_SPECTRE_BHB, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = is_spectre_bhb_affected, + .cpu_enable = spectre_bhb_enable_mitigation, + }, #ifdef CONFIG_ARM64_ERRATUM_1418040 { .desc = "ARM erratum 1418040", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8d..d33687673f6b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -73,6 +73,8 @@ #include <linux/mm.h> #include <linux/cpu.h> #include <linux/kasan.h> +#include <linux/percpu.h> + #include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> @@ -85,6 +87,7 @@ #include <asm/smp.h> #include <asm/sysreg.h> #include <asm/traps.h> +#include <asm/vectors.h> #include <asm/virt.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ @@ -110,6 +113,8 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); +DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; + /* * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs * support it? @@ -226,6 +231,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1590,6 +1596,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) int cpu = smp_processor_id(); + if (__this_cpu_read(this_cpu_vector) == vectors) { + const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); + + __this_cpu_write(this_cpu_vector, v); + } + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 772ec2ecf488..4a3a653df07e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -37,18 +37,21 @@ .macro kernel_ventry, el:req, ht:req, regsize:req, label:req .align 7 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +.Lventry_start\@: .if \el == 0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif -alternative_else_nop_endif +.Lskip_tramp_vectors_cleanup\@: .endif -#endif sub sp, sp, #PT_REGS_SIZE #ifdef CONFIG_VMAP_STACK @@ -95,11 +98,15 @@ alternative_else_nop_endif mrs x0, tpidrro_el0 #endif b el\el\ht\()_\regsize\()_\label +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm - .macro tramp_alias, dst, sym + .macro tramp_alias, dst, sym, tmp mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) + adr_l \tmp, \sym + add \dst, \dst, \tmp + adr_l \tmp, .entry.tramp.text + sub \dst, \dst, \tmp .endm /* @@ -116,7 +123,7 @@ alternative_cb_end tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state -alternative_cb spectre_v4_patch_fw_mitigation_conduit +alternative_cb smccc_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end .L__asm_ssbd_skip\@: @@ -413,21 +420,26 @@ alternative_else_nop_endif ldp x24, x25, [sp, #16 * 12] ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #PT_REGS_SIZE // restore sp .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 + ldr lr, [sp, #S_LR] + add sp, sp, #PT_REGS_SIZE // restore sp + eret +alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 4f - msr far_el1, x30 - tramp_alias x30, tramp_exit_native + msr far_el1, x29 + tramp_alias x30, tramp_exit_native, x29 br x30 4: - tramp_alias x30, tramp_exit_compat + tramp_alias x30, tramp_exit_compat, x29 br x30 #endif .else + ldr lr, [sp, #S_LR] + add sp, sp, #PT_REGS_SIZE // restore sp + /* Ensure any device/NC reads complete */ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 @@ -594,12 +606,6 @@ SYM_CODE_END(ret_to_user) .popsection // .entry.text -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. - */ - .pushsection ".entry.tramp.text", "ax" - // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 @@ -633,12 +639,47 @@ alternative_else_nop_endif */ .endm - .macro tramp_ventry, regsize = 64 + .macro tramp_data_page dst + adr_l \dst, .entry.tramp.text + sub \dst, \dst, PAGE_SIZE + .endm + + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RANDOMIZE_BASE + tramp_data_page \dst + add \dst, \dst, #:lo12:__entry_tramp_data_\var + ldr \dst, [\dst] +#else + ldr \dst, =\var +#endif + .endm + +#define BHB_MITIGATION_NONE 0 +#define BHB_MITIGATION_LOOP 1 +#define BHB_MITIGATION_FW 2 +#define BHB_MITIGATION_INSN 3 + + .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 1: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + + .if \bhb == BHB_MITIGATION_LOOP + /* + * This sequence must appear before the first indirect branch. i.e. the + * ret out of tramp_ventry. It appears here because x30 is free. + */ + __mitigate_spectre_bhb_loop x30 + .endif // \bhb == BHB_MITIGATION_LOOP + + .if \bhb == BHB_MITIGATION_INSN + clearbhb + isb + .endif // \bhb == BHB_MITIGATION_INSN + + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy * entry onto the return stack and using a RET instruction to @@ -648,46 +689,75 @@ alternative_else_nop_endif b . 2: tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif + tramp_data_read_var x30, vectors alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM - prfm plil1strm, [x30, #(1b - tramp_vectors)] + prfm plil1strm, [x30, #(1b - \vector_start)] alternative_else_nop_endif + msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) isb + .else + ldr x30, =vectors + .endif // \kpti == 1 + + .if \bhb == BHB_MITIGATION_FW + /* + * The firmware sequence must appear before the first indirect branch. + * i.e. the ret out of tramp_ventry. But it also needs the stack to be + * mapped to save/restore the registers the SMC clobbers. + */ + __mitigate_spectre_bhb_fw + .endif // \bhb == BHB_MITIGATION_FW + + add x30, x30, #(1b - \vector_start + 4) ret +.org 1b + 128 // Did we overflow the ventry slot? .endm .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors + tramp_data_read_var x30, this_cpu_vector + get_this_cpu_offset x29 + ldr x30, [x30, x29] + msr vbar_el1, x30 - tramp_unmap_kernel x30 + ldr lr, [sp, #S_LR] + tramp_unmap_kernel x29 .if \regsize == 64 - mrs x30, far_el1 + mrs x29, far_el1 .endif + add sp, sp, #PT_REGS_SIZE // restore sp eret sb .endm - .align 11 -SYM_CODE_START_NOALIGN(tramp_vectors) + .macro generate_tramp_vector, kpti, bhb +.Lvector_start\@: .space 0x400 - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry + .rept 4 + tramp_ventry .Lvector_start\@, 64, \kpti, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, \kpti, \bhb + .endr + .endm - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + * The order must match __bp_harden_el1_vectors and the + * arm64_bp_harden_el1_vectors enum. + */ + .pushsection ".entry.tramp.text", "ax" + .align 11 +SYM_CODE_START_NOALIGN(tramp_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE SYM_CODE_END(tramp_vectors) SYM_CODE_START(tramp_exit_native) @@ -704,13 +774,57 @@ SYM_CODE_END(tramp_exit_compat) .pushsection ".rodata", "a" .align PAGE_SHIFT SYM_DATA_START(__entry_tramp_data_start) +__entry_tramp_data_vectors: .quad vectors +#ifdef CONFIG_ARM_SDE_INTERFACE +__entry_tramp_data___sdei_asm_handler: + .quad __sdei_asm_handler +#endif /* CONFIG_ARM_SDE_INTERFACE */ +__entry_tramp_data_this_cpu_vector: + .quad this_cpu_vector SYM_DATA_END(__entry_tramp_data_start) .popsection // .rodata #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* + * Exception vectors for spectre mitigations on entry from EL1 when + * kpti is not in use. + */ + .macro generate_el1_vector, bhb +.Lvector_start\@: + kernel_ventry 1, t, 64, sync // Synchronous EL1t + kernel_ventry 1, t, 64, irq // IRQ EL1t + kernel_ventry 1, t, 64, fiq // FIQ EL1h + kernel_ventry 1, t, 64, error // Error EL1t + + kernel_ventry 1, h, 64, sync // Synchronous EL1h + kernel_ventry 1, h, 64, irq // IRQ EL1h + kernel_ventry 1, h, 64, fiq // FIQ EL1h + kernel_ventry 1, h, 64, error // Error EL1h + + .rept 4 + tramp_ventry .Lvector_start\@, 64, 0, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, 0, \bhb + .endr + .endm + +/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */ + .pushsection ".entry.text", "ax" + .align 11 +SYM_CODE_START(__bp_harden_el1_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_el1_vector bhb=BHB_MITIGATION_LOOP + generate_el1_vector bhb=BHB_MITIGATION_FW + generate_el1_vector bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ +SYM_CODE_END(__bp_harden_el1_vectors) + .popsection + + +/* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: * x0 = previous task_struct (must be preserved across the switch) @@ -835,14 +949,7 @@ SYM_CODE_START(__sdei_asm_entry_trampoline) * Remember whether to unmap the kernel on exit. */ 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] - -#ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif + tramp_data_read_var x4, __sdei_asm_handler br x4 SYM_CODE_END(__sdei_asm_entry_trampoline) NOKPROBE(__sdei_asm_entry_trampoline) @@ -865,13 +972,6 @@ SYM_CODE_END(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -SYM_DATA_START(__sdei_asm_trampoline_next_handler) - .quad __sdei_asm_handler -SYM_DATA_END(__sdei_asm_trampoline_next_handler) -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -981,7 +1081,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 br x5 #endif SYM_CODE_END(__sdei_asm_handler) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7eaf1f7c4168..55a1ced8eb77 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -66,6 +66,10 @@ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_get_kimage_voffset); KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0); +KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter); +KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable); +KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); +KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 902e4084c477..6d45c63c6454 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -18,15 +18,18 @@ */ #include <linux/arm-smccc.h> +#include <linux/bpf.h> #include <linux/cpu.h> #include <linux/device.h> #include <linux/nospec.h> #include <linux/prctl.h> #include <linux/sched/task_stack.h> +#include <asm/debug-monitors.h> #include <asm/insn.h> #include <asm/spectre.h> #include <asm/traps.h> +#include <asm/vectors.h> #include <asm/virt.h> /* @@ -96,14 +99,51 @@ static bool spectre_v2_mitigations_off(void) return ret; } +static const char *get_bhb_affected_string(enum mitigation_state bhb_state) +{ + switch (bhb_state) { + case SPECTRE_UNAFFECTED: + return ""; + default: + case SPECTRE_VULNERABLE: + return ", but not BHB"; + case SPECTRE_MITIGATED: + return ", BHB"; + } +} + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + enum mitigation_state bhb_state = arm64_get_spectre_bhb_state(); + const char *bhb_str = get_bhb_affected_string(bhb_state); + const char *v2_str = "Branch predictor hardening"; + switch (spectre_v2_state) { case SPECTRE_UNAFFECTED: - return sprintf(buf, "Not affected\n"); + if (bhb_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "Not affected\n"); + + /* + * Platforms affected by Spectre-BHB can't report + * "Not affected" for Spectre-v2. + */ + v2_str = "CSV2"; + fallthrough; case SPECTRE_MITIGATED: - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str); case SPECTRE_VULNERABLE: fallthrough; default: @@ -554,9 +594,9 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction * to call into firmware to adjust the mitigation state. */ -void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt, - __le32 *origptr, - __le32 *updptr, int nr_inst) +void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) { u32 insn; @@ -770,3 +810,344 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } + +/* + * Spectre BHB. + * + * A CPU is either: + * - Mitigated by a branchy loop a CPU specific number of times, and listed + * in our "loop mitigated list". + * - Mitigated in software by the firmware Spectre v2 call. + * - Has the ClearBHB instruction to perform the mitigation. + * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no + * software mitigation in the vectors is needed. + * - Has CSV2.3, so is unaffected. + */ +static enum mitigation_state spectre_bhb_state; + +enum mitigation_state arm64_get_spectre_bhb_state(void) +{ + return spectre_bhb_state; +} + +enum bhb_mitigation_bits { + BHB_LOOP, + BHB_FW, + BHB_HW, + BHB_INSN, +}; +static unsigned long system_bhb_mitigations; + +/* + * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any + * SCOPE_SYSTEM call will give the right answer. + */ +u8 spectre_bhb_loop_affected(int scope) +{ + u8 k = 0; + static u8 max_bhb_k; + + if (scope == SCOPE_LOCAL_CPU) { + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; + + max_bhb_k = max(max_bhb_k, k); + } else { + k = max_bhb_k; + } + + return k; +} + +static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +static bool is_spectre_bhb_fw_affected(int scope) +{ + static bool system_affected; + enum mitigation_state fw_state; + bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE; + static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + {}, + }; + bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), + spectre_bhb_firmware_mitigated_list); + + if (scope != SCOPE_LOCAL_CPU) + return system_affected; + + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { + system_affected = true; + return true; + } + + return false; +} + +static bool supports_ecbhb(int scope) +{ + u64 mmfr1; + + if (scope == SCOPE_LOCAL_CPU) + mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + else + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_ECBHB_SHIFT); +} + +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (supports_csv2p3(scope)) + return false; + + if (supports_clearbhb(scope)) + return true; + + if (spectre_bhb_loop_affected(scope)) + return true; + + if (is_spectre_bhb_fw_affected(scope)) + return true; + + return false; +} + +static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) +{ + const char *v = arm64_get_bp_hardening_vector(slot); + + if (slot < 0) + return; + + __this_cpu_write(this_cpu_vector, v); + + /* + * When KPTI is in use, the vectors are switched when exiting to + * user-space. + */ + if (arm64_kernel_unmapped_at_el0()) + return; + + write_sysreg(v, vbar_el1); + isb(); +} + +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) +{ + bp_hardening_cb_t cpu_cb; + enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + + if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) + return; + + if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { + /* No point mitigating Spectre-BHB alone. */ + } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { + pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); + } else if (cpu_mitigations_off()) { + pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { + state = SPECTRE_MITIGATED; + set_bit(BHB_HW, &system_bhb_mitigations); + } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) { + /* + * Ensure KVM uses the indirect vector which will have ClearBHB + * added. + */ + if (!data->slot) + data->slot = HYP_VECTOR_INDIRECT; + + this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); + state = SPECTRE_MITIGATED; + set_bit(BHB_INSN, &system_bhb_mitigations); + } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + /* + * Ensure KVM uses the indirect vector which will have the + * branchy-loop added. A57/A72-r0 will already have selected + * the spectre-indirect vector, which is sufficient for BHB + * too. + */ + if (!data->slot) + data->slot = HYP_VECTOR_INDIRECT; + + this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); + state = SPECTRE_MITIGATED; + set_bit(BHB_LOOP, &system_bhb_mitigations); + } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (fw_state == SPECTRE_MITIGATED) { + /* + * Ensure KVM uses one of the spectre bp_hardening + * vectors. The indirect vector doesn't include the EL3 + * call, so needs upgrading to + * HYP_VECTOR_SPECTRE_INDIRECT. + */ + if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) + data->slot += 1; + + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * The WA3 call in the vectors supersedes the WA1 call + * made during context-switch. Uninstall any firmware + * bp_hardening callback. + */ + cpu_cb = spectre_v2_get_sw_mitigation_cb(); + if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + set_bit(BHB_FW, &system_bhb_mitigations); + } + } + + update_mitigation_state(&spectre_bhb_state, state); +} + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); + + if (test_bit(BHB_LOOP, &system_bhb_mitigations)) + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); + + if (test_bit(BHB_FW, &system_bhb_mitigations)) + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* Patched to correct the immediate */ +void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); +} + +/* Patched to mov WA3 when supported */ +void noinstr spectre_bhb_patch_wa3(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + !test_bit(BHB_FW, &system_bhb_mitigations)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + + insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_ORR, + AARCH64_INSN_VARIANT_32BIT, + AARCH64_INSN_REG_ZR, rd, + ARM_SMCCC_ARCH_WORKAROUND_3); + if (WARN_ON_ONCE(insn == AARCH64_BREAK_FAULT)) + return; + + *updptr++ = cpu_to_le32(insn); +} + +/* Patched to NOP when not supported */ +void __init spectre_bhb_patch_clearbhb(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 2); + + if (test_bit(BHB_INSN, &system_bhb_mitigations)) + return; + + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +#ifdef CONFIG_BPF_SYSCALL +#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n" +void unpriv_ebpf_notify(int new_state) +{ + if (spectre_v2_state == SPECTRE_VULNERABLE || + spectre_bhb_state != SPECTRE_MITIGATED) + return; + + if (!new_state) + pr_err("WARNING: %s", EBPF_WARN); +} +#endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 50bab186c49b..edaf0faf766f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -341,7 +341,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE, "Entry trampoline text too big") #endif #ifdef CONFIG_KVM diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ecc5958e27fe..4dca6ffd03d4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1491,10 +1491,7 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) - return 0; - - if (!has_vhe()) { + if (kvm_system_needs_idmapped_vectors() && !has_vhe()) { err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), __BP_HARDEN_HYP_VECS_SZ, &base); if (err) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index b6b6801d96d5..7839d075729b 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -62,6 +62,10 @@ el1_sync: // Guest trapped into EL2 /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ ARM_SMCCC_ARCH_WORKAROUND_2) + cbz w1, wa_epilogue + + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_3) cbnz w1, el1_trap wa_epilogue: @@ -192,7 +196,10 @@ SYM_CODE_END(__kvm_hyp_vector) sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] + alternative_cb spectre_bhb_patch_wa3 + /* Patched to mov WA3 when supported */ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + alternative_cb_end smc #0 ldp x2, x3, [sp, #(8 * 0)] add sp, sp, #(8 * 2) @@ -205,6 +212,8 @@ SYM_CODE_END(__kvm_hyp_vector) spectrev2_smccc_wa1_smc .else stp x0, x1, [sp, #-16]! + mitigate_spectre_bhb_loop x0 + mitigate_spectre_bhb_clear_insn .endif .if \indirect != 0 alternative_cb kvm_patch_vector_branch diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 526a7d6fa86f..cdbe8e246418 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -148,8 +148,10 @@ int hyp_map_vectors(void) phys_addr_t phys; void *bp_base; - if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) + if (!kvm_system_needs_idmapped_vectors()) { + __hyp_bp_vect_base = __bp_harden_hyp_vecs; return 0; + } phys = __hyp_pa(__bp_harden_hyp_vecs); bp_base = (void *)__pkvm_create_private_mapping(phys, diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 11d053fdd604..54af47005e45 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -10,6 +10,7 @@ #include <linux/kvm_host.h> #include <linux/types.h> #include <linux/jump_label.h> +#include <linux/percpu.h> #include <uapi/linux/psci.h> #include <kvm/arm_psci.h> @@ -24,6 +25,8 @@ #include <asm/fpsimd.h> #include <asm/debug-monitors.h> #include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/vectors.h> /* VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); @@ -67,7 +70,7 @@ NOKPROBE_SYMBOL(__activate_traps); static void __deactivate_traps(struct kvm_vcpu *vcpu) { - extern char vectors[]; /* kernel exception vectors */ + const char *host_vectors = vectors; ___deactivate_traps(vcpu); @@ -81,7 +84,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); + + if (!arm64_kernel_unmapped_at_el0()) + host_vectors = __this_cpu_read(this_cpu_vector); + write_sysreg(host_vectors, vbar_el1); } NOKPROBE_SYMBOL(__deactivate_traps); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 30da78f72b3b..202b8c455724 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -107,6 +107,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) break; } break; + case ARM_SMCCC_ARCH_WORKAROUND_3: + switch (arm64_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + break; + case SPECTRE_MITIGATED: + val[0] = SMCCC_RET_SUCCESS; + break; + case SPECTRE_UNAFFECTED: + val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED; + break; + } + break; case ARM_SMCCC_HV_PV_TIME_FEATURES: val[0] = SMCCC_RET_SUCCESS; break; diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 3eae32876897..5918095c90a5 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -46,8 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) * specification (ARM DEN 0022A). This means all suspend states * for KVM will preserve the register state. */ - kvm_vcpu_halt(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); + kvm_vcpu_wfi(vcpu); return PSCI_RET_SUCCESS; } @@ -406,7 +405,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { - return 3; /* PSCI version and two workaround registers */ + return 4; /* PSCI version and three workaround registers */ } int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) @@ -420,6 +419,9 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) return -EFAULT; + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++)) + return -EFAULT; + return 0; } @@ -459,6 +461,17 @@ static int get_kernel_wa_level(u64 regid) case SPECTRE_VULNERABLE: return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; } + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: + switch (arm64_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; + case SPECTRE_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; + case SPECTRE_UNAFFECTED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; } return -EINVAL; @@ -475,6 +488,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; break; default: @@ -520,6 +534,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: if (val & ~KVM_REG_FEATURE_LEVEL_MASK) return -EINVAL; diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 7068da080799..49837d3a3ef5 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -248,6 +248,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, IRQCHIP_STATE_PENDING, &val); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + } else if (vgic_irq_is_mapped_level(irq)) { + val = vgic_get_phys_line_level(irq); } else { val = irq_is_pending(irq); } diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index a38f54cd638c..77ada00280d9 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -7,8 +7,10 @@ #include <linux/io.h> #include <linux/memblock.h> +#include <linux/mm.h> #include <linux/types.h> +#include <asm/cpufeature.h> #include <asm/page.h> /* @@ -38,3 +40,18 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) { return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK); } + +static int __init adjust_protection_map(void) +{ + /* + * With Enhanced PAN we can honour the execute-only permissions as + * there is no PAN override with such mappings. + */ + if (cpus_have_const_cap(ARM64_HAS_EPAN)) { + protection_map[VM_EXEC] = PAGE_EXECONLY; + protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY; + } + + return 0; +} +arch_initcall(adjust_protection_map); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index acfae9b41cc8..49abbf43bf35 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -617,6 +617,8 @@ early_param("rodata", parse_rodata); #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { + int i; + pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); @@ -625,11 +627,15 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, __pgd_pgtable_alloc, 0); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ - __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern char __entry_tramp_data_start[]; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 9c65b1e25a96..cea7533cb304 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -44,6 +44,7 @@ MTE_ASYMM SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 +SPECTRE_BHB SSBS SVE UNMAP_KERNEL_AT_EL0 diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index f979adfd4fc2..ef73ba1e0ec1 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -803,7 +803,7 @@ early_param("coherentio", setcoherentio); static int __init setnocoherentio(char *str) { - dma_default_coherent = true; + dma_default_coherent = false; pr_info("Software DMA cache coherency (command line)\n"); return 0; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index d542fb7af3ba..1986d1309410 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -351,6 +351,9 @@ asmlinkage void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); @@ -362,9 +365,6 @@ asmlinkage void start_secondary(void) /* The CPU is running and counters synchronised, now mark it online */ set_cpu_online(cpu, true); - set_cpu_sibling_map(cpu); - set_cpu_core_map(cpu); - calculate_cpu_foreign_map(); /* diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index d6efffd4dd20..fb0565bc34fd 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -22,7 +22,9 @@ #include "common.h" -static void *detect_magic __initdata = detect_memory_region; +#define MT7621_MEM_TEST_PATTERN 0xaa5555aa + +static u32 detect_magic __initdata; int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { @@ -58,24 +60,32 @@ phys_addr_t mips_cpc_default_phys_base(void) panic("Cannot detect cpc address"); } +static bool __init mt7621_addr_wraparound_test(phys_addr_t size) +{ + void *dm = (void *)KSEG1ADDR(&detect_magic); + + if (CPHYSADDR(dm + size) >= MT7621_LOWMEM_MAX_SIZE) + return true; + __raw_writel(MT7621_MEM_TEST_PATTERN, dm); + if (__raw_readl(dm) != __raw_readl(dm + size)) + return false; + __raw_writel(~MT7621_MEM_TEST_PATTERN, dm); + return __raw_readl(dm) == __raw_readl(dm + size); +} + static void __init mt7621_memory_detect(void) { - void *dm = &detect_magic; phys_addr_t size; - for (size = 32 * SZ_1M; size < 256 * SZ_1M; size <<= 1) { - if (!__builtin_memcmp(dm, dm + size, sizeof(detect_magic))) - break; + for (size = 32 * SZ_1M; size <= 256 * SZ_1M; size <<= 1) { + if (mt7621_addr_wraparound_test(size)) { + memblock_add(MT7621_LOWMEM_BASE, size); + return; + } } - if ((size == 256 * SZ_1M) && - (CPHYSADDR(dm + size) < MT7621_LOWMEM_MAX_SIZE) && - __builtin_memcmp(dm, dm + size, sizeof(detect_magic))) { - memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE); - memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE); - } else { - memblock_add(MT7621_LOWMEM_BASE, size); - } + memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE); + memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE); } void __init ralink_of_remap(void) diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 0ec9cfc5131f..56ffd260c669 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -12,6 +12,14 @@ #include <asm/barrier.h> #include <linux/atomic.h> +/* compiler build environment sanity checks: */ +#if !defined(CONFIG_64BIT) && defined(__LP64__) +#error "Please use 'ARCH=parisc' to build the 32-bit kernel." +#endif +#if defined(CONFIG_64BIT) && !defined(__LP64__) +#error "Please use 'ARCH=parisc64' to build the 64-bit kernel." +#endif + /* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion * on use of volatile and __*_bit() (set/clear/change): * *_bit() want use of volatile. diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ebf8a845b017..123d5f16cd9d 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -89,8 +89,8 @@ struct exception_table_entry { __asm__("1: " ldx " 0(" sr "%2),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__gu_val), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err)); \ + : "=r"(__gu_val), "+r"(__gu_err) \ + : "r"(ptr)); \ \ (val) = (__force __typeof__(*(ptr))) __gu_val; \ } @@ -123,8 +123,8 @@ struct exception_table_entry { "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=&r"(__gu_tmp.l), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err)); \ + : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ + : "r"(ptr)); \ \ (val) = __gu_tmp.t; \ } @@ -135,13 +135,12 @@ struct exception_table_entry { #define __put_user_internal(sr, x, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__pu_err); \ - __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \ \ switch (sizeof(*(ptr))) { \ - case 1: __put_user_asm(sr, "stb", __x, ptr); break; \ - case 2: __put_user_asm(sr, "sth", __x, ptr); break; \ - case 4: __put_user_asm(sr, "stw", __x, ptr); break; \ - case 8: STD_USER(sr, __x, ptr); break; \ + case 1: __put_user_asm(sr, "stb", x, ptr); break; \ + case 2: __put_user_asm(sr, "sth", x, ptr); break; \ + case 4: __put_user_asm(sr, "stw", x, ptr); break; \ + case 8: STD_USER(sr, x, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -150,7 +149,9 @@ struct exception_table_entry { #define __put_user(x, ptr) \ ({ \ - __put_user_internal("%%sr3,", x, ptr); \ + __typeof__(&*(ptr)) __ptr = ptr; \ + __typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x); \ + __put_user_internal("%%sr3,", __x, __ptr); \ }) #define __put_kernel_nofault(dst, src, type, err_label) \ @@ -180,8 +181,8 @@ struct exception_table_entry { "1: " stx " %2,0(" sr "%1)\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(x), "0"(__pu_err)) + : "+r"(__pu_err) \ + : "r"(ptr), "r"(x)) #if !defined(CONFIG_64BIT) @@ -193,8 +194,8 @@ struct exception_table_entry { "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err)); \ + : "+r"(__pu_err) \ + : "r"(ptr), "r"(__val)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 237d20dd5622..286cec4d86d7 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); - return 0; + return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { @@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) __asm__ __volatile__ ( " mtsp %4, %%sr1\n" " zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %2\n" +" dep %%r0, 31, 2, %3\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" "1: ldw 0(%%sr1,%3),%%r20\n" @@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" " or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%1)\n" +"3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" " copy %%r0, %0\n" @@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs) ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */ break; } -#ifdef CONFIG_PA20 switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: @@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs) flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; +#ifdef CONFIG_PA20 case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; case OPCODE_STD_L: ret = emulate_std(regs, R2(regs->iir),0); break; - } #endif + } switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: flop=1; - ret = emulate_ldw(regs, R2(regs->iir),0); + ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: - ret = emulate_ldw(regs, R2(regs->iir),1); + ret = emulate_ldw(regs, R2(regs->iir), 0); break; case OPCODE_FSTW_L: diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index 367f6397bda7..860385058085 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -346,6 +346,16 @@ u64 ioread64be(const void __iomem *addr) return *((u64 *)addr); } +u64 ioread64_lo_hi(const void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} + u64 ioread64_hi_lo(const void __iomem *addr) { u32 low, high; @@ -419,6 +429,12 @@ void iowrite64be(u64 datum, void __iomem *addr) } } +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} + void iowrite64_hi_lo(u64 val, void __iomem *addr) { iowrite32(val >> 32, addr + sizeof(u32)); @@ -530,6 +546,7 @@ EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); EXPORT_SYMBOL(ioread64); EXPORT_SYMBOL(ioread64be); +EXPORT_SYMBOL(ioread64_lo_hi); EXPORT_SYMBOL(ioread64_hi_lo); EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -538,6 +555,7 @@ EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); EXPORT_SYMBOL(iowrite64); EXPORT_SYMBOL(iowrite64be); +EXPORT_SYMBOL(iowrite64_lo_hi); EXPORT_SYMBOL(iowrite64_hi_lo); EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 1ae31db9988f..1dc2e88e7b04 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -337,9 +337,9 @@ static void __init setup_bootmem(void) static bool kernel_set_to_readonly; -static void __init map_pages(unsigned long start_vaddr, - unsigned long start_paddr, unsigned long size, - pgprot_t pgprot, int force) +static void __ref map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) { pmd_t *pmd; pte_t *pg_table; @@ -449,7 +449,7 @@ void __init set_kernel_text_rw(int enable_read_write) flush_tlb_all(); } -void __ref free_initmem(void) +void free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; @@ -463,7 +463,6 @@ void __ref free_initmem(void) /* The init text pages are marked R-X. We have to * flush the icache and mark them RW- * - * This is tricky, because map_pages is in the init section. * Do a dummy remap of the data section first (the data * section is already PAGE_KERNEL) to pull in the TLB entries * for map_kernel */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index ba5b1becf518..006cbec70ffe 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -202,7 +202,6 @@ static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) /* * The current system page and segment sizes */ -extern int mmu_linear_psize; extern int mmu_virtual_psize; extern int mmu_vmalloc_psize; extern int mmu_io_psize; @@ -213,6 +212,7 @@ extern int mmu_io_psize; #define mmu_virtual_psize MMU_PAGE_4K #endif #endif +extern int mmu_linear_psize; extern int mmu_vmemmap_psize; /* MMU initialization */ diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index 7a90000f8d15..f83866a19e87 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -9,7 +9,7 @@ struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); int add_tce_mem_ranges(struct crash_mem **mem_ranges); int add_initrd_mem_range(struct crash_mem **mem_ranges); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU int add_htab_mem_range(struct crash_mem **mem_ranges); #else static inline int add_htab_mem_range(struct crash_mem **mem_ranges) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 160abcb8e9fa..ea0e487f87b1 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -9,7 +9,7 @@ long soft_nmi_interrupt(struct pt_regs *regs); static inline void arch_touch_nmi_watchdog(void) {} #endif -#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE) +#ifdef CONFIG_NMI_IPI extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index fa84744d6b24..b876ef8c70a7 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -421,14 +421,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a94b0cd0bdc5..bd3734d5be89 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3264,12 +3264,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index b44d6ecdb46e..0aacd7052585 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -2,6 +2,7 @@ menu "CPU errata selection" config RISCV_ERRATA_ALTERNATIVE bool "RISC-V alternative scheme" + depends on !XIP_KERNEL default y help This Kconfig allows the kernel to automatically patch the diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 6ec44a22278a..c112ab2a9052 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -14,8 +14,8 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC - select RISCV_ERRATA_ALTERNATIVE - select ERRATA_SIFIVE + select RISCV_ERRATA_ALTERNATIVE if !XIP_KERNEL + select ERRATA_SIFIVE if !XIP_KERNEL help This enables support for SiFive SoC platform hardware. diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi index 56f57118c633..44d338514761 100644 --- a/arch/riscv/boot/dts/canaan/k210.dtsi +++ b/arch/riscv/boot/dts/canaan/k210.dtsi @@ -113,7 +113,8 @@ compatible = "canaan,k210-plic", "sifive,plic-1.0.0"; reg = <0xC000000 0x4000000>; interrupt-controller; - interrupts-extended = <&cpu0_intc 11>, <&cpu1_intc 11>; + interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>, + <&cpu1_intc 11>, <&cpu1_intc 9>; riscv,ndev = <65>; }; diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 2a82a3b2992b..af64b95e88cc 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -23,7 +23,7 @@ CONFIG_SLOB=y CONFIG_SOC_CANAAN=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro" +CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro" CONFIG_CMDLINE_FORCE=y # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 160e3a1e8f8b..004372f8da54 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -119,7 +119,7 @@ extern phys_addr_t phys_ram_base; ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) #define is_linear_mapping(x) \ - ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr)) + ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7e949f25c933..e3549e50de95 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -13,6 +13,7 @@ #ifndef CONFIG_MMU #define KERNEL_LINK_ADDR PAGE_OFFSET +#define KERN_VIRT_SIZE (UL(-1)) #else #define ADDRESS_SPACE_END (UL(-1)) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 612556faa527..ffc87e76b1dd 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -51,6 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o +obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o + obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index ed29e9c8f660..d6a46ed0bf05 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -108,7 +108,7 @@ _save_context: .option pop #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_CONTEXT_TRACKING @@ -143,7 +143,7 @@ skip_context_tracking: li t0, EXC_BREAKPOINT beq s4, t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on + call __trace_hardirqs_on #endif csrs CSR_STATUS, SR_IE @@ -234,7 +234,7 @@ ret_from_exception: REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ @@ -270,10 +270,10 @@ restore_all: REG_L s1, PT_STATUS(sp) andi t0, s1, SR_PIE beqz t0, 1f - call trace_hardirqs_on + call __trace_hardirqs_on j 2f 1: - call trace_hardirqs_off + call __trace_hardirqs_off 2: #endif REG_L a0, PT_STATUS(sp) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 68a9e3d1fe16..4a48287513c3 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -13,6 +13,19 @@ #include <linux/pgtable.h> #include <asm/sections.h> +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { @@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f72527fcb347..775d3322b422 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -5,6 +5,7 @@ * Copyright (c) 2020 Western Digital Corporation or its affiliates. */ +#include <linux/bits.h> #include <linux/init.h> #include <linux/pm.h> #include <linux/reboot.h> @@ -85,7 +86,7 @@ static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mas pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); break; } - hmask |= 1 << hartid; + hmask |= BIT(hartid); } return hmask; @@ -160,7 +161,7 @@ static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -176,7 +177,7 @@ static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, int result = 0; unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -249,26 +250,37 @@ static void __sbi_set_timer_v02(uint64_t stime_value) static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; struct sbiret ret = {0}; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask, hbase, 0, 0, 0, 0); - if (ret.error) - goto ecall_failed; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + ret = sbi_ecall(SBI_EXT_IPI, + SBI_EXT_IPI_SEND_IPI, hmask, + hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { @@ -344,25 +356,35 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - result = __sbi_rfence_v02_call(fid, hmask, hbase, - start, size, arg4, arg5); - if (result) - return result; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + result = __sbi_rfence_v02_call(fid, hmask, + hbase, start, size, arg4, arg5); + if (result) + return result; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c new file mode 100644 index 000000000000..095ac976d7da --- /dev/null +++ b/arch/riscv/kernel/trace_irq.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ + +#include <linux/irqflags.h> +#include <linux/kprobes.h> +#include "trace_irq.h" + +/* + * trace_hardirqs_on/off require the caller to setup frame pointer properly. + * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. + * Here we add one extra level so they can be safely called by low + * level entry code which $fp is used for other purpose. + */ + +void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_on); + +void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h new file mode 100644 index 000000000000..99fe67377e5e --- /dev/null +++ b/arch/riscv/kernel/trace_irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ +#ifndef __TRACE_IRQ_H +#define __TRACE_IRQ_H + +void __trace_hardirqs_on(void); +void __trace_hardirqs_off(void); + +#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 7ebaef10ea1b..ac7a25298a04 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -24,6 +24,9 @@ obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN KASAN_SANITIZE_kasan_init.o := n KASAN_SANITIZE_init.o := n +ifdef CONFIG_DEBUG_VIRTUAL +KASAN_SANITIZE_physaddr.o := n +endif endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index c27294128e18..0d588032d6e6 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -125,7 +125,6 @@ void __init mem_init(void) else swiotlb_force = SWIOTLB_NO_FORCE; #endif - high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); memblock_free_all(); print_vm_layout(); @@ -195,6 +194,7 @@ static void __init setup_bootmem(void) min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); + high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index f61f7ca6fe0f..cd1a145257b7 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -113,8 +113,11 @@ static void __init kasan_populate_pud(pgd_t *pgd, base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); } else { base_pud = (pud_t *)pgd_page_vaddr(*pgd); - if (base_pud == lm_alias(kasan_early_shadow_pud)) + if (base_pud == lm_alias(kasan_early_shadow_pud)) { base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + memcpy(base_pud, (void *)kasan_early_shadow_pud, + sizeof(pud_t) * PTRS_PER_PUD); + } } pudp = base_pud + pud_index(vaddr); @@ -202,8 +205,7 @@ asmlinkage void __init kasan_early_init(void) for (i = 0; i < PTRS_PER_PTE; ++i) set_pte(kasan_early_shadow_pte + i, - mk_pte(virt_to_page(kasan_early_shadow_page), - PAGE_KERNEL)); + pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL)); for (i = 0; i < PTRS_PER_PMD; ++i) set_pmd(kasan_early_shadow_pmd + i, diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index e7fd0c253c7b..19cf25a74ee2 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -8,12 +8,10 @@ phys_addr_t __virt_to_phys(unsigned long x) { - phys_addr_t y = x - PAGE_OFFSET; - /* * Boundary checking aginst the kernel linear mapping space. */ - WARN(y >= KERN_VIRT_SIZE, + WARN(!is_linear_mapping(x) && !is_kernel_mapping(x), "virt_to_phys used for non-linear address: %pK (%pS)\n", (void *)x, (void *)x); diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 16dc57dd90b3..8511f0e59290 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -69,8 +69,13 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, { a->fixup = b->fixup + delta; b->fixup = tmp.fixup - delta; - a->handler = b->handler + delta; - b->handler = tmp.handler - delta; + a->handler = b->handler; + if (a->handler) + a->handler += delta; + b->handler = tmp.handler; + if (b->handler) + b->handler -= delta; } +#define swap_ex_entry_fixup swap_ex_entry_fixup #endif diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 267f70f4393f..6f80ec9c04be 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -47,15 +47,17 @@ struct ftrace_regs { static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - return &fregs->regs; + struct pt_regs *regs = &fregs->regs; + + if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS)) + return regs; + return NULL; } static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, unsigned long ip) { - struct pt_regs *regs = arch_ftrace_get_regs(fregs); - - regs->psw.addr = ip; + fregs->regs.psw.addr = ip; } /* diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 4ffa8e7f0ed3..ddb70fb13fbc 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -15,11 +15,13 @@ #define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ +#define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) #define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) +#define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) #ifndef __ASSEMBLY__ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 21d62d8b6b9a..89c0870d5679 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -159,9 +159,38 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) return 0; } +static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec) +{ + struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_insn insn; + s64 disp; + u16 opc; + + if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn))) + return ERR_PTR(-EFAULT); + disp = (s64)insn.disp * 2; + trampoline = (void *)(rec->ip + disp); + if (get_kernel_nofault(opc, &trampoline->brasl_opc)) + return ERR_PTR(-EFAULT); + if (opc != 0xc015) + return ERR_PTR(-EINVAL); + return trampoline; +} + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { + struct ftrace_hotpatch_trampoline *trampoline; + u64 old; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + if (get_kernel_nofault(old, &trampoline->interceptor)) + return -EFAULT; + if (old != old_addr) + return -EINVAL; + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); return 0; } @@ -188,6 +217,12 @@ static void brcl_enable(void *brcl) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { + struct ftrace_hotpatch_trampoline *trampoline; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); brcl_enable((void *)rec->ip); return 0; } @@ -291,7 +326,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, regs = ftrace_get_regs(fregs); p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) + if (!regs || unlikely(!p) || kprobe_disabled(p)) goto out; if (kprobe_running()) { diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 39bcc0e39a10..a24177dcd12a 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -27,6 +27,7 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) #define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) +#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) #ifdef __PACK_STACK /* allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 @@ -57,6 +58,14 @@ ENDPROC(ftrace_stub) .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) stosm (STACK_PTREGS_PSW)(%r15),0 +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS +#else + lghi %r14,_PIF_FTRACE_FULL_REGS + stg %r14,STACK_PTREGS_FLAGS(%r15) +#endif + .else + xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15) .endif lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f2c25d113e7b..05327be3a982 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -800,6 +800,8 @@ static void __init check_initrd(void) static void __init reserve_kernel(void) { memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); + memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); memblock_reserve(__amode31_base, __eamode31 - __samode31); memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); memblock_reserve(__pa(_stext), _end - _stext); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6db4e2932b3d..65d147974f8d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6dcccb304775..ec9830d2aabf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -703,7 +703,6 @@ struct kvm_vcpu_arch { struct fpu_guest guest_fpu; u64 xcr0; - u64 guest_supported_xcr0; struct kvm_pio_request pio; void *pio_data; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3faf0f97edb1..a4a39c3e0f19 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -476,6 +476,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index cc74dc584836..acbaeaf83b61 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -84,7 +84,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else jmp *%\reg #endif @@ -94,7 +94,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else call *%\reg #endif @@ -146,7 +146,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) @@ -176,7 +176,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -188,9 +188,11 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, }; /* The indirect branch speculation control variants */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index b00dbc5fac2b..bb2fb78523ce 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -220,6 +220,42 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) #define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) + +/* AVIC */ +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) + +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) +#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) +#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) +#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF) + +#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF) + +#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 +#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 +#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF + +enum avic_ipi_failure_cause { + AVIC_IPI_FAILURE_INVALID_INT_TYPE, + AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, + AVIC_IPI_FAILURE_INVALID_TARGET, + AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, +}; + + +/* + * 0xff is broadcast, so the max index allowed for physical APIC ID + * table is 0xfe. APIC IDs above 0xff are reserved. + */ +#define AVIC_MAX_PHYSICAL_ID_COUNT 0xff + +#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL + + struct vmcb_seg { u16 selector; u16 attrib; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 2f0b6be8eaab..9c73d62f7276 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -215,15 +215,26 @@ extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick extern void arch_set_max_freq_ratio(bool turbo_disabled); +void init_freq_invariance(bool secondary, bool cppc_ready); #else static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } +static inline void init_freq_invariance(bool secondary, bool cppc_ready) +{ +} #endif -#if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP) +#ifdef CONFIG_ACPI_CPPC_LIB void init_freq_invariance_cppc(void); #define init_freq_invariance_cppc init_freq_invariance_cppc + +bool amd_set_max_freq_ratio(u64 *ratio); +#else +static inline bool amd_set_max_freq_ratio(u64 *ratio) +{ + return false; +} #endif #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index cf340d85946a..fc17b3f136fe 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o obj-$(CONFIG_ACPI_APEI) += apei.o -obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_msr.o +obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5b6d1a95776f..0d01e7f5078c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1328,6 +1328,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d) return 0; } +static int __init disable_acpi_xsdt(const struct dmi_system_id *d) +{ + if (!acpi_force) { + pr_notice("%s detected: force use of acpi=rsdt\n", d->ident); + acpi_gbl_do_not_use_xsdt = TRUE; + } else { + pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n"); + } + return 0; +} + static int __init dmi_disable_acpi(const struct dmi_system_id *d) { if (!acpi_force) { @@ -1451,6 +1462,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * Boxes that need ACPI XSDT use disabled due to corrupted tables + */ + { + .callback = disable_acpi_xsdt, + .ident = "Advantech DAC-BJ01", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"), + DMI_MATCH(DMI_BIOS_VERSION, "V1.12"), + DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"), + }, + }, {} }; diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c new file mode 100644 index 000000000000..df1644d9b3b6 --- /dev/null +++ b/arch/x86/kernel/acpi/cppc.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cppc.c: CPPC Interface for x86 + * Copyright (c) 2016, Intel Corporation. + */ + +#include <acpi/cppc_acpi.h> +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/topology.h> + +/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ + +bool cpc_ffh_supported(void) +{ + return true; +} + +int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) +{ + int err; + + err = rdmsrl_safe_on_cpu(cpunum, reg->address, val); + if (!err) { + u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, + reg->bit_offset); + + *val &= mask; + *val >>= reg->bit_offset; + } + return err; +} + +int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) +{ + u64 rd_val; + int err; + + err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val); + if (!err) { + u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, + reg->bit_offset); + + val <<= reg->bit_offset; + val &= mask; + rd_val &= ~mask; + rd_val |= val; + err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val); + } + return err; +} + +bool amd_set_max_freq_ratio(u64 *ratio) +{ + struct cppc_perf_caps perf_caps; + u64 highest_perf, nominal_perf; + u64 perf_ratio; + int rc; + + if (!ratio) + return false; + + rc = cppc_get_perf_caps(0, &perf_caps); + if (rc) { + pr_debug("Could not retrieve perf counters (%d)\n", rc); + return false; + } + + highest_perf = amd_get_highest_perf(); + nominal_perf = perf_caps.nominal_perf; + + if (!highest_perf || !nominal_perf) { + pr_debug("Could not retrieve highest or nominal performance\n"); + return false; + } + + perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); + /* midpoint between max_boost and max_P */ + perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; + if (!perf_ratio) { + pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); + return false; + } + + *ratio = perf_ratio; + arch_set_max_freq_ratio(false); + + return true; +} + +static DEFINE_MUTEX(freq_invariance_lock); + +void init_freq_invariance_cppc(void) +{ + static bool secondary; + + mutex_lock(&freq_invariance_lock); + + init_freq_invariance(secondary, true); + secondary = true; + + mutex_unlock(&freq_invariance_lock); +} diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c deleted file mode 100644 index b961de569e7e..000000000000 --- a/arch/x86/kernel/acpi/cppc_msr.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * cppc_msr.c: MSR Interface for CPPC - * Copyright (c) 2016, Intel Corporation. - */ - -#include <acpi/cppc_acpi.h> -#include <asm/msr.h> - -/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ - -bool cpc_ffh_supported(void) -{ - return true; -} - -int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) -{ - int err; - - err = rdmsrl_safe_on_cpu(cpunum, reg->address, val); - if (!err) { - u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, - reg->bit_offset); - - *val &= mask; - *val >>= reg->bit_offset; - } - return err; -} - -int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) -{ - u64 rd_val; - int err; - - err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val); - if (!err) { - u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, - reg->bit_offset); - - val <<= reg->bit_offset; - val &= mask; - rd_val &= ~mask; - rd_val |= val; - err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val); - } - return err; -} diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5007c3ffe96f..b4470eabf151 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -389,7 +389,7 @@ static int emit_indirect(int op, int reg, u8 *bytes) * * CALL *%\reg * - * It also tries to inline spectre_v2=retpoline,amd when size permits. + * It also tries to inline spectre_v2=retpoline,lfence when size permits. */ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) { @@ -407,7 +407,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) BUG_ON(reg == 4); if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) + !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) return -1; op = insn->opcode.bytes[0]; @@ -438,9 +438,9 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) } /* - * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE. + * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE. */ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) { + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { bytes[i++] = 0x0f; bytes[i++] = 0xae; bytes[i++] = 0xe8; /* LFENCE */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1c1f218a701d..6296e1ebed1d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include <linux/prctl.h> #include <linux/sched/smt.h> #include <linux/pgtable.h> +#include <linux/bpf.h> #include <asm/spec-ctrl.h> #include <asm/cmdline.h> @@ -650,6 +651,32 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" +#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" + +#ifdef CONFIG_BPF_SYSCALL +void unpriv_ebpf_notify(int new_state) +{ + if (new_state) + return; + + /* Unprivileged eBPF is enabled */ + + switch (spectre_v2_enabled) { + case SPECTRE_V2_EIBRS: + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + break; + case SPECTRE_V2_EIBRS_LFENCE: + if (sched_smt_active()) + pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + break; + default: + break; + } +} +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -664,7 +691,10 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_FORCE, SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, + SPECTRE_V2_CMD_RETPOLINE_LFENCE, + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, }; enum spectre_v2_user_cmd { @@ -737,6 +767,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return (mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE); +} + static void __init spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { @@ -804,7 +841,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return; /* @@ -824,9 +861,11 @@ set_mode: static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", + [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", }; static const struct { @@ -837,8 +876,12 @@ static const struct { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, + { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "eibrs", SPECTRE_V2_CMD_EIBRS, false }, + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, }; @@ -875,10 +918,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || - cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || - cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_EIBRS || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) && + !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -887,6 +950,16 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) +{ + if (!IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("Kernel not compiled with retpoline; no mitigation available!"); + return SPECTRE_V2_NONE; + } + + return SPECTRE_V2_RETPOLINE; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -907,49 +980,64 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - mode = SPECTRE_V2_IBRS_ENHANCED; - /* Force it so VMEXIT will restore correctly */ - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - goto specv2_set_mode; + mode = SPECTRE_V2_EIBRS; + break; } - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + + mode = spectre_v2_select_retpoline(); break; - case SPECTRE_V2_CMD_RETPOLINE_AMD: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_amd; + + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: + pr_err(SPECTRE_V2_LFENCE_MSG); + mode = SPECTRE_V2_LFENCE; break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_generic; + mode = SPECTRE_V2_RETPOLINE; break; + case SPECTRE_V2_CMD_RETPOLINE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + mode = spectre_v2_select_retpoline(); + break; + + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; + + case SPECTRE_V2_CMD_EIBRS_LFENCE: + mode = SPECTRE_V2_EIBRS_LFENCE; + break; + + case SPECTRE_V2_CMD_EIBRS_RETPOLINE: + mode = SPECTRE_V2_EIBRS_RETPOLINE; break; } - pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); - return; -retpoline_auto: - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_amd: - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); - goto retpoline_generic; - } - mode = SPECTRE_V2_RETPOLINE_AMD; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); - } else { - retpoline_generic: - mode = SPECTRE_V2_RETPOLINE_GENERIC; + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } + + switch (mode) { + case SPECTRE_V2_NONE: + case SPECTRE_V2_EIBRS: + break; + + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); + fallthrough; + + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + break; } -specv2_set_mode: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -975,7 +1063,7 @@ specv2_set_mode: * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } @@ -1045,6 +1133,10 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; @@ -1684,7 +1776,7 @@ static ssize_t tsx_async_abort_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { @@ -1714,6 +1806,27 @@ static char *ibpb_state(void) return ""; } +static ssize_t spectre_v2_show_state(char *buf) +{ + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) + return sprintf(buf, "Vulnerable: LFENCE\n"); + + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + + return sprintf(buf, "%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + spectre_v2_module_string()); +} + static ssize_t srbds_show_state(char *buf) { return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); @@ -1739,12 +1852,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - spectre_v2_module_string()); + return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 48afe96ae0f0..7c63a1911fae 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -13,6 +13,30 @@ #include "sgx.h" /* + * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's + * follow right after the EPC data in the backing storage. In addition to the + * visible enclave pages, there's one extra page slot for SECS, before PCMD + * structs. + */ +static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl, + unsigned long page_index) +{ + pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs); + + return epc_end_off + page_index * sizeof(struct sgx_pcmd); +} + +/* + * Free a page from the backing storage in the given page index. + */ +static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index) +{ + struct inode *inode = file_inode(encl->backing); + + shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1); +} + +/* * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC * Pages" in the SDM. */ @@ -22,9 +46,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, { unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; struct sgx_encl *encl = encl_page->encl; + pgoff_t page_index, page_pcmd_off; struct sgx_pageinfo pginfo; struct sgx_backing b; - pgoff_t page_index; + bool pcmd_page_empty; + u8 *pcmd_page; int ret; if (secs_page) @@ -32,14 +58,16 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, else page_index = PFN_DOWN(encl->size); + page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); + ret = sgx_encl_get_backing(encl, page_index, &b); if (ret) return ret; pginfo.addr = encl_page->desc & PAGE_MASK; pginfo.contents = (unsigned long)kmap_atomic(b.contents); - pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) + - b.pcmd_offset; + pcmd_page = kmap_atomic(b.pcmd); + pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset; if (secs_page) pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page); @@ -55,11 +83,24 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, ret = -EFAULT; } - kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset)); + memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd)); + + /* + * The area for the PCMD in the page was zeroed above. Check if the + * whole page is now empty meaning that all PCMD's have been zeroed: + */ + pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE); + + kunmap_atomic(pcmd_page); kunmap_atomic((void *)(unsigned long)pginfo.contents); sgx_encl_put_backing(&b, false); + sgx_encl_truncate_backing_page(encl, page_index); + + if (pcmd_page_empty) + sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); + return ret; } @@ -579,7 +620,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing) { - pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5); + pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); struct page *contents; struct page *pcmd; @@ -587,7 +628,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, if (IS_ERR(contents)) return PTR_ERR(contents); - pcmd = sgx_encl_get_backing_page(encl, pcmd_index); + pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off)); if (IS_ERR(pcmd)) { put_page(contents); return PTR_ERR(pcmd); @@ -596,9 +637,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, backing->page_index = page_index; backing->contents = contents; backing->pcmd = pcmd; - backing->pcmd_offset = - (page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) * - sizeof(struct sgx_pcmd); + backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1); return 0; } diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 4b41efc9e367..8e4bc6453d26 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void) { struct sgx_epc_page *chunk[SGX_NR_TO_SCAN]; struct sgx_backing backing[SGX_NR_TO_SCAN]; - struct sgx_epc_section *section; struct sgx_encl_page *encl_page; struct sgx_epc_page *epc_page; - struct sgx_numa_node *node; pgoff_t page_index; int cnt = 0; int ret; @@ -418,13 +416,7 @@ skip: kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; - section = &sgx_epc_sections[epc_page->section]; - node = section->node; - - spin_lock(&node->lock); - list_add_tail(&epc_page->list, &node->free_page_list); - spin_unlock(&node->lock); - atomic_long_inc(&sgx_nr_free_pages); + sgx_free_epc_page(epc_page); } } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index bc0657f0deed..f267205f2d5a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt); */ void __init e820__reserve_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; if (!pa_data) @@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("e820: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* @@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void) sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - e820__range_update(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + pr_warn("e820: failed to memremap indirect setup_data\n"); + return; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + e820__range_update(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + e820__range_update_kexec(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + } } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + pa_data = pa_next; + early_memunmap(data, len); } e820__update_table(e820_table); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 437d7c930c0b..75ffaef8c299 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct user32_fxsr_struct newstate; + struct fxregs_state newstate; int ret; - BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state)); - if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; @@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, /* Copy the state */ memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); - /* Clear xmm8..15 */ + /* Clear xmm8..15 for 32-bit callers */ BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); - memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16); + if (in_ia32_syscall()) + memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 02b3ddaf4f75..7c7824ae7862 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1558,7 +1558,10 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; - newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + + if (!guest_fpu) + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 64b6da95af98..e2e89bebcbc3 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no, static int __init create_setup_data_nodes(struct dentry *parent) { + struct setup_indirect *indirect; struct setup_data_node *node; struct setup_data *data; - int error; + u64 pa_data, pa_next; struct dentry *d; - u64 pa_data; + int error; + u32 len; int no = 0; d = debugfs_create_dir("setup_data", parent); @@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent) error = -ENOMEM; goto err_dir; } - - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - node->paddr = ((struct setup_indirect *)data->data)->addr; - node->type = ((struct setup_indirect *)data->data)->type; - node->len = ((struct setup_indirect *)data->data)->len; + pa_next = data->next; + + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) { + kfree(node); + error = -ENOMEM; + goto err_dir; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + node->paddr = indirect->addr; + node->type = indirect->type; + node->len = indirect->len; + } else { + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + } } else { node->paddr = pa_data; node->type = data->type; @@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) } create_setup_data_node(d, no, node); - pa_data = data->next; + pa_data = pa_next; memunmap(data); no++; diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index d0a19121c6a4..257892fcefa7 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr) static int __init get_setup_data_size(int nr, size_t *size) { - int i = 0; + u64 pa_data = boot_params.hdr.setup_data, pa_next; + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data = boot_params.hdr.setup_data; + int i = 0; + u32 len; while (pa_data) { data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); if (!data) return -ENOMEM; + pa_next = data->next; + if (nr == i) { - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - *size = ((struct setup_indirect *)data->data)->len; - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + *size = indirect->len; + else + *size = data->len; + } else { *size = data->len; + } memunmap(data); return 0; } - pa_data = data->next; + pa_data = pa_next; memunmap(data); i++; } @@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size) static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret; u64 paddr; - struct setup_data *data; + u32 len; ret = kobj_to_setup_data_nr(kobj, &nr); if (ret) @@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT) - ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type); - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + ret = sprintf(buf, "0x%x\n", indirect->type); + } else { ret = sprintf(buf, "0x%x\n", data->type); + } + memunmap(data); return ret; } @@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp, char *buf, loff_t off, size_t count) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret = 0; u64 paddr, len; - struct setup_data *data; void *p; ret = kobj_to_setup_data_nr(kobj, &nr); @@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } else { + /* + * Even though this is technically undefined, return + * the data as though it is a normal setup_data struct. + * This will at least allow it to be inspected. + */ + paddr += sizeof(*data); + len = data->len; + } } else { paddr += sizeof(*data); len = data->len; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a438217cbfac..d77481ecb0d5 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -462,19 +462,24 @@ static bool pv_tlb_flush_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && + !boot_cpu_has(X86_FEATURE_MWAIT) && + (num_possible_cpus() != 1)); } static bool pv_ipi_supported(void) { - return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI); + return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) && + (num_possible_cpus() != 1)); } static bool pv_sched_yield_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && + !boot_cpu_has(X86_FEATURE_MWAIT) && + (num_possible_cpus() != 1)); } #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) @@ -619,7 +624,7 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask) /* Make sure other vCPUs get a chance to run if they need to. */ for_each_cpu(cpu, mask) { - if (vcpu_is_preempted(cpu)) { + if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) { kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu)); break; } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index a35cbf9107af..c5caa7311bd8 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -239,6 +239,9 @@ static void __init kvmclock_init_mem(void) static int __init kvm_setup_vsyscall_timeinfo(void) { + if (!kvm_para_available() || !kvmclock) + return 0; + kvmclock_init_mem(); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 95fa745e310a..96d7c27b7093 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -273,6 +273,14 @@ int module_finalize(const Elf_Ehdr *hdr, retpolines = s; } + /* + * See alternative_instructions() for the ordering rules between the + * various patching types. + */ + if (para) { + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); @@ -290,11 +298,6 @@ int module_finalize(const Elf_Ehdr *hdr, tseg, tseg + text->sh_size); } - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - /* make jump label nops */ jump_label_apply_nops(me); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 6d2244c94799..8d2f2f995539 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(long), + .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, @@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), + .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7a132eb794d..90d7e1788c91 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -369,21 +369,41 @@ static void __init parse_setup_data(void) static void __init memblock_x86_reserve_range_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("setup: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + memblock_reserve(pa_data, sizeof(*data) + data->len); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - memblock_reserve(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + pr_warn("setup: failed to memremap indirect setup_data\n"); + return; + } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + memblock_reserve(indirect->addr, indirect->len); + } + + pa_data = pa_next; + early_memunmap(data, len); } } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 617012f4619f..2ef14772dc04 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -83,10 +83,6 @@ #include <asm/hw_irq.h> #include <asm/stackprotector.h> -#ifdef CONFIG_ACPI_CPPC_LIB -#include <acpi/cppc_acpi.h> -#endif - /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); @@ -155,8 +151,6 @@ static inline void smpboot_restore_warm_reset_vector(void) *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } -static void init_freq_invariance(bool secondary, bool cppc_ready); - /* * Report back to the Boot Processor during boot time or to the caller processor * during CPU online. @@ -2097,48 +2091,6 @@ out: return true; } -#ifdef CONFIG_ACPI_CPPC_LIB -static bool amd_set_max_freq_ratio(void) -{ - struct cppc_perf_caps perf_caps; - u64 highest_perf, nominal_perf; - u64 perf_ratio; - int rc; - - rc = cppc_get_perf_caps(0, &perf_caps); - if (rc) { - pr_debug("Could not retrieve perf counters (%d)\n", rc); - return false; - } - - highest_perf = amd_get_highest_perf(); - nominal_perf = perf_caps.nominal_perf; - - if (!highest_perf || !nominal_perf) { - pr_debug("Could not retrieve highest or nominal performance\n"); - return false; - } - - perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); - /* midpoint between max_boost and max_P */ - perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; - if (!perf_ratio) { - pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); - return false; - } - - arch_turbo_freq_ratio = perf_ratio; - arch_set_max_freq_ratio(false); - - return true; -} -#else -static bool amd_set_max_freq_ratio(void) -{ - return false; -} -#endif - static void init_counter_refs(void) { u64 aperf, mperf; @@ -2167,7 +2119,7 @@ static void register_freq_invariance_syscore_ops(void) static inline void register_freq_invariance_syscore_ops(void) {} #endif -static void init_freq_invariance(bool secondary, bool cppc_ready) +void init_freq_invariance(bool secondary, bool cppc_ready) { bool ret = false; @@ -2187,7 +2139,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready) if (!cppc_ready) { return; } - ret = amd_set_max_freq_ratio(); + ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); } if (ret) { @@ -2200,22 +2152,6 @@ static void init_freq_invariance(bool secondary, bool cppc_ready) } } -#ifdef CONFIG_ACPI_CPPC_LIB -static DEFINE_MUTEX(freq_invariance_lock); - -void init_freq_invariance_cppc(void) -{ - static bool secondary; - - mutex_lock(&freq_invariance_lock); - - init_freq_invariance(secondary, true); - secondary = true; - - mutex_unlock(&freq_invariance_lock); -} -#endif - static void disable_freq_invariance_workfn(struct work_struct *work) { static_branch_disable(&arch_scale_freq_key); @@ -2264,8 +2200,4 @@ error: pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); schedule_work(&disable_freq_invariance_work); } -#else -static inline void init_freq_invariance(bool secondary, bool cppc_ready) -{ -} #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c9d566dcf89a..8143693a7ea6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -659,6 +659,7 @@ static bool do_int3(struct pt_regs *regs) return res == NOTIFY_STOP; } +NOKPROBE_SYMBOL(do_int3); static void do_int3_user(struct pt_regs *regs) { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 494d4d351859..b8f8d268d058 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -282,6 +282,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; + u64 guest_supported_xcr0; best = kvm_find_cpuid_entry(vcpu, 1, 0); if (best && apic) { @@ -293,9 +294,11 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - vcpu->arch.guest_supported_xcr0 = + guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0; + kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d7e6fde82d25..9322e6340a74 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2306,7 +2306,12 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) apic->irr_pending = true; apic->isr_count = 1; } else { - apic->irr_pending = (apic_search_irr(apic) != -1); + /* + * Don't clear irr_pending, searching the IRR can race with + * updates from the CPU as APICv is still active from hardware's + * perspective. The flag will be cleared as appropriate when + * KVM injects the interrupt. + */ apic->isr_count = count_vectors(apic->regs + APIC_ISR); } } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 593093b52395..5628d0ba637e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3565,7 +3565,7 @@ set_root_pgd: out_unlock: write_unlock(&vcpu->kvm->mmu_lock); - return 0; + return r; } static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) @@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } +static u32 alloc_apf_token(struct kvm_vcpu *vcpu) +{ + /* make sure the token value is not 0 */ + u32 id = vcpu->arch.apf.id; + + if (id << 12 == 0) + vcpu->arch.apf.id = 1; + + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; +} + static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, gfn_t gfn) { struct kvm_arch_async_pf arch; - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; + arch.token = alloc_apf_token(vcpu); arch.gfn = gfn; arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index f614f95acc6b..b1a02993782b 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -95,7 +95,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event, } static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, - unsigned config, bool exclude_user, + u64 config, bool exclude_user, bool exclude_kernel, bool intr, bool in_tx, bool in_tx_cp) { @@ -181,7 +181,8 @@ static int cmp_u64(const void *a, const void *b) void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { - unsigned config, type = PERF_TYPE_RAW; + u64 config; + u32 type = PERF_TYPE_RAW; struct kvm *kvm = pmc->vcpu->kvm; struct kvm_pmu_event_filter *filter; bool allow_event = true; @@ -220,7 +221,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) } if (type == PERF_TYPE_RAW) - config = eventsel & X86_RAW_EVENT_MASK; + config = eventsel & AMD64_RAW_EVENT_MASK; if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) return; diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 90364d02f22a..fb3e20791338 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -27,20 +27,6 @@ #include "irq.h" #include "svm.h" -#define SVM_AVIC_DOORBELL 0xc001011b - -#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) - -/* - * 0xff is broadcast, so the max index allowed for physical APIC ID - * table is 0xfe. APIC IDs above 0xff are reserved. - */ -#define AVIC_MAX_PHYSICAL_ID_COUNT 255 - -#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 -#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 -#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF - /* AVIC GATAG is encoded using VM and VCPU IDs */ #define AVIC_VCPU_ID_BITS 8 #define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1) @@ -73,12 +59,6 @@ struct amd_svm_iommu_ir { void *data; /* Storing pointer to struct amd_ir_data */ }; -enum avic_ipi_failure_cause { - AVIC_IPI_FAILURE_INVALID_INT_TYPE, - AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, - AVIC_IPI_FAILURE_INVALID_TARGET, - AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, -}; /* Note: * This function is called from IOMMU driver to notify @@ -289,6 +269,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } +void avic_ring_doorbell(struct kvm_vcpu *vcpu) +{ + /* + * Note, the vCPU could get migrated to a different pCPU at any point, + * which could result in signalling the wrong/previous pCPU. But if + * that happens the vCPU is guaranteed to do a VMRUN (after being + * migrated) and thus will process pending interrupts, i.e. a doorbell + * is not needed (and the spurious one is harmless). + */ + int cpu = READ_ONCE(vcpu->cpu); + + if (cpu != get_cpu()) + wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu)); + put_cpu(); +} + static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, u32 icrl, u32 icrh) { @@ -304,8 +300,13 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, kvm_for_each_vcpu(i, vcpu, kvm) { if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK, GET_APIC_DEST_FIELD(icrh), - icrl & APIC_DEST_MASK)) - kvm_vcpu_wake_up(vcpu); + icrl & APIC_DEST_MASK)) { + vcpu->arch.apic->irr_pending = true; + svm_complete_interrupt_delivery(vcpu, + icrl & APIC_MODE_MASK, + icrl & APIC_INT_LEVELTRIG, + icrl & APIC_VECTOR_MASK); + } } } @@ -345,8 +346,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh); break; case AVIC_IPI_FAILURE_INVALID_TARGET: - WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n", - index, vcpu->vcpu_id, icrh, icrl); break; case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); @@ -669,52 +668,6 @@ void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) -{ - if (!vcpu->arch.apicv_active) - return -1; - - kvm_lapic_set_irr(vec, vcpu->arch.apic); - - /* - * Pairs with the smp_mb_*() after setting vcpu->guest_mode in - * vcpu_enter_guest() to ensure the write to the vIRR is ordered before - * the read of guest_mode, which guarantees that either VMRUN will see - * and process the new vIRR entry, or that the below code will signal - * the doorbell if the vCPU is already running in the guest. - */ - smp_mb__after_atomic(); - - /* - * Signal the doorbell to tell hardware to inject the IRQ if the vCPU - * is in the guest. If the vCPU is not in the guest, hardware will - * automatically process AVIC interrupts at VMRUN. - */ - if (vcpu->mode == IN_GUEST_MODE) { - int cpu = READ_ONCE(vcpu->cpu); - - /* - * Note, the vCPU could get migrated to a different pCPU at any - * point, which could result in signalling the wrong/previous - * pCPU. But if that happens the vCPU is guaranteed to do a - * VMRUN (after being migrated) and thus will process pending - * interrupts, i.e. a doorbell is not needed (and the spurious - * one is harmless). - */ - if (cpu != get_cpu()) - wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu)); - put_cpu(); - } else { - /* - * Wake the vCPU if it was blocking. KVM will then detect the - * pending IRQ when checking if the vCPU has a wake event. - */ - kvm_vcpu_wake_up(vcpu); - } - - return 0; -} - bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1218b5a342fc..39d280e7e80e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1457,18 +1457,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, !__nested_vmcb_check_save(vcpu, &save_cached)) goto out_free; - /* - * While the nested guest CR3 is already checked and set by - * KVM_SET_SREGS, it was set when nested state was yet loaded, - * thus MMU might not be initialized correctly. - * Set it again to fix this. - */ - - ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, - nested_npt_enabled(svm), false); - if (WARN_ON_ONCE(ret)) - goto out_free; - /* * All checks done, we can enter guest mode. Userspace provides @@ -1494,6 +1482,20 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm_switch_vmcb(svm, &svm->nested.vmcb02); nested_vmcb02_prepare_control(svm); + + /* + * While the nested guest CR3 is already checked and set by + * KVM_SET_SREGS, it was set when nested state was yet loaded, + * thus MMU might not be initialized correctly. + * Set it again to fix this. + */ + + ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, + nested_npt_enabled(svm), false); + if (WARN_ON_ONCE(ret)) + goto out_free; + + kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); ret = 0; out_free: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a290efb272ad..fd3a00c892c7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1585,6 +1585,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); u64 hcr0 = cr0; + bool old_paging = is_paging(vcpu); #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { @@ -1601,8 +1602,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif vcpu->arch.cr0 = cr0; - if (!npt_enabled) + if (!npt_enabled) { hcr0 |= X86_CR0_PG | X86_CR0_WP; + if (old_paging != is_paging(vcpu)) + svm_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } /* * re-enable caching here because the QEMU bios @@ -1646,8 +1650,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) svm_flush_tlb(vcpu); vcpu->arch.cr4 = cr4; - if (!npt_enabled) + if (!npt_enabled) { cr4 |= X86_CR4_PAE; + + if (!is_paging(vcpu)) + cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); + } cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); @@ -2685,8 +2693,23 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; switch (ecx) { case MSR_AMD64_TSC_RATIO: - if (!msr->host_initiated && !svm->tsc_scaling_enabled) - return 1; + + if (!svm->tsc_scaling_enabled) { + + if (!msr->host_initiated) + return 1; + /* + * In case TSC scaling is not enabled, always + * leave this MSR at the default value. + * + * Due to bug in qemu 6.2.0, it would try to set + * this msr to 0 if tsc scaling is not enabled. + * Ignore this value as well. + */ + if (data != 0 && data != svm->tsc_ratio_msr) + return 1; + break; + } if (data & TSC_RATIO_RSVD) return 1; @@ -3291,21 +3314,55 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; } -static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, - int trig_mode, int vector) +void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, + int trig_mode, int vector) { - struct kvm_vcpu *vcpu = apic->vcpu; + /* + * vcpu->arch.apicv_active must be read after vcpu->mode. + * Pairs with smp_store_release in vcpu_enter_guest. + */ + bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE); - if (svm_deliver_avic_intr(vcpu, vector)) { - kvm_lapic_set_irr(vector, apic); + if (!READ_ONCE(vcpu->arch.apicv_active)) { + /* Process the interrupt via inject_pending_event */ kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); + return; + } + + trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector); + if (in_guest_mode) { + /* + * Signal the doorbell to tell hardware to inject the IRQ. If + * the vCPU exits the guest before the doorbell chimes, hardware + * will automatically process AVIC interrupts at the next VMRUN. + */ + avic_ring_doorbell(vcpu); } else { - trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector); + /* + * Wake the vCPU if it was blocking. KVM will then detect the + * pending IRQ when checking if the vCPU has a wake event. + */ + kvm_vcpu_wake_up(vcpu); } } +static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) +{ + kvm_lapic_set_irr(vector, apic); + + /* + * Pairs with the smp_mb_*() after setting vcpu->guest_mode in + * vcpu_enter_guest() to ensure the write to the vIRR is ordered before + * the read of guest_mode. This guarantees that either VMRUN will see + * and process the new vIRR entry, or that svm_complete_interrupt_delivery + * will signal the doorbell if the CPU has already entered the guest. + */ + smp_mb__after_atomic(); + svm_complete_interrupt_delivery(apic->vcpu, delivery_mode, trig_mode, vector); +} + static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3353,11 +3410,13 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) if (svm->nested.nested_run_pending) return -EBUSY; + if (svm_nmi_blocked(vcpu)) + return 0; + /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) return -EBUSY; - - return !svm_nmi_blocked(vcpu); + return 1; } static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) @@ -3409,9 +3468,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) { struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) return -EBUSY; + if (svm_interrupt_blocked(vcpu)) + return 0; + /* * An IRQ must not be injected into L2 if it's supposed to VM-Exit, * e.g. if the IRQ arrived asynchronously after checking nested events. @@ -3419,7 +3482,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm)) return -EBUSY; - return !svm_interrupt_blocked(vcpu); + return 1; } static void svm_enable_irq_window(struct kvm_vcpu *vcpu) @@ -4150,11 +4213,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) if (svm->nested.nested_run_pending) return -EBUSY; + if (svm_smi_blocked(vcpu)) + return 0; + /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm)) return -EBUSY; - return !svm_smi_blocked(vcpu); + return 1; } static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) @@ -4248,11 +4314,18 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) * Enter the nested guest now */ + vmcb_mark_all_dirty(svm->vmcb01.ptr); + vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); + if (ret) + goto unmap_save; + + svm->nested.nested_run_pending = 1; + unmap_save: kvm_vcpu_unmap(vcpu, &map_save, true); unmap_map: @@ -4637,6 +4710,7 @@ static __init void svm_set_cpu_caps(void) /* CPUID 0x80000001 and 0x8000000A (SVM features) */ if (nested) { kvm_cpu_cap_set(X86_FEATURE_SVM); + kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN); if (nrips) kvm_cpu_cap_set(X86_FEATURE_NRIPS); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 73525353e424..fa98d6844728 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -489,6 +489,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value); int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code); void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, int read, int write); +void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, + int trig_mode, int vec); /* nested.c */ @@ -556,17 +558,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops; /* avic.c */ -#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) -#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 -#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) - -#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) -#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) -#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) -#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) - -#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL - int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); @@ -583,12 +574,12 @@ bool svm_check_apicv_inhibit_reasons(ulong bit); void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); -int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec); bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu); int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void avic_vcpu_blocking(struct kvm_vcpu *vcpu); void avic_vcpu_unblocking(struct kvm_vcpu *vcpu); +void avic_ring_doorbell(struct kvm_vcpu *vcpu); /* sev.c */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ba34e94049c7..dc822a1d403d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -246,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, src = &prev->host_state; dest = &vmx->loaded_vmcs->host_state; - vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel, - src->fs_base, src->gs_base); + vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); dest->ldt_sel = src->ldt_sel; #ifdef CONFIG_X86_64 dest->ds_sel = src->ds_sel; @@ -3056,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; bool vm_fail; if (!nested_early_check) @@ -3079,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) */ vmcs_writel(GUEST_RFLAGS, 0); + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6c27bd0c89e1..b730d799c26e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1080,14 +1080,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx) wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); } -void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base) +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base) { - if (unlikely(cr3 != host->cr3)) { - vmcs_writel(HOST_CR3, cr3); - host->cr3 = cr3; - } if (unlikely(fs_sel != host->fs_sel)) { if (!(fs_sel & 7)) vmcs_write16(HOST_FS_SELECTOR, fs_sel); @@ -1182,9 +1177,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) gs_base = segment_base(gs_sel); #endif - vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(), - fs_sel, gs_sel, fs_base, gs_base); - + vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); vmx->guest_state_loaded = true; } @@ -6791,7 +6784,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -6834,6 +6827,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + /* + * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately + * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time + * it switches back to the current->mm, which can occur in KVM context + * when switching to a temporary mm to patch kernel code, e.g. if KVM + * toggles a static key while handling a VM-Exit. + */ + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); @@ -7659,6 +7665,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (ret) return ret; + vmx->nested.nested_run_pending = 1; vmx->nested.smm.guest_mode = false; } return 0; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7f2c82e7f38f..9c6bfcd84008 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -374,9 +374,8 @@ int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); -void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base); +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base); int vmx_get_cpl(struct kvm_vcpu *vcpu); bool vmx_emulation_required(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7131d735b1ef..eb4029660bd9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -984,6 +984,18 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); +static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.guest_fpu.fpstate->user_xfeatures; +} + +#ifdef CONFIG_X86_64 +static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) +{ + return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC; +} +#endif + static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { u64 xcr0 = xcr; @@ -1003,7 +1015,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) * saving. However, xcr0 bit 0 is always set, even if the * emulated CPU does not support XSAVE (see kvm_vcpu_reset()). */ - valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; + valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP; if (xcr0 & ~valid_bits) return 1; @@ -2351,10 +2363,12 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) return tsc; } +#ifdef CONFIG_X86_64 static inline int gtod_is_based_on_tsc(int mode) { return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK; } +#endif static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { @@ -3706,8 +3720,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) return 1; - if (data & ~(XFEATURE_MASK_USER_DYNAMIC & - vcpu->arch.guest_supported_xcr0)) + if (data & ~kvm_guest_supported_xfd(vcpu)) return 1; fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data); @@ -3717,8 +3730,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) return 1; - if (data & ~(XFEATURE_MASK_USER_DYNAMIC & - vcpu->arch.guest_supported_xcr0)) + if (data & ~kvm_guest_supported_xfd(vcpu)) return 1; vcpu->arch.guest_fpu.xfd_err = data; @@ -4233,6 +4245,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_SYS_ATTRIBUTES: + case KVM_CAP_ENABLE_CAP: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -8942,6 +8955,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) return -KVM_EOPNOTSUPP; + /* + * When tsc is in permanent catchup mode guests won't be able to use + * pvclock_read_retry loop to get consistent view of pvclock + */ + if (vcpu->arch.tsc_always_catchup) + return -KVM_EOPNOTSUPP; + if (!kvm_get_walltime_and_clockread(&ts, &cycle)) return -KVM_EOPNOTSUPP; @@ -9160,6 +9180,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) likely(!pic_in_kernel(vcpu->kvm)); } +/* Called within kvm->srcu read side. */ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; @@ -9168,16 +9189,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - /* - * The call to kvm_ready_for_interrupt_injection() may end up in - * kvm_xen_has_interrupt() which may require the srcu lock to be - * held, to protect against changes in the vcpu_info address. - */ - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || kvm_vcpu_ready_for_interrupt_injection(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (is_smm(vcpu)) kvm_run->flags |= KVM_RUN_X86_SMM; @@ -9795,6 +9809,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); /* + * Called within kvm->srcu read side. * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the * userspace. @@ -9983,7 +9998,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * result in virtual interrupt delivery. */ local_irq_disable(); - vcpu->mode = IN_GUEST_MODE; + + /* Store vcpu->apicv_active before vcpu->mode. */ + smp_store_release(&vcpu->mode, IN_GUEST_MODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -10171,6 +10188,7 @@ out: return r; } +/* Called within kvm->srcu read side. */ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { bool hv_timer; @@ -10230,12 +10248,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) !vcpu->arch.apf.halted); } +/* Called within kvm->srcu read side. */ static int vcpu_run(struct kvm_vcpu *vcpu) { int r; struct kvm *kvm = vcpu->kvm; - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); vcpu->arch.l1tf_flush_l1d = true; for (;;) { @@ -10263,14 +10281,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (__xfer_to_guest_mode_work_pending()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); r = xfer_to_guest_mode_handle_work(vcpu); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); if (r) return r; - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - return r; } @@ -10376,6 +10392,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; + struct kvm *kvm = vcpu->kvm; int r; vcpu_load(vcpu); @@ -10383,6 +10400,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_run->flags = 0; kvm_load_guest_fpu(vcpu); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { r = -EINTR; @@ -10393,7 +10411,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * use before KVM has ever run the vCPU. */ WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu)); + + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + if (kvm_apic_accept_events(vcpu) < 0) { r = 0; goto out; @@ -10453,8 +10475,9 @@ out: if (kvm_run->kvm_valid_regs) store_regs(vcpu); post_kvm_run_save(vcpu); - kvm_sigset_deactivate(vcpu); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_sigset_deactivate(vcpu); vcpu_put(vcpu); return r; } diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index bad57535fad0..74be1fda58e3 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -133,32 +133,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) { struct kvm_vcpu_xen *vx = &v->arch.xen; + struct gfn_to_hva_cache *ghc = &vx->runstate_cache; + struct kvm_memslots *slots = kvm_memslots(v->kvm); + bool atomic = (state == RUNSTATE_runnable); uint64_t state_entry_time; - unsigned int offset; + int __user *user_state; + uint64_t __user *user_times; kvm_xen_update_runstate(v, state); if (!vx->runstate_set) return; - BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); + if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) && + kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len)) + return; + + /* We made sure it fits in a single page */ + BUG_ON(!ghc->memslot); + + if (atomic) + pagefault_disable(); - offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time); -#ifdef CONFIG_X86_64 /* - * The only difference is alignment of uint64_t in 32-bit. - * So the first field 'state' is accessed directly using - * offsetof() (where its offset happens to be zero), while the - * remaining fields which are all uint64_t, start at 'offset' - * which we tweak here by adding 4. + * The only difference between 32-bit and 64-bit versions of the + * runstate struct us the alignment of uint64_t in 32-bit, which + * means that the 64-bit version has an additional 4 bytes of + * padding after the first field 'state'. + * + * So we use 'int __user *user_state' to point to the state field, + * and 'uint64_t __user *user_times' for runstate_entry_time. So + * the actual array of time[] in each state starts at user_times[1]. */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); + BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); + user_state = (int __user *)ghc->hva; + + BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); + + user_times = (uint64_t __user *)(ghc->hva + + offsetof(struct compat_vcpu_runstate_info, + state_entry_time)); +#ifdef CONFIG_X86_64 BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != offsetof(struct compat_vcpu_runstate_info, time) + 4); if (v->kvm->arch.xen.long_mode) - offset = offsetof(struct vcpu_runstate_info, state_entry_time); + user_times = (uint64_t __user *)(ghc->hva + + offsetof(struct vcpu_runstate_info, + state_entry_time)); #endif /* * First write the updated state_entry_time at the appropriate @@ -172,10 +197,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != sizeof(state_entry_time)); - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &state_entry_time, offset, - sizeof(state_entry_time))) - return; + if (__put_user(state_entry_time, user_times)) + goto out; smp_wmb(); /* @@ -189,11 +212,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &vx->current_runstate, - offsetof(struct vcpu_runstate_info, state), - sizeof(vx->current_runstate))) - return; + if (__put_user(vx->current_runstate, user_state)) + goto out; /* * Write the actual runstate times immediately after the @@ -208,24 +228,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &vx->runstate_times[0], - offset + sizeof(u64), - sizeof(vx->runstate_times))) - return; - + if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times))) + goto out; smp_wmb(); /* * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's * runstate_entry_time field. */ - state_entry_time &= ~XEN_RUNSTATE_UPDATE; - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &state_entry_time, offset, - sizeof(state_entry_time))) - return; + __put_user(state_entry_time, user_times); + smp_wmb(); + + out: + mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT); + + if (atomic) + pagefault_enable(); } int __kvm_xen_has_interrupt(struct kvm_vcpu *v) @@ -443,6 +462,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } + /* It must fit within a single page */ + if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) { + r = -EINVAL; + break; + } + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache, data->u.gpa, @@ -460,6 +485,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } + /* It must fit within a single page */ + if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) { + r = -EINVAL; + break; + } + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache, data->u.gpa, @@ -481,6 +512,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } + /* It must fit within a single page */ + if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) { + r = -EINVAL; + break; + } + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.xen.runstate_cache, data->u.gpa, diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 89b3fb244e15..afbdda539b80 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE .endm diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 026031b3b782..17a492c27306 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -615,6 +615,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, static bool memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; @@ -627,6 +628,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap setup_data entry\n"); + return false; + } paddr_next = data->next; len = data->len; @@ -636,10 +641,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, return true; } - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + memunmap(data); + data = memremap(paddr, sizeof(*data) + len, + MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } } memunmap(data); @@ -660,22 +676,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; paddr = boot_params.hdr.setup_data; while (paddr) { - unsigned int len; + unsigned int len, size; if (phys_addr == paddr) return true; data = early_memremap_decrypted(paddr, sizeof(*data)); + if (!data) { + pr_warn("failed to early memremap setup_data entry\n"); + return false; + } + + size = sizeof(*data); paddr_next = data->next; len = data->len; - early_memunmap(data, sizeof(*data)); + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + early_memunmap(data, sizeof(*data)); + return true; + } + + if (data->type == SETUP_INDIRECT) { + size += len; + early_memunmap(data, sizeof(*data)); + data = early_memremap_decrypted(paddr, size); + if (!data) { + pr_warn("failed to early memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } + } + + early_memunmap(data, size); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) return true; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2b1e266ff95c..0ecb140864b2 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -394,7 +394,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) u8 *prog = *pprog; #ifdef CONFIG_RETPOLINE - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) { + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { |