Diffstat (limited to 'arch')
586 files changed, 5639 insertions, 5810 deletions
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index 1989b946a28d..d1ed5a8133c5 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -283,14 +283,8 @@ static inline void __iomem *ioremap(unsigned long port, unsigned long size)
 	return IO_CONCAT(__IO_PREFIX,ioremap) (port, size);
 }
 
-static inline void __iomem * ioremap_nocache(unsigned long offset,
-					     unsigned long size)
-{
-	return ioremap(offset, size);
-}
-
-#define ioremap_wc ioremap_nocache
-#define ioremap_uc ioremap_nocache
+#define ioremap_wc ioremap
+#define ioremap_uc ioremap
 
 static inline void iounmap(volatile void __iomem *addr)
 {
diff --git a/arch/alpha/include/asm/vmalloc.h b/arch/alpha/include/asm/vmalloc.h
new file mode 100644
index 000000000000..0a9a366a4d34
--- /dev/null
+++ b/arch/alpha/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ALPHA_VMALLOC_H
+#define _ASM_ALPHA_VMALLOC_H
+
+#endif /* _ASM_ALPHA_VMALLOC_H */
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 26108ea785c2..5f448201955b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -13,7 +13,7 @@ config ARC
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
 	select ARCH_32BIT_OFF_T
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select DMA_DIRECT_REMAP
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 41b16f21beec..0b8b63d0bec1 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -162,7 +162,7 @@
 #endif
 
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
-	ST2	r58, r59, PT_sp + 12
+	ST2	r58, r59, PT_r58
 #endif
 
 .endm
@@ -172,8 +172,8 @@
 
 	LD2	gp, fp, PT_r26	; gp (r26), fp (r27)
 
-	ld	r12, [sp, PT_sp + 4]
-	ld	r30, [sp, PT_sp + 8]
+	ld	r12, [sp, PT_r12]
+	ld	r30, [sp, PT_r30]
 
 	; Restore SP (into AUX_USER_SP) only if returning to U mode
 	;  - for K mode, it will be implicitly restored as stack is unwound
@@ -190,7 +190,7 @@
 #endif
 
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
-	LD2	r58, r59, PT_sp + 12
+	LD2	r58, r59, PT_r58
 #endif
 
 .endm
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 9a74ce71a767..30ac40fed2c5 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -8,7 +8,6 @@
 #define _ASM_ARC_HUGEPAGE_H
 
 #include <linux/types.h>
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pte_t pmd_pte(pmd_t pmd)
diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..973095aad665
--- /dev/null
+++ b/arch/arc/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARC_VMALLOC_H
+#define _ASM_ARC_VMALLOC_H
+
+#endif /* _ASM_ARC_VMALLOC_H */
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 1f621e416521..c783bcd35eb8 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -66,7 +66,15 @@ int main(void)
 
 	DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
 	DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
-	DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
+#ifdef CONFIG_ISA_ARCV2
+	OFFSET(PT_r12, pt_regs, r12);
+	OFFSET(PT_r30, pt_regs, r30);
+#endif
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	OFFSET(PT_r58, pt_regs, r58);
+	OFFSET(PT_r59, pt_regs, r59);
+#endif
 
 	return 0;
 }
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 72be01270e24..1f6bb184a44d 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -337,11 +337,11 @@ resume_user_mode_begin:
 resume_kernel_mode:
 
 	; Disable Interrupts from this point on
-	; CONFIG_PREEMPT: This is a must for preempt_schedule_irq()
-	; !CONFIG_PREEMPT: To ensure restore_regs is intr safe
+	; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq()
+	; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe
 	IRQ_DISABLE	r9
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
 	; Can't preempt if preemption disabled
 	GET_CURR_THR_INFO_FROM_SP   r10
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index dc05a63516f5..27ea64b1fa33 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -42,10 +42,10 @@ do {				\
 
 #define EXTRA_INFO(f) { \
 		BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
-				% FIELD_SIZEOF(struct unwind_frame_info, f)) \
+				% sizeof_field(struct unwind_frame_info, f)) \
 		+ offsetof(struct unwind_frame_info, f) \
-		/ FIELD_SIZEOF(struct unwind_frame_info, f), \
-		FIELD_SIZEOF(struct unwind_frame_info, f) \
+		/ sizeof_field(struct unwind_frame_info, f), \
+		sizeof_field(struct unwind_frame_info, f) \
 	}
 #define PTREGS_INFO(f) EXTRA_INFO(regs.f)
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
index a376a50d3fea..a931d0a256d0 100644
--- a/arch/arc/plat-eznps/Kconfig
+++ b/arch/arc/plat-eznps/Kconfig
@@ -7,7 +7,7 @@
 menuconfig ARC_PLAT_EZNPS
 	bool "\"EZchip\" ARC dev platform"
 	select CPU_BIG_ENDIAN
-	select CLKSRC_NPS
+	select CLKSRC_NPS if !PHYS_ADDR_T_64BIT
 	select EZNPS_GIC
 	select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET
 	help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ba75e3661a41..0b1b1c66bce9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -36,7 +36,7 @@ config ARM
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
-	select BUILDTIME_EXTABLE_SORT if MMU
+	select BUILDTIME_TABLE_SORT if MMU
 	select CLONE_BACKWARDS
 	select CPU_PM if SUSPEND || CPU_IDLE
 	select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -72,6 +72,7 @@ config ARM
 	select HAVE_ARM_SMCCC if CPU_V7
 	select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
 	select HAVE_CONTEXT_TRACKING
+	select HAVE_COPY_THREAD_TLS
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS if MMU
diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi
index 7ad079861efd..91f93bc89716 100644
--- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi
+++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi
@@ -131,6 +131,11 @@
 };
 
 / {
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0x20000000>; /* 512 MB */
+	};
+
 	clk_mcasp0_fixed: clk_mcasp0_fixed {
 		#clock-cells = <0>;
 		compatible = "fixed-clock";
diff --git a/arch/arm/boot/dts/am335x-sancloud-bbe.dts b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
index 8678e6e35493..e5fdb7abb0d5 100644
--- a/arch/arm/boot/dts/am335x-sancloud-bbe.dts
+++ b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
@@ -108,7 +108,7 @@
 
 &cpsw_emac0 {
 	phy-handle = <&ethphy0>;
-	phy-mode = "rgmii-txid";
+	phy-mode = "rgmii-id";
 };
 
 &i2c0 {
diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts
index cae4500194fe..811c8cae315b 100644
--- a/arch/arm/boot/dts/am437x-gp-evm.dts
+++ b/arch/arm/boot/dts/am437x-gp-evm.dts
@@ -86,7 +86,7 @@
 	};
 
 	lcd0: display {
-		compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+		compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
 		label = "lcd";
 
 		backlight = <&lcd_bl>;
diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 95314121d111..a6fbc088daa8 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -42,7 +42,7 @@
 	};
 
 	lcd0: display {
-		compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+		compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
 		label = "lcd";
 
 		backlight = <&lcd_bl>;
@@ -848,6 +848,7 @@
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&spi0_pins_default>;
 	pinctrl-1 = <&spi0_pins_sleep>;
+	ti,pindir-d0-out-d1-in = <1>;
 };
 
 &spi1 {
@@ -855,6 +856,7 @@
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&spi1_pins_default>;
 	pinctrl-1 = <&spi1_pins_sleep>;
+	ti,pindir-d0-out-d1-in = <1>;
 };
 
 &usb2_phy1 {
diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index 820ce3b60bb6..669559c9c95b 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -167,11 +167,7 @@
 
 &pcie1_rc {
 	status = "okay";
-	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
-};
-
-&pcie1_ep {
-	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+	gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>;
 };
 
 &mmc1 {
diff --git a/arch/arm/boot/dts/am572x-idk-common.dtsi b/arch/arm/boot/dts/am572x-idk-common.dtsi
index a064f13b3880..ddf123620e96 100644
--- a/arch/arm/boot/dts/am572x-idk-common.dtsi
+++ b/arch/arm/boot/dts/am572x-idk-common.dtsi
@@ -147,10 +147,6 @@
 	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
 };
 
-&pcie1_ep {
-	gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
-};
-
 &mailbox5 {
 	status = "okay";
 	mbox_ipu1_ipc3x: mbox_ipu1_ipc3x {
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
index bc76f1705c0f..a813a0cf3ff3 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
@@ -29,6 +29,27 @@
 		reg = <0x0 0x80000000 0x0 0x80000000>;
 	};
 
+	main_12v0: fixedregulator-main_12v0 {
+		/* main supply */
+		compatible = "regulator-fixed";
+		regulator-name = "main_12v0";
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
+		regulator-always-on;
+		regulator-boot-on;
+	};
+
+	evm_5v0: fixedregulator-evm_5v0 {
+		/* Output of TPS54531D */
+		compatible = "regulator-fixed";
+		regulator-name = "evm_5v0";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		vin-supply = <&main_12v0>;
+		regulator-always-on;
+		regulator-boot-on;
+	};
+
 	vdd_3v3: fixedregulator-vdd_3v3 {
 		compatible = "regulator-fixed";
 		regulator-name = "vdd_3v3";
@@ -547,10 +568,6 @@
 	gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
 };
 
-&pcie1_ep {
-	gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
-};
-
 &mcasp3 {
 	#sound-dai-cells = <0>;
 	assigned-clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>;
diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
index c1c9cd30f980..13f7aefe045e 100644
--- a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts
@@ -258,9 +258,9 @@
 		};
 	};
 
-	pca0: pca9552@60 {
+	pca0: pca9552@61 {
 		compatible = "nxp,pca9552";
-		reg = <0x60>;
+		reg = <0x61>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 
@@ -521,371 +521,6 @@
 
 &i2c13 {
 	status = "okay";
-};
-
-&i2c14 {
-	status = "okay";
-};
-
-&i2c15 {
-	status = "okay";
-};
-
-&i2c0 {
-	status = "okay";
-};
-
-&i2c1 {
-	status = "okay";
-};
-
-&i2c2 {
-	status = "okay";
-};
-
-&i2c3 {
-	status = "okay";
-
-	power-supply@68 {
-		compatible = "ibm,cffps2";
-		reg = <0x68>;
-	};
-
-	power-supply@69 {
-		compatible = "ibm,cffps2";
-		reg = <0x69>;
-	};
-
-	power-supply@6a {
-		compatible = "ibm,cffps2";
-		reg = <0x6a>;
-	};
-
-	power-supply@6b {
-		compatible = "ibm,cffps2";
-		reg = <0x6b>;
-	};
-};
-
-&i2c4 {
-	status = "okay";
-
-	tmp275@48 {
-		compatible = "ti,tmp275";
-		reg = <0x48>;
-	};
-
-	tmp275@49 {
-		compatible = "ti,tmp275";
-		reg = <0x49>;
-	};
-
-	tmp275@4a {
-		compatible = "ti,tmp275";
-		reg = <0x4a>;
-	};
-};
-
-&i2c5 {
-	status = "okay";
-
-	tmp275@48 {
-		compatible = "ti,tmp275";
-		reg = <0x48>;
-	};
-
-	tmp275@49 {
-		compatible = "ti,tmp275";
-		reg = <0x49>;
-	};
-};
-
-&i2c6 {
-	status = "okay";
-
-	tmp275@48 {
-		compatible = "ti,tmp275";
-		reg = <0x48>;
-	};
-
-	tmp275@4a {
-		compatible = "ti,tmp275";
-		reg = <0x4a>;
-	};
-
-	tmp275@4b {
-		compatible = "ti,tmp275";
-		reg = <0x4b>;
-	};
-};
-
-&i2c7 {
-	status = "okay";
-
-	si7021-a20@20 {
-		compatible = "silabs,si7020";
-		reg = <0x20>;
-	};
-
-	tmp275@48 {
-		compatible = "ti,tmp275";
-		reg = <0x48>;
-	};
-
-	max31785@52 {
-		compatible = "maxim,max31785a";
-		reg = <0x52>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		fan@0 {
-			compatible = "pmbus-fan";
-			reg = <0>;
-			tach-pulses = <2>;
-		};
-
-		fan@1 {
-			compatible = "pmbus-fan";
-			reg = <1>;
-			tach-pulses = <2>;
-		};
-
-		fan@2 {
-			compatible = "pmbus-fan";
-			reg = <2>;
-			tach-pulses = <2>;
-		};
-
-		fan@3 {
-			compatible = "pmbus-fan";
-			reg = <3>;
-			tach-pulses = <2>;
-		};
-	};
-
-	pca0: pca9552@60 {
-		compatible = "nxp,pca9552";
-		reg = <0x60>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		gpio-controller;
-		#gpio-cells = <2>;
-
-		gpio@0 {
-			reg = <0>;
-		};
-
-		gpio@1 {
-			reg = <1>;
-		};
-
-		gpio@2 {
-			reg = <2>;
-		};
-
-		gpio@3 {
-			reg = <3>;
-		};
-
-		gpio@4 {
-			reg = <4>;
-		};
-
-		gpio@5 {
-			reg = <5>;
-		};
-
-		gpio@6 {
-			reg = <6>;
-		};
-
-		gpio@7 {
-			reg = <7>;
-		};
-
-		gpio@8 {
-			reg = <8>;
-		};
-
-		gpio@9 {
-			reg = <9>;
-		};
-
-		gpio@10 {
-			reg = <10>;
-		};
-
-		gpio@11 {
-			reg = <11>;
-		};
-
-		gpio@12 {
-			reg = <12>;
-		};
-
-		gpio@13 {
-			reg = <13>;
-		};
-
-		gpio@14 {
-			reg = <14>;
-		};
-
-		gpio@15 {
-			reg = <15>;
-		};
-	};
-
-	dps: dps310@76 {
-		compatible = "infineon,dps310";
-		reg = <0x76>;
-		#io-channel-cells = <0>;
-	};
-};
-
-&i2c8 {
-	status = "okay";
-
-	ucd90320@b {
-		compatible = "ti,ucd90160";
-		reg = <0x0b>;
-	};
-
-	ucd90320@c {
-		compatible = "ti,ucd90160";
-		reg = <0x0c>;
-	};
-
-	ucd90320@11 {
-		compatible = "ti,ucd90160";
-		reg = <0x11>;
-	};
-
-	rtc@32 {
-		compatible = "epson,rx8900";
-		reg = <0x32>;
-	};
-
-	tmp275@48 {
-		compatible = "ti,tmp275";
-		reg = <0x48>;
-	};
-
-	tmp275@4a {
-		compatible = "ti,tmp275";
-		reg = <0x4a>;
-	};
-};
-
-&i2c9 {
-	status = "okay";
-
-	ir35221@42 {
-		compatible = "infineon,ir35221";
-		reg = <0x42>;
-	};
-
-	ir35221@43 {
-		compatible = "infineon,ir35221";
-		reg = <0x43>;
-	};
-
-	ir35221@44 {
-		compatible = "infineon,ir35221";
-		reg = <0x44>;
-	};
-
-	tmp423a@4c {
-		compatible = "ti,tmp423";
-		reg = <0x4c>;
-	};
-
-	tmp423b@4d {
-		compatible = "ti,tmp423";
-		reg = <0x4d>;
-	};
-
-	ir35221@72 {
-		compatible = "infineon,ir35221";
-		reg = <0x72>;
-	};
-
-	ir35221@73 {
-		compatible = "infineon,ir35221";
-		reg = <0x73>;
-	};
-
-	ir35221@74 {
-		compatible = "infineon,ir35221";
-		reg = <0x74>;
-	};
-};
-
-&i2c10 {
-	status = "okay";
-
-	ir35221@42 {
-		compatible = "infineon,ir35221";
-		reg = <0x42>;
-	};
-
-	ir35221@43 {
-		compatible = "infineon,ir35221";
-		reg = <0x43>;
-	};
-
-	ir35221@44 {
-		compatible = "infineon,ir35221";
-		reg = <0x44>;
-	};
-
-	tmp423a@4c {
-		compatible = "ti,tmp423";
-		reg = <0x4c>;
-	};
-
-	tmp423b@4d {
-		compatible = "ti,tmp423";
-		reg = <0x4d>;
-	};
-
-	ir35221@72 {
-		compatible = "infineon,ir35221";
-		reg = <0x72>;
-	};
-
-	ir35221@73 {
-		compatible = "infineon,ir35221";
-		reg = <0x73>;
-	};
-
-	ir35221@74 {
-		compatible = "infineon,ir35221";
-		reg = <0x74>;
-	};
-};
-
-&i2c11 {
-	status = "okay";
-
-	tmp275@48 {
-		compatible = "ti,tmp275";
-		reg = <0x48>;
-	};
-
-	tmp275@49 {
-		compatible = "ti,tmp275";
-		reg = <0x49>;
-	};
-};
-
-&i2c12 {
-	status = "okay";
-};
-
-&i2c13 {
-	status = "okay";
 
 	eeprom@50 {
 		compatible = "atmel,24c64";
diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts
index f02de4ab058c..ff49ec76fa7c 100644
--- a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts
@@ -122,37 +122,6 @@
 	};
 };
 
-&fmc {
-	status = "okay";
-	flash@0 {
-		status = "okay";
-		m25p,fast-read;
-		label = "bmc";
-		spi-max-frequency = <50000000>;
-#include "openbmc-flash-layout-128.dtsi"
-	};
-
-	flash@1 {
-		status = "okay";
-		m25p,fast-read;
-		label = "alt-bmc";
-		spi-max-frequency = <50000000>;
-	};
-};
-
-&spi1 {
-	status = "okay";
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_spi1_default>;
-
-	flash@0 {
-		status = "okay";
-		m25p,fast-read;
-		label = "pnor";
-		spi-max-frequency = <100000000>;
-	};
-};
-
 &mac2 {
 	status = "okay";
 	pinctrl-names = "default";
@@ -165,6 +134,11 @@
 
 &emmc {
 	status = "okay";
+};
+
+&fsim0 {
+	status = "okay";
+
 	#address-cells = <2>;
 	#size-cells = <0>;
@@ -820,373 +794,6 @@
 	status = "okay";
 };
 
-&i2c0 {
-	status = "okay";
-};
-
-&i2c1 {
-	status = "okay";
-};
-
-&i2c2 {
-	status = "okay";
-};
-
-&i2c3 {
-	status = "okay";
-
-	bmp: bmp280@77 {
-		compatible = "bosch,bmp280";
-		reg = <0x77>;
-		#io-channel-cells = <1>;
-	};
-
-	max31785@52 {
-		compatible = "maxim,max31785a";
-		reg = <0x52>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		fan@0 {
-			compatible = "pmbus-fan";
-			reg = <0>;
-			tach-pulses = <2>;
-			maxim,fan-rotor-input = "tach";
-			maxim,fan-pwm-freq = <25000>;
-			maxim,fan-dual-tach;
-			maxim,fan-no-watchdog;
-			maxim,fan-no-fault-ramp;
-			maxim,fan-ramp = <2>;
-			maxim,fan-fault-pin-mon;
-		};
-
-		fan@1 {
-			compatible = "pmbus-fan";
-			reg = <1>;
-			tach-pulses = <2>;
-			maxim,fan-rotor-input = "tach";
-			maxim,fan-pwm-freq = <25000>;
-			maxim,fan-dual-tach;
-			maxim,fan-no-watchdog;
-			maxim,fan-no-fault-ramp;
-			maxim,fan-ramp = <2>;
-			maxim,fan-fault-pin-mon;
-		};
-
-		fan@2 {
-			compatible = "pmbus-fan";
-			reg = <2>;
-			tach-pulses = <2>;
-			maxim,fan-rotor-input = "tach";
-			maxim,fan-pwm-freq = <25000>;
-			maxim,fan-dual-tach;
-			maxim,fan-no-watchdog;
-			maxim,fan-no-fault-ramp;
-			maxim,fan-ramp = <2>;
-			maxim,fan-fault-pin-mon;
-		};
-
-		fan@3 {
-			compatible = "pmbus-fan";
-			reg = <3>;
-			tach-pulses = <2>;
-			maxim,fan-rotor-input = "tach";
-			maxim,fan-pwm-freq = <25000>;
-			maxim,fan-dual-tach;
-			maxim,fan-no-watchdog;
-			maxim,fan-no-fault-ramp;
-			maxim,fan-ramp = <2>;
-			maxim,fan-fault-pin-mon;
-		};
-	};
-
-	dps: dps310@76 {
-		compatible = "infineon,dps310";
-		reg = <0x76>;
-		#io-channel-cells = <0>;
-	};
-
-	pca0: pca9552@60 {
-		compatible = "nxp,pca9552";
-		reg = <0x60>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		gpio-controller;
-		#gpio-cells = <2>;
-
-		gpio@0 {
-			reg = <0>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@1 {
-			reg = <1>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@2 {
-			reg = <2>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@3 {
-			reg = <3>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@4 {
-			reg = <4>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@5 {
-			reg = <5>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@6 {
-			reg = <6>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@7 {
-			reg = <7>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@8 {
-			reg = <8>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@9 {
-			reg = <9>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@10 {
-			reg = <10>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@11 {
-			reg = <11>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@12 {
-			reg = <12>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@13 {
-			reg = <13>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@14 {
-			reg = <14>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@15 {
-			reg = <15>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-	};
-
-	power-supply@68 {
-		compatible = "ibm,cffps1";
-		reg = <0x68>;
-	};
-
-	power-supply@69 {
-		compatible = "ibm,cffps1";
-		reg = <0x69>;
-	};
-};
-
-&i2c4 {
-	status = "okay";
-
-	tmp423a@4c {
-		compatible = "ti,tmp423";
-		reg = <0x4c>;
-	};
-
-	ir35221@70 {
-		compatible = "infineon,ir35221";
-		reg = <0x70>;
-	};
-
-	ir35221@71 {
-		compatible = "infineon,ir35221";
-		reg = <0x71>;
-	};
-};
-
-&i2c5 {
-	status = "okay";
-
-	tmp423a@4c {
-		compatible = "ti,tmp423";
-		reg = <0x4c>;
-	};
-
-	ir35221@70 {
-		compatible = "infineon,ir35221";
-		reg = <0x70>;
-	};
-
-	ir35221@71 {
-		compatible = "infineon,ir35221";
-		reg = <0x71>;
-	};
-};
-
-&i2c7 {
-	status = "okay";
-};
-
-&i2c9 {
-	status = "okay";
-
-	tmp275@4a {
-		compatible = "ti,tmp275";
-		reg = <0x4a>;
-	};
-};
-
-&i2c10 {
-	status = "okay";
-};
-
-&i2c11 {
-	status = "okay";
-
-	pca9552: pca9552@60 {
-		compatible = "nxp,pca9552";
-		reg = <0x60>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		gpio-controller;
-		#gpio-cells = <2>;
-
-		gpio-line-names = "PS_SMBUS_RESET_N", "APSS_RESET_N",
-			"GPU0_TH_OVERT_N_BUFF", "GPU1_TH_OVERT_N_BUFF",
-			"GPU2_TH_OVERT_N_BUFF", "GPU3_TH_OVERT_N_BUFF",
-			"GPU4_TH_OVERT_N_BUFF", "GPU5_TH_OVERT_N_BUFF",
-			"GPU0_PWR_GOOD_BUFF", "GPU1_PWR_GOOD_BUFF",
-			"GPU2_PWR_GOOD_BUFF", "GPU3_PWR_GOOD_BUFF",
-			"GPU4_PWR_GOOD_BUFF", "GPU5_PWR_GOOD_BUFF",
-			"12V_BREAKER_FLT_N", "THROTTLE_UNLATCHED_N";
-
-		gpio@0 {
-			reg = <0>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@1 {
-			reg = <1>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@2 {
-			reg = <2>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@3 {
-			reg = <3>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@4 {
-			reg = <4>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@5 {
-			reg = <5>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@6 {
-			reg = <6>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@7 {
-			reg = <7>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@8 {
-			reg = <8>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@9 {
-			reg = <9>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@10 {
-			reg = <10>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@11 {
-			reg = <11>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@12 {
-			reg = <12>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@13 {
-			reg = <13>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@14 {
-			reg = <14>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-
-		gpio@15 {
-			reg = <15>;
-			type = <PCA955X_TYPE_GPIO>;
-		};
-	};
-
-	rtc@32 {
-		compatible = "epson,rx8900";
-		reg = <0x32>;
-	};
-
-	eeprom@51 {
-		compatible = "atmel,24c64";
-		reg = <0x51>;
-	};
-
-	ucd90160@64 {
-		compatible = "ti,ucd90160";
-		reg = <0x64>;
-	};
-};
-
-&i2c12 {
-	status = "okay";
-};
-
-&i2c13 {
-	status = "okay";
-};
-
 &pinctrl {
 	/* Hog these as no driver is probed for the entire LPC block */
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
index 5f6142d99eeb..b72afbaadaf8 100644
--- a/arch/arm/boot/dts/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6.dtsi
@@ -163,26 +163,6 @@
 				spi-max-frequency = <50000000>;
status = "disabled"; }; - - fsim0: fsi@1e79b000 { - compatible = "aspeed,ast2600-fsi-master", "fsi-master"; - reg = <0x1e79b000 0x94>; - interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fsi1_default>; - clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; - status = "disabled"; - }; - - fsim1: fsi@1e79b100 { - compatible = "aspeed,ast2600-fsi-master", "fsi-master"; - reg = <0x1e79b100 0x94>; - interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fsi2_default>; - clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; - status = "disabled"; - }; }; mdio0: mdio@1e650000 { @@ -595,6 +575,25 @@ ranges = <0 0x1e78a000 0x1000>; }; + fsim0: fsi@1e79b000 { + compatible = "aspeed,ast2600-fsi-master", "fsi-master"; + reg = <0x1e79b000 0x94>; + interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fsi1_default>; + clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; + status = "disabled"; + }; + + fsim1: fsi@1e79b100 { + compatible = "aspeed,ast2600-fsi-master", "fsi-master"; + reg = <0x1e79b100 0x94>; + interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fsi2_default>; + clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; + status = "disabled"; + }; }; }; }; diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 2dac3efc7640..1bc45cfd5453 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -174,8 +174,8 @@ mdio: mdio@18002000 { compatible = "brcm,iproc-mdio"; reg = <0x18002000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; status = "disabled"; gphy0: ethernet-phy@0 { diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 961bed832755..e2f6ffb00aa9 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -43,7 +43,7 @@ <0x7c000000 0x0 0xfc000000 0x02000000>, <0x40000000 0x0 0xff800000 0x00800000>; /* Emulate a contiguous 30-bit address range for DMA */ - dma-ranges = <0xc0000000 0x0 0x00000000 0x3c000000>; + dma-ranges = <0xc0000000 0x0 0x00000000 0x40000000>; /* * This node is the provider for the enable-method for diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 3caaa57eb6c8..839491628e87 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -37,7 +37,7 @@ trips { cpu-crit { - temperature = <80000>; + temperature = <90000>; hysteresis = <0>; type = "critical"; }; diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 372dc1eb88a0..2d9b4dd05830 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -353,8 +353,8 @@ mdio: mdio@18003000 { compatible = "brcm,iproc-mdio"; reg = <0x18003000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; }; mdio-bus-mux@18003000 { diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi index 6472b056a001..5a2c5320437d 100644 --- a/arch/arm/boot/dts/e60k02.dtsi +++ b/arch/arm/boot/dts/e60k02.dtsi @@ -265,11 +265,6 @@ regulator-name = "LDORTC1"; regulator-boot-on; }; - - ldortc2_reg: LDORTC2 { - regulator-name = "LDORTC2"; - regulator-boot-on; - }; }; }; }; diff --git a/arch/arm/boot/dts/imx6dl-icore-mipi.dts b/arch/arm/boot/dts/imx6dl-icore-mipi.dts index e43bccb78ab2..d8f3821a0ffd 100644 --- a/arch/arm/boot/dts/imx6dl-icore-mipi.dts +++ 
b/arch/arm/boot/dts/imx6dl-icore-mipi.dts @@ -8,7 +8,7 @@ /dts-v1/; #include "imx6dl.dtsi" -#include "imx6qdl-icore.dtsi" +#include "imx6qdl-icore-1.5.dtsi" / { model = "Engicam i.CoreM6 DualLite/Solo MIPI Starter Kit"; diff --git a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts index 5219553df1e7..bb74fc62d913 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts +++ b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts @@ -63,7 +63,7 @@ #sound-dai-cells = <0>; clocks = <&clk_ext_audio_codec>; VDDA-supply = <®_3p3v>; - VDDIO-supply = <®_3p3v>; + VDDIO-supply = <&sw2_reg>; }; }; diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi index 845cfad99bf9..87f0aa897086 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi +++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi @@ -204,7 +204,7 @@ }; rtc@56 { - compatible = "rv3029c2"; + compatible = "microcrystal,rv3029"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_rtc_hw300>; reg = <0x56>; diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index 71ca76a5e4a5..fe59dde41b64 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -749,10 +749,6 @@ vin-supply = <&vgen5_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen5_reg>; }; diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts index 4829aa682aeb..bc86cfaaa9c2 100644 --- a/arch/arm/boot/dts/imx6sl-evk.dts +++ b/arch/arm/boot/dts/imx6sl-evk.dts @@ -584,10 +584,6 @@ vin-supply = <&sw2_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&sw2_reg>; }; diff --git a/arch/arm/boot/dts/imx6sll-evk.dts b/arch/arm/boot/dts/imx6sll-evk.dts index 3e1d32fdf4b8..5ace9e6acf85 100644 --- a/arch/arm/boot/dts/imx6sll-evk.dts +++ b/arch/arm/boot/dts/imx6sll-evk.dts @@ -265,10 +265,6 @@ status = "okay"; }; -®_3p0 { - vin-supply = <&sw2_reg>; -}; - &snvs_poweroff { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6sx-sdb-reva.dts b/arch/arm/boot/dts/imx6sx-sdb-reva.dts index f1830ed387a5..91a7548fdb8d 100644 --- a/arch/arm/boot/dts/imx6sx-sdb-reva.dts +++ b/arch/arm/boot/dts/imx6sx-sdb-reva.dts @@ -159,10 +159,6 @@ vin-supply = <&vgen6_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen6_reg>; }; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts index a8ee7087af5a..5a63ca615722 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dts +++ b/arch/arm/boot/dts/imx6sx-sdb.dts @@ -141,10 +141,6 @@ vin-supply = <&vgen6_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen6_reg>; }; diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi index 1506eb12b21e..212144511b66 100644 --- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi +++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi @@ -30,14 +30,26 @@ enable-active-high; }; - reg_sensors: regulator-sensors { + reg_peri_3v3: regulator-peri-3v3 { compatible = "regulator-fixed"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_sensors_reg>; - regulator-name = "sensors-supply"; + pinctrl-0 = <&pinctrl_peri_3v3>; + regulator-name = "VPERI_3V3"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; gpio = <&gpio5 2 GPIO_ACTIVE_LOW>; + /* + * If you want to want to make this dynamic please + * check schematics and test all affected peripherals: + * + * - sensors + * - ethernet phy + * - can + * - bluetooth + * - wm8960 audio 
codec + * - ov5640 camera + */ + regulator-always-on; }; reg_can_3v3: regulator-can-3v3 { @@ -140,6 +152,7 @@ pinctrl-0 = <&pinctrl_enet1>; phy-mode = "rmii"; phy-handle = <ðphy0>; + phy-supply = <®_peri_3v3>; status = "okay"; }; @@ -148,6 +161,7 @@ pinctrl-0 = <&pinctrl_enet2>; phy-mode = "rmii"; phy-handle = <ðphy1>; + phy-supply = <®_peri_3v3>; status = "okay"; mdio { @@ -193,8 +207,8 @@ magnetometer@e { compatible = "fsl,mag3110"; reg = <0x0e>; - vdd-supply = <®_sensors>; - vddio-supply = <®_sensors>; + vdd-supply = <®_peri_3v3>; + vddio-supply = <®_peri_3v3>; }; }; @@ -227,7 +241,7 @@ flash0: n25q256a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "micron,n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; spi-max-frequency = <29000000>; spi-rx-bus-width = <4>; spi-tx-bus-width = <4>; @@ -462,7 +476,7 @@ >; }; - pinctrl_sensors_reg: sensorsreggrp { + pinctrl_peri_3v3: peri3v3grp { fsl,pins = < MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02 0x1b0b0 >; diff --git a/arch/arm/boot/dts/imx7s-colibri.dtsi b/arch/arm/boot/dts/imx7s-colibri.dtsi index 1fb1ec5d3d70..6d16e32aed89 100644 --- a/arch/arm/boot/dts/imx7s-colibri.dtsi +++ b/arch/arm/boot/dts/imx7s-colibri.dtsi @@ -49,3 +49,7 @@ reg = <0x80000000 0x10000000>; }; }; + +&gpmi { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index d37a1927c88e..ab91c98f2124 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -37,10 +37,10 @@ #address-cells = <1>; #size-cells = <0>; - cpu0: cpu@0 { + cpu0: cpu@f00 { compatible = "arm,cortex-a7"; device_type = "cpu"; - reg = <0>; + reg = <0xf00>; }; }; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 5a7e3e5caebe..3c534cd50ee3 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -253,7 +253,7 @@ &aobus { pmu: pmu@e0 { compatible = "amlogic,meson8-pmu", "syscon"; - reg = <0xe0 0x8>; + reg = <0xe0 0x18>; }; pinctrl_aobus: pinctrl@84 { diff --git a/arch/arm/boot/dts/mmp3.dtsi b/arch/arm/boot/dts/mmp3.dtsi index d9762de0ed34..6f480827b94d 100644 --- a/arch/arm/boot/dts/mmp3.dtsi +++ b/arch/arm/boot/dts/mmp3.dtsi @@ -356,7 +356,7 @@ twsi1: i2c@d4011000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4011000 0x1000>; + reg = <0xd4011000 0x70>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; clocks = <&soc_clocks MMP2_CLK_TWSI0>; resets = <&soc_clocks MMP2_CLK_TWSI0>; @@ -368,7 +368,7 @@ twsi2: i2c@d4031000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4031000 0x1000>; + reg = <0xd4031000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <0>; clocks = <&soc_clocks MMP2_CLK_TWSI1>; @@ -380,7 +380,7 @@ twsi3: i2c@d4032000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4032000 0x1000>; + reg = <0xd4032000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <1>; clocks = <&soc_clocks MMP2_CLK_TWSI2>; @@ -392,7 +392,7 @@ twsi4: i2c@d4033000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4033000 0x1000>; + reg = <0xd4033000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <2>; clocks = <&soc_clocks MMP2_CLK_TWSI3>; @@ -405,7 +405,7 @@ twsi5: i2c@d4033800 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4033800 0x1000>; + reg = <0xd4033800 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <3>; clocks = <&soc_clocks MMP2_CLK_TWSI4>; @@ -417,7 +417,7 @@ twsi6: i2c@d4034000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4034000 0x1000>; + reg = <0xd4034000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <4>; clocks = <&soc_clocks MMP2_CLK_TWSI5>; diff --git 
a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index fb928503ad45..d9be511f054f 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -101,7 +101,7 @@ initial-mode = <1>; /* initialize in HUB mode */ disabled-ports = <1>; intn-gpios = <&pio 7 5 GPIO_ACTIVE_HIGH>; /* PH5 */ - reset-gpios = <&pio 4 16 GPIO_ACTIVE_HIGH>; /* PE16 */ + reset-gpios = <&pio 4 16 GPIO_ACTIVE_LOW>; /* PE16 */ connect-gpios = <&pio 4 17 GPIO_ACTIVE_HIGH>; /* PE17 */ refclk-frequency = <19200000>; }; diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index e7e4bb5ad8d5..fde84f123fbb 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -350,6 +350,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DETECT_HUNG_TASK is not set diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 26d6dee67aa6..3608e55eaecd 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -462,6 +462,7 @@ CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set CONFIG_PROVE_LOCKING=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 8c37cc8ab6f2..c32c338f7704 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -92,6 +92,7 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETFILTER=y CONFIG_PHONET=m +CONFIG_NET_SWITCHDEV=y CONFIG_CAN=m CONFIG_CAN_C_CAN=m CONFIG_CAN_C_CAN_PLATFORM=m @@ -181,6 +182,7 @@ CONFIG_SMSC911X=y # CONFIG_NET_VENDOR_STMICRO is not set CONFIG_TI_DAVINCI_EMAC=y CONFIG_TI_CPSW=y +CONFIG_TI_CPSW_SWITCHDEV=y CONFIG_TI_CPTS=y # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set @@ -554,6 +556,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_SPLIT=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_TI_CPSW_SWITCHDEV=y diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig index bda57cafa2bc..de3830443613 100644 --- a/arch/arm/configs/shmobile_defconfig +++ b/arch/arm/configs/shmobile_defconfig @@ -212,4 +212,5 @@ CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index fa50bb04f580..b5752f0e8936 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ #include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include <asm/barrier.h> #include <asm/cacheflush.h> #include <asm/cp15.h> @@ -327,6 +328,7 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr) /* * GITS_VPROPBASER - hi and lo bits may be accessed independently. 
 */
+#define gits_read_vpropbaser(c)		__gic_readq_nonatomic(c)
 #define gits_write_vpropbaser(v, c)	__gic_writeq_nonatomic(v, c)
 
 /*
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 7667826b93f1..5ac46e2860bc 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -50,19 +50,16 @@ void efi_virtmap_unload(void);
 
 /* arch specific definitions used by the stub code */
 
-#define efi_call_early(f, ...)		sys_table_arg->boottime->f(__VA_ARGS__)
-#define __efi_call_early(f, ...)	f(__VA_ARGS__)
-#define efi_call_runtime(f, ...)	sys_table_arg->runtime->f(__VA_ARGS__)
-#define efi_is_64bit()			(false)
+#define efi_bs_call(func, ...)	efi_system_table()->boottime->func(__VA_ARGS__)
+#define efi_rt_call(func, ...)	efi_system_table()->runtime->func(__VA_ARGS__)
+#define efi_is_native()		(true)
 
-#define efi_table_attr(table, attr, instance)				\
-	((table##_t *)instance)->attr
+#define efi_table_attr(inst, attr)	(inst->attr)
 
-#define efi_call_proto(protocol, f, instance, ...)			\
-	((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
+#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
 
-struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg);
-void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si);
+struct screen_info *alloc_screen_info(void);
+void free_screen_info(struct screen_info *si);
 
 static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
 {
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index aefdabdbeb84..ab2b654084fa 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -356,7 +356,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from,
 *
 * Function		Memory type	Cacheability	Cache hint
 * ioremap()		Device		n/a		n/a
- * ioremap_nocache()	Device		n/a		n/a
 * ioremap_cache()	Normal		Writeback	Read allocate
 * ioremap_wc()		Normal		Non-cacheable	n/a
 * ioremap_wt()		Normal		Non-cacheable	n/a
@@ -368,13 +367,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from,
 * - unaligned accesses are "unpredictable"
 * - writes may be delayed before they hit the endpoint device
 *
- * ioremap_nocache() is the same as ioremap() as there are too many device
- * drivers using this for device registers, and documentation which tells
- * people to use it for such for this to be any different. This is not a
- * safe fallback for memory-like mappings, or memory regions where the
- * compiler may generate unaligned accesses - eg, via inlining its own
- * memcpy.
- *
 * All normal memory mappings have the following properties:
 * - reads can be repeated with no side effects
 * - repeated reads return the last value written
diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index d3e937dcee4d..007d8fea7157 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -10,7 +10,7 @@
 * to ensure that the maintenance completes in case we migrate to another
 * CPU.
 */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7)
+#if defined(CONFIG_PREEMPTION) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7)
 #define __complete_pending_tlbi()	dsb(ish)
 #else
 #define __complete_pending_tlbi()
diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h
index 0ad2429c324f..fe6e1f65932d 100644
--- a/arch/arm/include/asm/vdso/gettimeofday.h
+++ b/arch/arm/include/asm/vdso/gettimeofday.h
@@ -52,6 +52,24 @@ static __always_inline long clock_gettime_fallback(
 	return ret;
 }
 
+static __always_inline long clock_gettime32_fallback(
+					clockid_t _clkid,
+					struct old_timespec32 *_ts)
+{
+	register struct old_timespec32 *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_gettime;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
 static __always_inline int clock_getres_fallback(
 					clockid_t _clkid,
 					struct __kernel_timespec *_ts)
@@ -70,6 +88,24 @@ static __always_inline int clock_getres_fallback(
 	return ret;
 }
 
+static __always_inline int clock_getres32_fallback(
+					clockid_t _clkid,
+					struct old_timespec32 *_ts)
+{
+	register struct old_timespec32 *ts asm("r1") = _ts;
+	register clockid_t clkid asm("r0") = _clkid;
+	register long ret asm ("r0");
+	register long nr asm("r7") = __NR_clock_getres;
+
+	asm volatile(
+	"	swi #0\n"
+	: "=r" (ret)
+	: "r" (clkid), "r" (ts), "r" (nr)
+	: "memory");
+
+	return ret;
+}
+
 static __always_inline u64 __arch_get_hw_counter(int clock_mode)
 {
 #ifdef CONFIG_ARM_ARCH_TIMER
diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h
index c4166f317071..cff87d8d30da 100644
--- a/arch/arm/include/asm/vdso/vsyscall.h
+++ b/arch/arm/include/asm/vdso/vsyscall.h
@@ -34,9 +34,9 @@ struct vdso_data *__arm_get_k_vdso_data(void)
 #define __arch_get_k_vdso_data __arm_get_k_vdso_data
 
 static __always_inline
-int __arm_update_vdso_data(void)
+bool __arm_update_vdso_data(void)
 {
-	return !cntvct_ok;
+	return cntvct_ok;
 }
 #define __arch_update_vdso_data __arm_update_vdso_data
diff --git a/arch/arm/include/asm/vmalloc.h b/arch/arm/include/asm/vmalloc.h
new file mode 100644
index 000000000000..a9b3718b8600
--- /dev/null
+++ b/arch/arm/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_ARM_VMALLOC_H
+#define _ASM_ARM_VMALLOC_H
+
+#endif /* _ASM_ARM_VMALLOC_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 8b679e2ca3c3..89e5d864e923 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -53,8 +53,8 @@ obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= entry-ftrace.o
-obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o patch.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o patch.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 # Main staffs in KPROBES are in arch/arm/probes/ .
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 858d4e541532..77f54830554c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -211,7 +211,7 @@ __irq_svc:
 	svc_entry
 	irq_handler
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
 	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
 	teq	r8, #0				@ if preempt count != 0
@@ -226,7 +226,7 @@ ENDPROC(__irq_svc)
 
 	.ltorg
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 svc_preempt:
 	mov	r8, lr
 1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index bda949fd84e8..2a5ff69c28e6 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -22,6 +22,7 @@
 #include <asm/ftrace.h>
 #include <asm/insn.h>
 #include <asm/set_memory.h>
+#include <asm/patch.h>
 
 #ifdef CONFIG_THUMB2_KERNEL
 #define	NOP		0xf85deb04	/* pop.w {lr} */
@@ -35,9 +36,7 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	set_kernel_text_rw();
 	ftrace_modify_all_code(*command);
-	set_kernel_text_ro();
 
 	return 0;
 }
@@ -59,13 +58,11 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 
 int ftrace_arch_code_modify_prepare(void)
 {
-	set_all_modules_text_rw();
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
-	set_all_modules_text_ro();
 	/* Make sure any TLB misses during machine stop are cleared. */
 	flush_tlb_all();
 	return 0;
@@ -97,10 +94,7 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
 			return -EINVAL;
 	}
 
-	if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))
-		return -EPERM;
-
-	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+	__patch_text((void *)pc, new);
 
 	return 0;
 }
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index ae5020302de4..6607fa817bba 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -146,10 +146,9 @@ ARM_BE8(orr	r7, r7, #(1 << 25))     @ HSCTLR.EE
 #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER)
 	@ make CNTP_* and CNTPCT accessible from PL1
 	mrc	p15, 0, r7, c0, c1, 1	@ ID_PFR1
-	lsr	r7, #16
-	and	r7, #0xf
-	cmp	r7, #1
-	bne	1f
+	ubfx	r7, r7, #16, #4
+	teq	r7, #0
+	beq	1f
 	mrc	p15, 4, r7, c14, c1, 0	@ CNTHCTL
 	orr	r7, r7, #3		@ PL1PCEN | PL1PCTEN
 	mcr	p15, 4, r7, c14, c1, 0	@ CNTHCTL
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index cea1c27c29cb..46e478fb5ea2 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -226,8 +226,8 @@ void release_thread(struct task_struct *dead_task)
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 int
-copy_thread(unsigned long clone_flags, unsigned long stack_start,
-	    unsigned long stk_sz, struct task_struct *p)
+copy_thread_tls(unsigned long clone_flags, unsigned long stack_start,
+	    unsigned long stk_sz, struct task_struct *p, unsigned long tls)
 {
 	struct thread_info *thread = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
@@ -261,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value[0] = childregs->ARM_r3;
+		thread->tp_value[0] = tls;
 	thread->tp_value[1] = get_tpuser();
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index c053abd1fb53..abb7dd7e656f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -248,6 +248,8 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
 
 #ifdef CONFIG_PREEMPT
#define S_PREEMPT " PREEMPT" +#elif defined(CONFIG_PREEMPT_RT) +#define S_PREEMPT " PREEMPT_RT" #else #define S_PREEMPT "" #endif diff --git a/arch/arm/mach-bcm/bcm2711.c b/arch/arm/mach-bcm/bcm2711.c index dbe296798647..fa0300d8c79d 100644 --- a/arch/arm/mach-bcm/bcm2711.c +++ b/arch/arm/mach-bcm/bcm2711.c @@ -13,6 +13,7 @@ static const char * const bcm2711_compat[] = { #ifdef CONFIG_ARCH_MULTI_V7 "brcm,bcm2711", #endif + NULL }; DT_MACHINE_START(BCM2711, "BCM2711") diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 21400b3fa5fe..c9db2a9006d9 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -105,7 +105,7 @@ static int nsp_write_lut(unsigned int cpu) if (!secondary_boot_addr) return -EINVAL; - sku_rom_lut = ioremap_nocache((phys_addr_t)secondary_boot_addr, + sku_rom_lut = ioremap((phys_addr_t)secondary_boot_addr, sizeof(phys_addr_t)); if (!sku_rom_lut) { pr_warn("unable to ioremap SKU-ROM LUT register for cpu %u\n", cpu); @@ -174,7 +174,7 @@ static int kona_boot_secondary(unsigned int cpu, struct task_struct *idle) if (!secondary_boot_addr) return -EINVAL; - boot_reg = ioremap_nocache((phys_addr_t)secondary_boot_addr, + boot_reg = ioremap((phys_addr_t)secondary_boot_addr, sizeof(phys_addr_t)); if (!boot_reg) { pr_err("unable to map boot register for cpu %u\n", cpu_id); diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index dd427bd2768c..02b180ad7245 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -9,6 +9,7 @@ menuconfig ARCH_DAVINCI select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO + select RESET_CONTROLLER select HAVE_IDE select PINCTRL_SINGLE diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 3e447d468845..e650131ee88f 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -34,7 +34,7 @@ void __iomem *davinci_sysmod_base; void davinci_map_sysmod(void) { - davinci_sysmod_base = ioremap_nocache(DAVINCI_SYSTEM_MODULE_BASE, + davinci_sysmod_base = ioremap(DAVINCI_SYSTEM_MODULE_BASE, 0x800); /* * Throw a bug since a lot of board initialization code depends diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 4ef56571145b..6e7f10c8098a 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -12,6 +12,7 @@ menuconfig ARCH_EXYNOS select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC + select EXYNOS_IRQ_COMBINER select COMMON_CLK_SAMSUNG select EXYNOS_ASV select EXYNOS_CHIPID diff --git a/arch/arm/mach-imx/cpu.c b/arch/arm/mach-imx/cpu.c index d8118031c51f..871f98342d50 100644 --- a/arch/arm/mach-imx/cpu.c +++ b/arch/arm/mach-imx/cpu.c @@ -84,7 +84,7 @@ struct device * __init imx_soc_device_init(void) const char *ocotp_compat = NULL; struct soc_device *soc_dev; struct device_node *root; - struct regmap *ocotp; + struct regmap *ocotp = NULL; const char *soc_id; u64 soc_uid = 0; u32 val; @@ -148,11 +148,11 @@ struct device * __init imx_soc_device_init(void) soc_id = "i.MX6UL"; break; case MXC_CPU_IMX6ULL: - ocotp_compat = "fsl,imx6ul-ocotp"; + ocotp_compat = "fsl,imx6ull-ocotp"; soc_id = "i.MX6ULL"; break; case MXC_CPU_IMX6ULZ: - ocotp_compat = "fsl,imx6ul-ocotp"; + ocotp_compat = "fsl,imx6ull-ocotp"; soc_id = "i.MX6ULZ"; break; case MXC_CPU_IMX6SLL: @@ -175,7 +175,9 @@ struct device * __init imx_soc_device_init(void) ocotp = syscon_regmap_lookup_by_compatible(ocotp_compat); if (IS_ERR(ocotp)) pr_err("%s: failed to find %s 
regmap!\n", __func__, ocotp_compat); + } + if (!IS_ERR_OR_NULL(ocotp)) { regmap_read(ocotp, OCOTP_UID_H, &val); soc_uid = val; regmap_read(ocotp, OCOTP_UID_L, &val); diff --git a/arch/arm/mach-mmp/pxa168.h b/arch/arm/mach-mmp/pxa168.h index 0331c58b07a2..dff651b9f252 100644 --- a/arch/arm/mach-mmp/pxa168.h +++ b/arch/arm/mach-mmp/pxa168.h @@ -17,9 +17,9 @@ extern void pxa168_clear_keypad_wakeup(void); #include <linux/platform_data/keypad-pxa27x.h> #include <linux/pxa168_eth.h> #include <linux/platform_data/mv_usb.h> +#include <linux/soc/mmp/cputype.h> #include "devices.h" -#include "cputype.h" extern struct pxa_device_desc pxa168_device_uart1; extern struct pxa_device_desc pxa168_device_uart2; diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 110dcb3314d1..c65cfc1ad99b 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -207,7 +207,7 @@ static int __init mmp_dt_init_timer(struct device_node *np) ret = clk_prepare_enable(clk); if (ret) return ret; - rate = clk_get_rate(clk) / 2; + rate = clk_get_rate(clk); } else if (cpu_is_pj4()) { rate = 6500000; } else { diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index ad08d470a2ca..dca7d06c0b93 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -95,6 +95,7 @@ config ARCH_OMAP2PLUS bool select ARCH_HAS_BANDGAP select ARCH_HAS_HOLES_MEMORYMODEL + select ARCH_HAS_RESET_CONTROLLER select ARCH_OMAP select CLKSRC_MMIO select GENERIC_IRQ_CHIP @@ -105,11 +106,11 @@ config ARCH_OMAP2PLUS select OMAP_DM_TIMER select OMAP_GPMC select PINCTRL + select RESET_CONTROLLER select SOC_BUS select TI_SYSC select OMAP_IRQCHIP select CLKSRC_TI_32K - select ARCH_HAS_RESET_CONTROLLER help Systems based on OMAP2, OMAP3, OMAP4 or OMAP5 diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index ca52271de5a8..e95c224ffc4d 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -306,10 +306,14 @@ static void __init dra7x_evm_mmc_quirk(void) static struct clockdomain *ti_sysc_find_one_clockdomain(struct clk *clk) { + struct clk_hw *hw = __clk_get_hw(clk); struct clockdomain *clkdm = NULL; struct clk_hw_omap *hwclk; - hwclk = to_clk_hw_omap(__clk_get_hw(clk)); + hwclk = to_clk_hw_omap(hw); + if (!omap2_clk_is_hw_omap(hw)) + return NULL; + if (hwclk && hwclk->clkdm_name) clkdm = clkdm_lookup(hwclk->clkdm_name); diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index e1a394ac3eea..868dc0cf4859 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -1008,7 +1008,7 @@ static void __init magician_init(void) pxa_set_udc_info(&magician_udc_info); /* Check LCD type we have */ - cpld = ioremap_nocache(PXA_CS3_PHYS, 0x1000); + cpld = ioremap(PXA_CS3_PHYS, 0x1000); if (cpld) { u8 board_id = __raw_readb(cpld + 0x14); diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index 96330ef25641..e771ce70e132 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -189,7 +189,7 @@ static void apmu_init_cpu(struct resource *res, int cpu, int bit) if ((cpu >= ARRAY_SIZE(apmu_cpus)) || apmu_cpus[cpu].iomem) return; - apmu_cpus[cpu].iomem = ioremap_nocache(res->start, resource_size(res)); + apmu_cpus[cpu].iomem = ioremap(res->start, resource_size(res)); apmu_cpus[cpu].bit = bit; pr_debug("apmu ioremap %d %d %pr\n", cpu, bit, res); diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c 
index e84599dd96f1..672081405a7e 100644 --- a/arch/arm/mach-shmobile/pm-rcar-gen2.c +++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c @@ -103,7 +103,7 @@ map: iounmap(p); /* setup reset vectors */ - p = ioremap_nocache(RST, 0x63); + p = ioremap(RST, 0x63); bar = phys_to_sbar(res.start); if (has_a15) { writel_relaxed(bar, p + CA15BAR); diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c index 787d039b5a07..f760c27c9907 100644 --- a/arch/arm/mach-shmobile/setup-r8a7740.c +++ b/arch/arm/mach-shmobile/setup-r8a7740.c @@ -28,7 +28,7 @@ static void __init r8a7740_meram_workaround(void) { void __iomem *reg; - reg = ioremap_nocache(MEBUFCNTR, 4); + reg = ioremap(MEBUFCNTR, 4); if (reg) { iowrite32(0x01600164, reg); iounmap(reg); @@ -37,9 +37,9 @@ static void __init r8a7740_meram_workaround(void) static void __init r8a7740_init_irq_of(void) { - void __iomem *intc_prio_base = ioremap_nocache(0xe6900010, 0x10); - void __iomem *intc_msk_base = ioremap_nocache(0xe6900040, 0x10); - void __iomem *pfc_inta_ctrl = ioremap_nocache(0xe605807c, 0x4); + void __iomem *intc_prio_base = ioremap(0xe6900010, 0x10); + void __iomem *intc_msk_base = ioremap(0xe6900040, 0x10); + void __iomem *pfc_inta_ctrl = ioremap(0xe605807c, 0x4); irqchip_init(); diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c index ce51794f64c7..2bc93f391bcf 100644 --- a/arch/arm/mach-shmobile/setup-r8a7778.c +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -22,7 +22,7 @@ static void __init r8a7778_init_irq_dt(void) { - void __iomem *base = ioremap_nocache(0xfe700000, 0x00100000); + void __iomem *base = ioremap(0xfe700000, 0x00100000); BUG_ON(!base); diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 354e0e7025ae..1da11bdb1dfb 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -551,8 +551,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) static int __init ve_spc_clk_init(void) { - int cpu; + int cpu, cluster; struct clk *clk; + bool init_opp_table[MAX_CLUSTERS] = { false }; if (!info) return 0; /* Continue only if SPC is initialised */ @@ -578,8 +579,17 @@ static int __init ve_spc_clk_init(void) continue; } + cluster = topology_physical_package_id(cpu_dev->id); + if (init_opp_table[cluster]) + continue; + if (ve_init_opp_table(cpu_dev)) pr_warn("failed to initialise cpu%d opp table\n", cpu); + else if (dev_pm_opp_set_sharing_cpus(cpu_dev, + topology_core_cpumask(cpu_dev->id))) + pr_warn("failed to mark OPPs shared for cpu%d\n", cpu); + else + init_opp_table[cluster] = true; } platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 0ee8fc4b4672..dc8f152f3556 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -135,13 +135,13 @@ flush_levels: and r1, r1, #7 @ mask of the bits for current cache only cmp r1, #2 @ see what cache we have at this level blt skip @ skip if no cache, or just i-cache -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic #endif mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr isb @ isb to sych the new cssr&csidr mrc p15, 1, r1, c0, c0, 0 @ read the new csidr -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION restore_irqs_notrace r9 #endif and r2, r1, #7 @ extract the length of the cache lines diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index a0035c426ce6..1bc3a0a50753 100644 --- 
a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -183,13 +183,13 @@ flush_levels: and r1, r1, #7 @ mask of the bits for current cache only cmp r1, #2 @ see what cache we have at this level blt skip @ skip if no cache, or just i-cache -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic #endif write_csselr r10, r1 @ set current cache level isb @ isb to sych the new cssr&csidr read_ccsidr r1 @ read the new csidr -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION restore_irqs_notrace r9 #endif and r2, r1, #7 @ extract the length of the cache lines diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 0fda344beb0b..1babb392e70a 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -14,7 +14,7 @@ targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1b4476ddb83..d2cebf613ec3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -34,32 +34,32 @@ config ARM64 select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_INLINE_READ_LOCK if !PREEMPT - select ARCH_INLINE_READ_LOCK_BH if !PREEMPT - select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT - select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT - select ARCH_INLINE_READ_UNLOCK if !PREEMPT - select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT - select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT - select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT - select ARCH_INLINE_WRITE_LOCK if !PREEMPT - select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT - select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT - select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT - select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT - select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT - select ARCH_INLINE_SPIN_LOCK if !PREEMPT - select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT - select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT - select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_INLINE_READ_LOCK if !PREEMPTION + select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION + select 
ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_KEEP_MEMBLOCK select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS @@ -81,7 +81,7 @@ config ARM64 select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI select ARM_PSCI_FW - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) @@ -138,6 +138,7 @@ config ARM64 select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_CONTEXT_TRACKING + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS @@ -161,6 +162,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_RCU_TABLE_FREE select HAVE_RSEQ select HAVE_STACKPROTECTOR @@ -301,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES config ARCH_PROC_KCORE_TEXT def_bool y +config BROKEN_GAS_INST + def_bool !$(as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n) + config KASAN_SHADOW_OFFSET hex depends on KASAN @@ -514,9 +519,13 @@ config ARM64_ERRATUM_1418040 If unsure, say Y. +config ARM64_WORKAROUND_SPECULATIVE_AT_VHE + bool + config ARM64_ERRATUM_1165522 bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y + select ARM64_WORKAROUND_SPECULATIVE_AT_VHE help This option adds a workaround for ARM Cortex-A76 erratum 1165522. @@ -526,6 +535,19 @@ config ARM64_ERRATUM_1165522 If unsure, say Y. +config ARM64_ERRATUM_1530923 + bool "Cortex-A55: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + default y + select ARM64_WORKAROUND_SPECULATIVE_AT_VHE + help + This option adds a workaround for ARM Cortex-A55 erratum 1530923. + + Affected Cortex-A55 cores (r0p0, r0p1, r1p0, r2p0) could end-up with + corrupted TLBs by speculating an AT instruction during a guest + context switch. + + If unsure, say Y. + config ARM64_ERRATUM_1286807 bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" default y @@ -542,9 +564,13 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. +config ARM64_WORKAROUND_SPECULATIVE_AT_NVHE + bool + config ARM64_ERRATUM_1319367 bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y + select ARM64_WORKAROUND_SPECULATIVE_AT_NVHE help This option adds work arounds for ARM Cortex-A57 erratum 1319537 and A72 erratum 1319367 @@ -1363,6 +1389,11 @@ config ARM64_PAN instruction if the cpu does not implement the feature. 
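/*
 * A minimal sketch of how the two umbrella capabilities above are meant
 * to be consumed: callers test ARM64_WORKAROUND_SPECULATIVE_AT_VHE once,
 * whether erratum 1165522 (Cortex-A76) or 1530923 (Cortex-A55) is the
 * one that applies. This mirrors the kvm_hyp.h hunk later in this diff;
 * the helper name here is hypothetical.
 */
static inline void speculative_at_vhe_sync(void)
{
        /* Patched to a real ISB only on affected cores. */
        asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE));
}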
config ARM64_LSE_ATOMICS + bool + default ARM64_USE_LSE_ATOMICS + depends on $(as-instr,.arch_extension lse) + +config ARM64_USE_LSE_ATOMICS bool "Atomic instructions" depends on JUMP_LABEL default y @@ -1484,6 +1515,30 @@ config ARM64_PTR_AUTH endmenu +menu "ARMv8.5 architectural features" + +config ARM64_E0PD + bool "Enable support for E0PD" + default y + help + E0PD (part of the ARMv8.5 extensions) allows us to ensure + that EL0 accesses made via TTBR1 always fault in constant time, + providing similar benefits to KASLR as those provided by KPTI, but + with lower overhead and without disrupting legitimate access to + kernel memory such as SPE. + + This option enables E0PD for TTBR1 where available. + +config ARCH_RANDOM + bool "Enable support for random number generation" + default y + help + Random number generation (part of the ARMv8.5 Extensions) + provides a high bandwidth, cryptographically secure + hardware random number generator. + +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y @@ -1544,7 +1599,7 @@ config ARM64_MODULE_PLTS config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" - select CONFIG_ARM_GIC_V3 + select ARM_GIC_V3 help Adds support for mimicking Non-Maskable Interrupts through the use of GIC interrupt priority. This support requires version 3 or later of diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 1fbe24d4fdb6..dca1a97751ab 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -30,11 +30,8 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -# Check for binutils support for specific extensions -lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1) - -ifeq ($(CONFIG_ARM64_LSE_ATOMICS), y) - ifeq ($(lseinstr),) +ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) + ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) $(warning LSE atomics not supported by binutils) endif endif @@ -45,19 +42,15 @@ cc_has_k_constraint := $(call try-run,echo \ return 0; \ }' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1) -ifeq ($(CONFIG_ARM64), y) -brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . 
- 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1) - - ifneq ($(brokengasinst),) +ifeq ($(CONFIG_BROKEN_GAS_INST),y) $(warning Detected assembler with broken .inst; disassembly will be unreliable) - endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \ +KBUILD_CFLAGS += -mgeneral-regs-only \ $(compat_vdso) $(cc_has_k_constraint) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-disable-warning, psabi) -KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso) +KBUILD_AFLAGS += $(compat_vdso) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c506434..cd3414898d10 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.gz +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts index 96ab0227e82d..121e6cc4849b 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts @@ -15,7 +15,7 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; vmmc-supply = <®_dcdc1>; - vqmmc-supply = <®_dcdc1>; + vqmmc-supply = <®_eldo1>; bus-width = <8>; non-removable; cap-mmc-hw-reset; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts index 01a9a52edae4..393c1948a495 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts @@ -140,7 +140,7 @@ &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; - vmmc-supply = <®_aldo2>; + vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_dldo4>; mmc-pwrseq = <&wifi_pwrseq>; bus-width = <4>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 144a2c19ac02..d1fc9c2055f4 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -61,10 +61,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index 5bd07469766b..a8bb3fa9fec9 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -46,25 +46,47 @@ }; gpio-keys { - compatible = "gpio-keys-polled"; - poll-interval = <100>; + compatible = "gpio-keys"; key1 { label = "A"; linux,code = <BTN_0>; gpios = <&gpio GPIOH_6 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <34 IRQ_TYPE_EDGE_BOTH>; }; key2 { label = "B"; linux,code = <BTN_1>; gpios = <&gpio GPIOH_7 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <35 IRQ_TYPE_EDGE_BOTH>; }; key3 { label = "C"; linux,code = <BTN_2>; gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <2 IRQ_TYPE_EDGE_BOTH>; + }; + + mic_mute { + label = "MicMute"; + linux,code = <SW_MUTE_DEVICE>; + linux,input-type = <EV_SW>; + gpios = <&gpio_ao 
GPIOE_2 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <99 IRQ_TYPE_EDGE_BOTH>; + }; + + power_key { + label = "PowerKey"; + linux,code = <KEY_POWER>; + gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <3 IRQ_TYPE_EDGE_BOTH>; }; }; @@ -569,6 +591,8 @@ bluetooth { compatible = "brcm,bcm43438-bt"; + interrupt-parent = <&gpio_intc>; + interrupts = <95 IRQ_TYPE_LEVEL_HIGH>; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; max-speed = <2000000>; clocks = <&wifi32k>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 8e8a77eb596a..a6f9b7784e8f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -88,7 +88,7 @@ reboot { compatible ="syscon-reboot"; - regmap = <&dcfg>; + regmap = <&rst>; offset = <0xb0>; mask = <0x02>; }; @@ -175,7 +175,13 @@ dcfg: syscon@1e00000 { compatible = "fsl,ls1028a-dcfg", "syscon"; reg = <0x0 0x1e00000 0x0 0x10000>; - big-endian; + little-endian; + }; + + rst: syscon@1e60000 { + compatible = "syscon"; + reg = <0x0 0x1e60000 0x0 0x10000>; + little-endian; }; scfg: syscon@1fc0000 { @@ -584,7 +590,7 @@ 0x00010004 0x0000003d 0x00010005 0x00000045 0x00010006 0x0000004d - 0x00010007 0x00000045 + 0x00010007 0x00000055 0x00010008 0x0000005e 0x00010009 0x00000066 0x0001000a 0x0000006e diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 6edbdfe2d0d7..3d95b66a2d71 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -740,7 +740,7 @@ reg = <0x30bd0000 0x10000>; interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>, - <&clk IMX8MM_CLK_SDMA1_ROOT>; + <&clk IMX8MM_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts index 2a759dff9f87..596bc65f475c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts @@ -421,7 +421,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_imu>; interrupt-parent = <&gpio3>; - interrupts = <19 IRQ_TYPE_LEVEL_LOW>; + interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; vdd-supply = <®_3v3_p>; vddio-supply = <®_3v3_p>; }; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 94090c6fb946..d43e1299c8ef 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -60,10 +60,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, diff --git a/arch/arm64/boot/dts/rockchip/rk3328-a1.dts b/arch/arm64/boot/dts/rockchip/rk3328-a1.dts index 76b49f573101..16f1656d5203 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-a1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-a1.dts @@ -49,7 +49,8 @@ ir-receiver { compatible = "gpio-ir-receiver"; - gpios = <&gpio2 RK_PA2 GPIO_ACTIVE_HIGH>; + gpios = <&gpio2 RK_PA2 GPIO_ACTIVE_LOW>; + linux,rc-map-name = "rc-beelink-gs1"; }; }; diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 01e0ab36d135..ddf4a0d85c1c 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ 
b/arch/arm64/crypto/sha256-glue.c @@ -108,7 +108,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data, * input when running on a preemptible kernel, but process the * data block by block instead. */ - if (IS_ENABLED(CONFIG_PREEMPT) && + if (IS_ENABLED(CONFIG_PREEMPTION) && chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE) chunk = SHA256_BLOCK_SIZE - sctx->count % SHA256_BLOCK_SIZE; diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index b9f8d787eea9..324e7d5ab37e 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -35,13 +35,16 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -62,15 +65,14 @@ static inline void apply_alternatives_module(void *start, size_t length) { } * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -78,17 +80,25 @@ static inline void apply_alternatives_module(void *start, size_t length) { } ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) 
\ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include <asm/assembler.h> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 89e4c8b79349..4750fc8030c3 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -141,6 +141,7 @@ static inline u32 gic_read_rpr(void) #define gicr_read_pendbaser(c) readq_relaxed(c) #define gits_write_vpropbaser(v, c) writeq_relaxed(v, c) +#define gits_read_vpropbaser(c) readq_relaxed(c) #define gits_write_vpendbaser(v, c) writeq_relaxed(v, c) #define gits_read_vpendbaser(c) readq_relaxed(c) diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h new file mode 100644 index 000000000000..3fe02da70004 --- /dev/null +++ b/arch/arm64/include/asm/archrandom.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARCHRANDOM_H +#define _ASM_ARCHRANDOM_H + +#ifdef CONFIG_ARCH_RANDOM + +#include <linux/random.h> +#include <asm/cpufeature.h> + +static inline bool __arm64_rndr(unsigned long *v) +{ + bool ok; + + /* + * Reads of RNDR set PSTATE.NZCV to 0b0000 on success, + * and set PSTATE.NZCV to 0b0100 otherwise. + */ + asm volatile( + __mrs_s("%0", SYS_RNDR_EL0) "\n" + " cset %w1, ne\n" + : "=r" (*v), "=r" (ok) + : + : "cc"); + + return ok; +} + +static inline bool __must_check arch_get_random_long(unsigned long *v) +{ + return false; +} + +static inline bool __must_check arch_get_random_int(unsigned int *v) +{ + return false; +} + +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) +{ + /* + * Only support the generic interface after we have detected + * the system wide capability, avoiding complexity with the + * cpufeature code and with potential scheduling between CPUs + * with and without the feature. + */ + if (!cpus_have_const_cap(ARM64_HAS_RNG)) + return false; + + return __arm64_rndr(v); +} + + +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) +{ + unsigned long val; + bool ok = arch_get_random_seed_long(&val); + + *v = val; + return ok; +} + +static inline bool __init __early_cpu_has_rndr(void) +{ + /* Open code as we run prior to the first call to cpufeature. */ + unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1); + return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf; +} + +#else + +static inline bool __arm64_rndr(unsigned long *v) { return false; } +static inline bool __init __early_cpu_has_rndr(void) { return false; } + +#endif /* CONFIG_ARCH_RANDOM */ +#endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b8cf7c85ffa2..aca337d79d12 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -40,12 +40,6 @@ msr daif, \flags .endm - /* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */ - .macro inherit_daif, pstate:req, tmp:req - and \tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) - msr daif, \tmp - .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. 
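/*
 * A rough caller-side sketch for the new archrandom.h interface above
 * (illustrative only: the consumer function is hypothetical, but
 * add_device_randomness() is the generic sink such entropy feeds):
 */
#include <linux/random.h>
#include <asm/archrandom.h>

static void seed_from_rndr(void)
{
        unsigned long seed;

        /* Cheaply fails until ARM64_HAS_RNG is established system-wide. */
        if (arch_get_random_seed_long(&seed))
                add_device_randomness(&seed, sizeof(seed));
}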
*/ .macro enable_da_f msr daifclr, #(8 | 4 | 1) @@ -86,13 +80,6 @@ .endm /* - * SMP data memory barrier - */ - .macro smp_dmb, opt - dmb \opt - .endm - -/* * RAS Error Synchronization barrier */ .macro esb @@ -462,17 +449,6 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* - * Annotate a function as position independent, i.e., safe to be called before - * the kernel virtual mapping is activated. - */ -#define ENDPIPROC(x) \ - .globl __pi_##x; \ - .type __pi_##x, %function; \ - .set __pi_##x, x; \ - .size __pi_##x, . - x; \ - ENDPROC(x) - -/* * Annotate a function as being unsuitable for kprobes. */ #ifdef CONFIG_KPROBES @@ -699,8 +675,8 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * where <label> is optional, and marks the point where execution will resume * after a yield has been performed. If omitted, execution resumes right after * the endif_yield_neon invocation. Note that the entire sequence, including - * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT - * is not defined. + * the provided patchup code, will be omitted from the image if + * CONFIG_PREEMPTION is not defined. * * As a convenience, in the case where no patchup code is required, the above * sequence may be abbreviated to @@ -728,7 +704,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm .macro if_will_cond_yield_neon -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION get_current_task x0 ldr x0, [x0, #TSK_TI_PREEMPT] sub x0, x0, #PREEMPT_DISABLE_OFFSET diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 7b012148bfd6..13869b76b58c 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -12,7 +12,7 @@ #include <linux/stringify.h> -#if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE) +#ifdef CONFIG_ARM64_LSE_ATOMICS #define __LL_SC_FALLBACK(asm_ops) \ " b 3f\n" \ " .subsection 1\n" \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 574808b9df4c..da3280f639cd 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -14,6 +14,7 @@ static inline void __lse_atomic_##op(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %w[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -30,6 +31,7 @@ ATOMIC_OP(add, stadd) static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %w[i], %w[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -58,6 +60,7 @@ static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -77,6 +80,7 @@ ATOMIC_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic_and(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -87,6 +91,7 @@ static inline void __lse_atomic_and(int i, atomic_t *v) static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -106,6 +111,7 @@ ATOMIC_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic_sub(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " neg %w[i], %w[i]\n" " 
stadd %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -118,6 +124,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ @@ -139,6 +146,7 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -159,6 +167,7 @@ ATOMIC_FETCH_OP_SUB( , al, "memory") static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -175,6 +184,7 @@ ATOMIC64_OP(add, stadd) static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %[i], %[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -203,6 +213,7 @@ static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -222,6 +233,7 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic64_and(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %[i], %[i]\n" " stclr %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -232,6 +244,7 @@ static inline void __lse_atomic64_and(s64 i, atomic64_t *v) static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -251,6 +264,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " neg %[i], %[i]\n" " stadd %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -263,6 +277,7 @@ static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ @@ -284,6 +299,7 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -305,6 +321,7 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile( + __LSE_PREAMBLE "1: ldr %x[tmp], %[v]\n" " subs %[ret], %x[tmp], #1\n" " b.lt 2f\n" @@ -332,6 +349,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " mov %" #w "[tmp], %" #w "[old]\n" \ " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \ " mov %" #w "[ret], %" #w "[tmp]" \ @@ -379,6 +397,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ \ asm volatile( \ + __LSE_PREAMBLE \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index d064a50deb5f..8d2a7de39744 100644 --- a/arch/arm64/include/asm/checksum.h +++ 
b/arch/arm64/include/asm/checksum.h @@ -35,6 +35,9 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } #define ip_fast_csum ip_fast_csum +extern unsigned int do_csum(const unsigned char *buff, int len); +#define do_csum do_csum + #include <asm-generic/checksum.h> #endif /* __ASM_CHECKSUM_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d72d995b7e25..b4a40535a3d8 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -39,6 +39,7 @@ struct cpuinfo_arm64 { u32 reg_id_isar3; u32 reg_id_isar4; u32 reg_id_isar5; + u32 reg_id_isar6; u32 reg_id_mmfr0; u32 reg_id_mmfr1; u32 reg_id_mmfr2; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b92683871119..865e0253fc1e 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,7 +44,7 @@ #define ARM64_SSBS 34 #define ARM64_WORKAROUND_1418040 35 #define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_1165522 37 +#define ARM64_WORKAROUND_SPECULATIVE_AT_VHE 37 #define ARM64_HAS_ADDRESS_AUTH_ARCH 38 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 #define ARM64_HAS_GENERIC_AUTH_ARCH 40 @@ -55,8 +55,10 @@ #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_1542419 47 -#define ARM64_WORKAROUND_1319367 48 +#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48 +#define ARM64_HAS_E0PD 49 +#define ARM64_HAS_RNG 50 -#define ARM64_NCAPS 49 +#define ARM64_NCAPS 51 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4261d55e8506..92ef9539874a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -613,6 +613,11 @@ static inline bool system_has_prio_mask_debugging(void) system_uses_irq_prio_masking(); } +static inline bool system_capabilities_finalized(void) +{ + return static_branch_likely(&arm64_const_caps_ready); +} + #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index aca07c2f6e6e..a87a93f67671 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -85,6 +85,8 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 +#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 @@ -111,6 +113,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) +#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 72acd2db167f..ec213b4a1650 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -38,7 +38,7 @@ static inline void 
local_daif_mask(void) trace_hardirqs_off(); } -static inline unsigned long local_daif_save(void) +static inline unsigned long local_daif_save_flags(void) { unsigned long flags; @@ -50,6 +50,15 @@ static inline unsigned long local_daif_save(void) flags |= PSR_I_BIT; } + return flags; +} + +static inline unsigned long local_daif_save(void) +{ + unsigned long flags; + + flags = local_daif_save_flags(); + local_daif_mask(); return flags; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index b54d3a86c444..44531a69d32b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -93,21 +93,17 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) -#define __efi_call_early(f, ...) f(__VA_ARGS__) -#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) -#define efi_is_64bit() (true) +#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) +#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) +#define efi_is_native() (true) -#define efi_table_attr(table, attr, instance) \ - ((table##_t *)instance)->attr +#define efi_table_attr(inst, attr) (inst->attr) -#define efi_call_proto(protocol, f, instance, ...) \ - ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) +#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) #define alloc_screen_info(x...) &screen_info -static inline void free_screen_info(efi_system_table_t *sys_table_arg, - struct screen_info *si) +static inline void free_screen_info(struct screen_info *si) { } diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 4d5f3b5f50cd..b87c6e276ab1 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -45,8 +45,8 @@ void do_sysinstr(unsigned int esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); -void el0_svc_handler(struct pt_regs *regs); -void el0_svc_compat_handler(struct pt_regs *regs); +void do_el0_svc(struct pt_regs *regs); +void do_el0_svc_compat(struct pt_regs *regs); void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 3d2f2472a36c..0f00265248b5 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -86,6 +86,14 @@ #define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) #define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2) #define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT) +#define KERNEL_HWCAP_SVEI8MM __khwcap2_feature(SVEI8MM) +#define KERNEL_HWCAP_SVEF32MM __khwcap2_feature(SVEF32MM) +#define KERNEL_HWCAP_SVEF64MM __khwcap2_feature(SVEF64MM) +#define KERNEL_HWCAP_SVEBF16 __khwcap2_feature(SVEBF16) +#define KERNEL_HWCAP_I8MM __khwcap2_feature(I8MM) +#define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16) +#define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) +#define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 12a561a54128..d24b527e8c00 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h 
@@ -96,6 +96,10 @@ static inline void crash_post_resume(void) {} struct kimage_arch { void *dtb; unsigned long dtb_mem; + /* Core ELF header buffer */ + void *elf_headers; + unsigned long elf_headers_mem; + unsigned long elf_headers_sz; }; extern const struct kexec_file_ops kexec_image_ops; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c61260cf63c5..f5acdde17f3b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -547,7 +547,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, * wrong, and hyp will crash and burn when it uses any * cpus_have_const_cap() wrapper. */ - BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); + BUG_ON(!system_capabilities_finalized()); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); /* @@ -571,7 +571,7 @@ static inline bool kvm_arch_requires_vhe(void) return true; /* Some implementations have defects that confine them to VHE */ - if (cpus_have_cap(ARM64_WORKAROUND_1165522)) + if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) return true; return false; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 97f21cc66657..a3a6a2ba9a63 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -91,11 +91,11 @@ static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); /* - * ARM erratum 1165522 requires the actual execution of the above - * before we can switch to the EL1/EL0 translation regime used by + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL1/EL0 translation regime used by * the guest. */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); } #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 1b266292f0be..ebee3113a62f 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -4,4 +4,20 @@ #define __ALIGN .align 2 #define __ALIGN_STR ".align 2" +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. 
+ */ +#define SYM_FUNC_START_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START(x) + +#define SYM_FUNC_START_WEAK_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START_WEAK(x) + +#define SYM_FUNC_END_PI(x) \ + SYM_FUNC_END(x); \ + SYM_FUNC_END_ALIAS(__pi_##x) + #endif diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 80b388278149..d429f7701c36 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -4,7 +4,9 @@ #include <asm/atomic_ll_sc.h> -#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) +#ifdef CONFIG_ARM64_LSE_ATOMICS + +#define __LSE_PREAMBLE ".arch armv8-a+lse\n" #include <linux/compiler_types.h> #include <linux/export.h> @@ -14,8 +16,6 @@ #include <asm/atomic_lse.h> #include <asm/cpucaps.h> -__asm__(".arch_extension lse"); - extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -34,9 +34,9 @@ static inline bool system_uses_lse_atomics(void) /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ - ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) + ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) -#else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#else /* CONFIG_ARM64_LSE_ATOMICS */ static inline bool system_uses_lse_atomics(void) { return false; } @@ -44,5 +44,5 @@ static inline bool system_uses_lse_atomics(void) { return false; } #define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc -#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#endif /* CONFIG_ARM64_LSE_ATOMICS */ #endif /* __ASM_LSE_H */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index f217e3292919..e4d862420bb4 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -29,52 +29,11 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) -static inline bool arm64_kernel_unmapped_at_el0(void) -{ - return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && - cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); -} +extern bool arm64_use_ng_mappings; -static inline bool arm64_kernel_use_ng_mappings(void) +static inline bool arm64_kernel_unmapped_at_el0(void) { - bool tx1_bug; - - /* What's a kpti? Use global mappings if we don't know. */ - if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) - return false; - - /* - * Note: this function is called before the CPU capabilities have - * been configured, so our early mappings will be global. If we - * later determine that kpti is required, then - * kpti_install_ng_mappings() will make them non-global. - */ - if (arm64_kernel_unmapped_at_el0()) - return true; - - if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return false; - - /* - * KASLR is enabled so we're going to be enabling kpti on non-broken - * CPUs regardless of their susceptibility to Meltdown. Rather - * than force everybody to go through the G -> nG dance later on, - * just put down non-global mappings from the beginning. 
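/*
 * Sketch of what __LSE_PREAMBLE (defined in the lse.h hunk above) buys:
 * each asm block carries its own ".arch armv8-a+lse" directive, so LSE
 * opcodes assemble even though the file-wide target remains armv8-a.
 * Illustrative fragment modelled on the atomic_lse.h hunks; the function
 * name is made up:
 */
static inline void sketch_stadd(int i, atomic_t *v)
{
        asm volatile(__LSE_PREAMBLE
        "       stadd   %w[i], %[v]\n"
        : [i] "+r" (i), [v] "+Q" (v->counter)
        : "r" (v));
}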
- */ - if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - tx1_bug = false; -#ifndef MODULE - } else if (!static_branch_likely(&arm64_const_caps_ready)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - tx1_bug = is_midr_in_range_list(read_cpuid_id(), - cavium_erratum_27456_cpus); -#endif - } else { - tx1_bug = __cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456); - } - - return !tx1_bug && kaslr_offset() > 0; + return arm64_use_ng_mappings; } typedef void (*bp_hardening_cb_t)(void); @@ -128,6 +87,7 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); +extern bool kaslr_requires_kpti(void); #define INIT_MM_CONTEXT(name) \ .pgd = init_pg_dir, diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d9fbd433cc17..6bf5e650da78 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -110,6 +110,7 @@ #define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1) #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) +#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ /* * Level 2 descriptor (PMD). @@ -292,6 +293,8 @@ #define TCR_HD (UL(1) << 40) #define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) +#define TCR_E0PD0 (UL(1) << 55) +#define TCR_E0PD1 (UL(1) << 56) /* * TTBR. diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 8dc6c5cdabe6..6f87839f0249 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -26,8 +26,8 @@ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PTE_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PTE_NG : 0) -#define PMD_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PMD_SECT_NG : 0) +#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0) +#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? 
PMD_SECT_NG : 0) #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) @@ -85,13 +85,12 @@ #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -100,7 +99,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5d15b4735a0e..cd5de0e40bfa 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -96,12 +96,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -/* - * Execute-only user mappings do not have the PTE_USER bit set. All valid - * kernel mappings have the PTE_UXN bit set. - */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ @@ -117,8 +113,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; /* * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * write permission check). PROT_NONE mappings do not have the PTE_VALID bit + * set. 
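/*
 * Userspace-visible consequence of dropping PAGE_EXECONLY above, as a
 * hypothetical test fragment: PROT_EXEC now yields read+exec, so the
 * load below no longer faults (error handling elided):
 */
#include <sys/mman.h>

static unsigned char peek_exec_page(void)
{
        void *p = mmap(0, 4096, PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        /* Readable after this change; faulted while exec-only existed. */
        return *(volatile unsigned char *)p;
}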
*/ #define pte_access_permitted(pte, write) \ (pte_valid_user(pte) && (!(write) || pte_write(pte))) diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index d49951647014..80e946b2abee 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -79,11 +79,11 @@ static inline bool should_resched(int preempt_offset) return pc == preempt_offset; } -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() void preempt_schedule_notrace(void); #define __preempt_schedule_notrace() preempt_schedule_notrace() -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 25a73aab438f..3994169985ef 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -8,7 +8,6 @@ #include <asm-generic/sections.h> extern char __alt_instructions[], __alt_instructions_end[]; -extern char __exception_text_start[], __exception_text_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 7434844036d3..89cba2622b79 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -26,6 +26,8 @@ DECLARE_PER_CPU(bool, fpsimd_context_busy); static __must_check inline bool may_use_simd(void) { /* + * We must make sure that SVE has been initialized properly + * before using SIMD in the kernel. * fpsimd_context_busy is only set while preemption is disabled, * and is clear whenever preemption is enabled. Since * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy @@ -33,8 +35,10 @@ static __must_check inline bool may_use_simd(void) * migrated, and if it's clear we cannot be migrated to a CPU * where it is set. */ - return !in_irq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(fpsimd_context_busy); + return !WARN_ON(!system_capabilities_finalized()) && + system_supports_fpsimd() && + !in_irq() && !irqs_disabled() && !in_nmi() && + !this_cpu_read(fpsimd_context_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index b093b287babf..102404dc1e13 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -11,4 +11,13 @@ /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() +/* + * Changing this will break osq_lock() thanks to the call inside + * smp_cond_load_relaxed().
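/*
 * Context for the constraint above, paraphrased from the optimistic
 * spin loop in kernel/locking/osq_lock.c: the call sits inside the
 * condition expression of smp_cond_load_relaxed(), which is why it has
 * to stay a cheap, side-effect-free compile-time constant here:
 */
if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() ||
                          vcpu_is_preempted(node_cpu(node->prev))))
        return true;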
+ * + * See: + * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net + */ +#define vcpu_is_preempted(cpu) false + #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6e919fafb43d..b91570ff9db1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -146,6 +146,7 @@ #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) #define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) +#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) #define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) @@ -365,6 +366,9 @@ #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) +#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) +#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) + #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) @@ -538,7 +542,20 @@ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) +/* MAIR_ELx memory attributes (used by Linux) */ +#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) +#define MAIR_ATTR_DEVICE_nGnRE UL(0x04) +#define MAIR_ATTR_DEVICE_GRE UL(0x0c) +#define MAIR_ATTR_NORMAL_NC UL(0x44) +#define MAIR_ATTR_NORMAL_WT UL(0xbb) +#define MAIR_ATTR_NORMAL UL(0xff) +#define MAIR_ATTR_MASK UL(0xff) + +/* Position the attr at the correct index */ +#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) + /* id_aa64isar0 */ +#define ID_AA64ISAR0_RNDR_SHIFT 60 #define ID_AA64ISAR0_TS_SHIFT 52 #define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 @@ -553,6 +570,10 @@ #define ID_AA64ISAR0_AES_SHIFT 4 /* id_aa64isar1 */ +#define ID_AA64ISAR1_I8MM_SHIFT 52 +#define ID_AA64ISAR1_DGH_SHIFT 48 +#define ID_AA64ISAR1_BF16_SHIFT 44 +#define ID_AA64ISAR1_SPECRES_SHIFT 40 #define ID_AA64ISAR1_SB_SHIFT 36 #define ID_AA64ISAR1_FRINTTS_SHIFT 32 #define ID_AA64ISAR1_GPI_SHIFT 28 @@ -605,12 +626,20 @@ #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 /* id_aa64zfr0 */ +#define ID_AA64ZFR0_F64MM_SHIFT 56 +#define ID_AA64ZFR0_F32MM_SHIFT 52 +#define ID_AA64ZFR0_I8MM_SHIFT 44 #define ID_AA64ZFR0_SM4_SHIFT 40 #define ID_AA64ZFR0_SHA3_SHIFT 32 +#define ID_AA64ZFR0_BF16_SHIFT 20 #define ID_AA64ZFR0_BITPERM_SHIFT 16 #define ID_AA64ZFR0_AES_SHIFT 4 #define ID_AA64ZFR0_SVEVER_SHIFT 0 +#define ID_AA64ZFR0_F64MM 0x1 +#define ID_AA64ZFR0_F32MM 0x1 +#define ID_AA64ZFR0_I8MM 0x1 +#define ID_AA64ZFR0_BF16 0x1 #define ID_AA64ZFR0_SM4 0x1 #define ID_AA64ZFR0_SHA3 0x1 #define ID_AA64ZFR0_BITPERM 0x1 @@ -655,6 +684,7 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_E0PD_SHIFT 60 #define ID_AA64MMFR2_FWB_SHIFT 40 #define ID_AA64MMFR2_AT_SHIFT 32 #define ID_AA64MMFR2_LVA_SHIFT 16 @@ -679,6 +709,14 @@ #define ID_ISAR5_AES_SHIFT 4 #define ID_ISAR5_SEVL_SHIFT 0 +#define ID_ISAR6_I8MM_SHIFT 24 +#define ID_ISAR6_BF16_SHIFT 20 +#define ID_ISAR6_SPECRES_SHIFT 16 +#define ID_ISAR6_SB_SHIFT 12 +#define ID_ISAR6_FHM_SHIFT 8 +#define ID_ISAR6_DP_SHIFT 4 +#define ID_ISAR6_JSCVT_SHIFT 0 + #define MVFR0_FPROUND_SHIFT 28 #define MVFR0_FPSHVEC_SHIFT 24 #define MVFR0_FPSQRT_SHIFT 20 diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 2629a68b8724..5af82587909e 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -42,7 +42,6 @@ #endif #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_SYS_CLONE3 #ifndef 
__COMPAT_SYSCALL_NR #include <uapi/asm/unistd.h> diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index c50ee1b7d5cd..537b1e695365 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -16,7 +16,7 @@ #define VDSO_HAS_CLOCK_GETRES 1 -#define VDSO_HAS_32BIT_FALLBACK 1 +#define BUILD_VDSO32 1 static __always_inline int gettimeofday_fallback(struct __kernel_old_timeval *_tv, diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h new file mode 100644 index 000000000000..2ca708ab9b20 --- /dev/null +++ b/arch/arm64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARM64_VMALLOC_H +#define _ASM_ARM64_VMALLOC_H + +#endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index a1e72886b30c..7752d93bb50f 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -65,5 +65,13 @@ #define HWCAP2_SVESM4 (1 << 6) #define HWCAP2_FLAGM2 (1 << 7) #define HWCAP2_FRINT (1 << 8) +#define HWCAP2_SVEI8MM (1 << 9) +#define HWCAP2_SVEF32MM (1 << 10) +#define HWCAP2_SVEF64MM (1 << 11) +#define HWCAP2_SVEBF16 (1 << 12) +#define HWCAP2_I8MM (1 << 13) +#define HWCAP2_BF16 (1 << 14) +#define HWCAP2_DGH (1 << 15) +#define HWCAP2_RNG (1 << 16) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h index 4703d218663a..f83a70e07df8 100644 --- a/arch/arm64/include/uapi/asm/unistd.h +++ b/arch/arm64/include/uapi/asm/unistd.h @@ -19,5 +19,6 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYS_CLONE3 #include <asm-generic/unistd.h> diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 3a58e9db5cfe..a100483b47c4 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) return err; - current_flags = arch_local_save_flags(); + current_flags = local_daif_save_flags(); /* * SEA can interrupt SError, mask it and describe this as an NMI so diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index ca158be21f83..7832b3216370 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -618,7 +618,8 @@ static struct insn_emulation_ops setend_ops = { }; /* - * Invoked as late_initcall, since not needed before init spawned. + * Invoked as core_initcall, which guarantees that the instruction + * emulation is ready for userspace. 
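/*
 * For context: initcall levels run in order from kernel_init(), before
 * PID 1 is exec'd, so a core_initcall completes before any userspace
 * instruction can require emulation. The matching registration (not part
 * of the hunk shown here; recalled from the corresponding change and
 * possibly differing) is simply:
 */
core_initcall(armv8_deprecated_init);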
*/ static int __init armv8_deprecated_init(void) { diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 6ea337d464c4..32c7bf858dd9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart) mov x0, #HVC_SOFT_RESTART hvc #0 // no return -1: mov x18, x1 // entry +1: mov x8, x1 // entry mov x0, x2 // arg0 mov x1, x3 // arg1 mov x2, x4 // arg2 - br x18 + br x8 ENDPROC(__cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6a09ca7644ea..703ad0a84f99 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -547,6 +547,9 @@ static const struct midr_range spectre_v2_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } }; @@ -756,6 +759,20 @@ static const struct arm64_cpu_capabilities erratum_843419_list[] = { }; #endif +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE +static const struct midr_range erratum_speculative_at_vhe_list[] = { +#ifdef CONFIG_ARM64_ERRATUM_1165522 + /* Cortex A76 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), +#endif +#ifdef CONFIG_ARM64_ERRATUM_1530923 + /* Cortex A55 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0), +#endif + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -882,12 +899,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), }, #endif -#ifdef CONFIG_ARM64_ERRATUM_1165522 +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE { - /* Cortex-A76 r0p0 to r2p0 */ - .desc = "ARM erratum 1165522", - .capability = ARM64_WORKAROUND_1165522, - ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + .desc = "ARM errata 1165522, 1530923", + .capability = ARM64_WORKAROUND_SPECULATIVE_AT_VHE, + ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_vhe_list), }, #endif #ifdef CONFIG_ARM64_ERRATUM_1463225 @@ -924,7 +940,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_ERRATUM_1319367 { .desc = "ARM erratum 1319367", - .capability = ARM64_WORKAROUND_1319367, + .capability = ARM64_WORKAROUND_SPECULATIVE_AT_NVHE, ERRATA_MIDR_RANGE_LIST(ca57_a72), }, #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 04cf64e9f0c9..0b6715625cf6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -32,9 +32,7 @@ static unsigned long elf_hwcap __read_mostly; #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -47,19 +45,23 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM6 /* Need also bit for ARM64_CB_PATCH */ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +bool arm64_use_ng_mappings = false; +EXPORT_SYMBOL(arm64_use_ng_mappings); + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. 
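/*
 * Sketch: consumers of the feature-register tables in this file read the
 * sanitised system-wide copy of an ID register rather than the raw
 * sysreg, e.g. to test the FEAT_RNG field wired up above (the helper
 * name is hypothetical):
 */
static bool all_cpus_have_rndr(void)
{
        u64 isar0 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);

        return cpuid_feature_extract_unsigned_field(isar0,
                                        ID_AA64ISAR0_RNDR_SHIFT) > 0;
}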
This * will be used to determine if a new booting CPU should * go through the verification process to make sure that it * supports the system capabilities, without using a hotplug - * notifier. + * notifier. This is also used to decide if we could use + * the fast path for checking constant CPU caps. */ -static bool sys_caps_initialised; - -static inline void set_sys_caps_initialised(void) +DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); +EXPORT_SYMBOL(arm64_const_caps_ready); +static inline void finalize_system_capabilities(void) { - sys_caps_initialised = true; + static_branch_enable(&arm64_const_caps_ready); } static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) @@ -119,6 +121,7 @@ static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), @@ -135,6 +138,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), @@ -177,10 +184,18 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), @@ -225,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, 
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), @@ -313,6 +329,17 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_isar6[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SPECRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_FHM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_JSCVT_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ @@ -396,6 +423,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), + ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6), /* Op1 = 0, CRn = 0, CRm = 3 */ ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), @@ -600,6 +628,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6); init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); @@ -753,6 +782,8 @@ void update_cpu_features(int cpu, info->reg_id_isar4, boot->reg_id_isar4); taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, info->reg_id_isar5, boot->reg_id_isar5); + taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu, + info->reg_id_isar6, boot->reg_id_isar6); /* * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and @@ -785,7 +816,7 @@ void update_cpu_features(int cpu, /* Probe vector lengths, unless we already gave up on SVE */ if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && - !sys_caps_initialised) + !system_capabilities_finalized()) sve_update_vq_map(); } @@ -831,6 +862,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_ISAR3_EL1); read_sysreg_case(SYS_ID_ISAR4_EL1); read_sysreg_case(SYS_ID_ISAR5_EL1); + read_sysreg_case(SYS_ID_ISAR6_EL1); read_sysreg_case(SYS_MVFR0_EL1); read_sysreg_case(SYS_MVFR1_EL1); read_sysreg_case(SYS_MVFR2_EL1); @@ -965,6 +997,46 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) return has_cpuid_feature(entry, scope); } +/* + * This check is triggered during early boot, before the cpufeature code + * is initialised. Checking the status on the local CPU allows the boot + * CPU to detect the need for non-global mappings and thus avoid a + * pagetable re-write after all the CPUs are booted. This check will + * anyway be run on individual CPUs, allowing us to get to a consistent + * state once the SMP CPUs are up and thus make the switch to non-global + * mappings if required.
+ */ +bool kaslr_requires_kpti(void) +{ + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return false; + + /* + * E0PD does a similar job to KPTI so can be used instead + * where available. + */ + if (IS_ENABLED(CONFIG_ARM64_E0PD)) { + u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); + if (cpuid_feature_extract_unsigned_field(mmfr2, + ID_AA64MMFR2_E0PD_SHIFT)) + return false; + } + + /* + * Systems affected by Cavium erratum 27456 are incompatible + * with KPTI. + */ + if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { + extern const struct midr_range cavium_erratum_27456_cpus[]; + + if (is_midr_in_range_list(read_cpuid_id(), + cavium_erratum_27456_cpus)) + return false; + } + + return kaslr_offset() > 0; +} + static bool __meltdown_safe = true; static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ @@ -975,6 +1047,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, static const struct midr_range kpti_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), @@ -1008,7 +1081,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } /* Useful for KASLR robustness */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) { + if (kaslr_requires_kpti()) { if (!__kpti_forced) { str = "KASLR"; __kpti_forced = 1; @@ -1043,7 +1116,6 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; - static bool kpti_applied = false; int cpu = smp_processor_id(); /* @@ -1051,7 +1123,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) * it already or we have KASLR enabled and therefore have not * created any global mappings at all.
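/*
 * [Editorial sketch, not part of the patch] How the pieces above fit
 * together: kaslr_requires_kpti() extracts one 4-bit ID-register field (the
 * E0PD check), and the verdict is cached in arm64_use_ng_mappings, consumed
 * just below in kpti_install_ng_mappings(), so kernel page-table protections
 * can carry the nG bit from the very first mapping. Standalone model of both
 * steps; the PTE_MAYBE_NG macro name is assumed from this series, nG is bit
 * 11 of a stage-1 descriptor, and E0PD sits at bits [63:60] of
 * ID_AA64MMFR2_EL1:
 */
static inline unsigned int id_field4(unsigned long long reg, unsigned int shift)
{
	return (reg >> shift) & 0xf;	/* ID register fields are 4 bits wide */
}

extern bool arm64_use_ng_mappings;	/* decided once, early during boot */
#define PTE_NG		(1ULL << 11)
#define PTE_MAYBE_NG	(arm64_use_ng_mappings ? PTE_NG : 0)
/* e.g. id_field4(mmfr2, 60) != 0 means E0PD exists and KPTI can be skipped */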
*/ - if (kpti_applied || kaslr_offset() > 0) + if (arm64_use_ng_mappings) return; remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); @@ -1061,7 +1133,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) cpu_uninstall_idmap(); if (!cpu) - kpti_applied = true; + arm64_use_ng_mappings = true; return; } @@ -1251,6 +1323,14 @@ static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_E0PD +static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) +{ + if (this_cpu_has_cap(ARM64_HAS_E0PD)) + sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); +} +#endif /* CONFIG_ARM64_E0PD */ + #ifdef CONFIG_ARM64_PSEUDO_NMI static bool enable_pseudo_nmi; @@ -1291,7 +1371,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ -#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) +#ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, @@ -1302,7 +1382,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 2, }, -#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#endif /* CONFIG_ARM64_LSE_ATOMICS */ { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, @@ -1368,7 +1448,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1567,6 +1647,31 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 1, }, #endif +#ifdef CONFIG_ARM64_E0PD + { + .desc = "E0PD", + .capability = ARM64_HAS_E0PD, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_E0PD_SHIFT, + .matches = has_cpuid_feature, + .min_field_value = 1, + .cpu_enable = cpu_enable_e0pd, + }, +#endif +#ifdef CONFIG_ARCH_RANDOM + { + .desc = "Random Number Generator", + .capability = ARM64_HAS_RNG, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_RNDR_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, +#endif {}, }; @@ -1596,6 +1701,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .match_list = list, \ } +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ + } + #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { @@ -1638,6 +1749,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 
FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -1651,6 +1763,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), @@ -1658,8 +1773,12 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_PTR_AUTH @@ -1669,8 +1788,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. 
So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), @@ -1974,7 +2120,7 @@ void check_local_cpu_capabilities(void) * Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ - if (!sys_caps_initialised) + if (!system_capabilities_finalized()) update_cpu_capabilities(SCOPE_LOCAL_CPU); else verify_local_cpu_capabilities(); @@ -1988,14 +2134,6 @@ static void __init setup_boot_cpu_capabilities(void) enable_cpu_capabilities(SCOPE_BOOT_CPU); } -DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); -EXPORT_SYMBOL(arm64_const_caps_ready); - -static void __init mark_const_caps_ready(void) -{ - static_branch_enable(&arm64_const_caps_ready); -} - bool this_cpu_has_cap(unsigned int n) { if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) { @@ -2054,7 +2192,6 @@ void __init setup_cpu_features(void) u32 cwg; setup_system_capabilities(); - mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) @@ -2067,7 +2204,7 @@ void __init setup_cpu_features(void) minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ - set_sys_caps_initialised(); + finalize_system_capabilities(); /* * Check for sane CTR_EL0.CWG value. diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 56bba746da1c..86136075ae41 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -84,6 +84,14 @@ static const char *const hwcap_str[] = { "svesm4", "flagm2", "frint", + "svei8mm", + "svef32mm", + "svef64mm", + "svebf16", + "i8mm", + "bf16", + "dgh", + "rng", NULL }; @@ -360,6 +368,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1); info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 5dce5e56995a..fde59981445c 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -36,14 +36,14 @@ static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(el1_pc); -static void el1_undef(struct pt_regs *regs) +static void notrace el1_undef(struct pt_regs *regs) { local_daif_inherit(regs); do_undefinstr(regs); } NOKPROBE_SYMBOL(el1_undef); -static void el1_inv(struct pt_regs *regs, unsigned long esr) +static void notrace el1_inv(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); bad_mode(regs, 0, esr); @@ -215,7 +215,7 @@ static void notrace el0_svc(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - el0_svc_handler(regs); + do_el0_svc(regs); } NOKPROBE_SYMBOL(el0_svc); @@ -281,7 +281,7 @@ static void notrace el0_svc_compat(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | 
GIC_PRIO_PSR_I_SET); - el0_svc_compat_handler(regs); + do_el0_svc_compat(regs); } NOKPROBE_SYMBOL(el0_svc_compat); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7c6a0a41676f..9461d812ae27 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -60,16 +60,16 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \el == 0 +alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif - .endif alternative_else_nop_endif + .endif #endif sub sp, sp, #S_FRAME_SIZE @@ -167,9 +167,13 @@ alternative_cb_end .if \el == 0 clear_gp_regs mrs x21, sp_el0 - ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, - ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug - disable_step_tsk x19, x20 // exceptions when scheduling. + ldr_this_cpu tsk, __entry_task, x20 + msr sp_el0, tsk + + // Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions + // when scheduling. + ldr x19, [tsk, #TSK_TI_FLAGS] + disable_step_tsk x19, x20 apply_ssbd 1, x22, x23 @@ -232,13 +236,6 @@ alternative_else_nop_endif str w21, [sp, #S_SYSCALLNO] .endif - /* - * Set sp_el0 to current thread_info. - */ - .if \el == 0 - msr sp_el0, tsk - .endif - /* Save pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs_s x20, SYS_ICC_PMR_EL1 @@ -605,7 +602,7 @@ el1_irq: irq_handler -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count alternative_if ARM64_HAS_IRQ_PRIO_MASKING /* @@ -653,6 +650,7 @@ el0_sync: mov x0, sp bl el0_sync_handler b ret_to_user +ENDPROC(el0_sync) #ifdef CONFIG_COMPAT .align 6 @@ -661,16 +659,18 @@ el0_sync_compat: mov x0, sp bl el0_sync_compat_handler b ret_to_user -ENDPROC(el0_sync) +ENDPROC(el0_sync_compat) .align 6 el0_irq_compat: kernel_entry 0, 32 b el0_irq_naked +ENDPROC(el0_irq_compat) el0_error_compat: kernel_entry 0, 32 b el0_error_naked +ENDPROC(el0_error_compat) #endif .align 6 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3eb338f14386..94289d126993 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -269,6 +269,7 @@ static void sve_free(struct task_struct *task) */ static void task_fpsimd_load(void) { + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -289,6 +290,7 @@ static void fpsimd_save(void) this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { @@ -1092,6 +1094,7 @@ void fpsimd_bind_task_to_cpu(void) struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; @@ -1114,6 +1117,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; @@ -1128,8 +1132,19 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the 
absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g., init. This could then be inherited by child processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping forever in + * do_notify_resume(). + */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } get_cpu_fpsimd_context(); @@ -1148,7 +1163,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; get_cpu_fpsimd_context(); @@ -1179,7 +1194,13 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) void fpsimd_flush_task_state(struct task_struct *t) { t->thread.fpsimd_cpu = NR_CPUS; - + /* + * If we don't support fpsimd, bail out after we have + * reset the fpsimd_cpu for this task and clear the + * FPSTATE. + */ + if (!system_supports_fpsimd()) + return; barrier(); set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE); @@ -1193,6 +1214,7 @@ void fpsimd_flush_task_state(struct task_struct *t) */ static void fpsimd_flush_cpu_state(void) { + WARN_ON(!system_supports_fpsimd()); __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } @@ -1203,6 +1225,8 @@ static void fpsimd_flush_cpu_state(void) */ void fpsimd_save_and_flush_cpu_state(void) { + if (!system_supports_fpsimd()) + return; WARN_ON(preemptible()); __get_cpu_fpsimd_context(); fpsimd_save(); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index a96b2921d22c..590963c9c609 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -182,78 +182,79 @@ int arch_hibernation_header_restore(void *addr) } EXPORT_SYMBOL(arch_hibernation_header_restore); -/* - * Copies length bytes, starting at src_start into an new page, - * perform cache maintentance, then maps it at the specified address low - * address as executable. - * - * This is used by hibernate to copy the code it needs to execute when - * overwriting the kernel text. This function generates a new set of page - * tables, which it loads into ttbr0. - * - * Length is provided as we probably only want 4K of data, even on a 64K - * page system.
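/*
 * [Editorial sketch, not part of the patch] Why the TIF_FOREIGN_FPSTATE
 * clearing added to fpsimd_restore_current_state() above matters: the
 * exit-to-user path keeps retrying while any work flag is set, roughly as
 * below, so a flag that could never be cleared would spin this loop
 * forever. read_flags() is a hypothetical stand-in for the real
 * thread-flag reload:
 */
static void notify_resume_sketch(unsigned long thread_flags)
{
	do {
		if (thread_flags & _TIF_FOREIGN_FPSTATE)
			/* must drop the flag, with or without FP/SIMD */
			fpsimd_restore_current_state();
		/* ... signal delivery, tracing, rescheduling elided ... */
		thread_flags = read_flags();	/* hypothetical helper */
	} while (thread_flags & _TIF_WORK_MASK);
}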
- */ -static int create_safe_exec_page(void *src_start, size_t length, - unsigned long dst_addr, - phys_addr_t *phys_dst_addr, - void *(*allocator)(gfp_t mask), - gfp_t mask) +static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, + unsigned long dst_addr, + pgprot_t pgprot) { - int rc = 0; - pgd_t *trans_pgd; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; - unsigned long dst = (unsigned long)allocator(mask); - - if (!dst) { - rc = -ENOMEM; - goto out; - } - - memcpy((void *)dst, src_start, length); - __flush_icache_range(dst, dst + length); - - trans_pgd = allocator(mask); - if (!trans_pgd) { - rc = -ENOMEM; - goto out; - } pgdp = pgd_offset_raw(trans_pgd, dst_addr); if (pgd_none(READ_ONCE(*pgdp))) { - pudp = allocator(mask); - if (!pudp) { - rc = -ENOMEM; - goto out; - } + pudp = (void *)get_safe_page(GFP_ATOMIC); + if (!pudp) + return -ENOMEM; pgd_populate(&init_mm, pgdp, pudp); } pudp = pud_offset(pgdp, dst_addr); if (pud_none(READ_ONCE(*pudp))) { - pmdp = allocator(mask); - if (!pmdp) { - rc = -ENOMEM; - goto out; - } + pmdp = (void *)get_safe_page(GFP_ATOMIC); + if (!pmdp) + return -ENOMEM; pud_populate(&init_mm, pudp, pmdp); } pmdp = pmd_offset(pudp, dst_addr); if (pmd_none(READ_ONCE(*pmdp))) { - ptep = allocator(mask); - if (!ptep) { - rc = -ENOMEM; - goto out; - } + ptep = (void *)get_safe_page(GFP_ATOMIC); + if (!ptep) + return -ENOMEM; pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); + set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC)); + + return 0; +} + +/* + * Copies length bytes, starting at src_start, into a new page, + * performs cache maintenance, then maps it as executable at the + * specified low address. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr) +{ + void *page = (void *)get_safe_page(GFP_ATOMIC); + pgd_t *trans_pgd; + int rc; + + if (!page) + return -ENOMEM; + + memcpy(page, src_start, length); + __flush_icache_range((unsigned long)page, (unsigned long)page + length); + + trans_pgd = (void *)get_safe_page(GFP_ATOMIC); + if (!trans_pgd) + return -ENOMEM; + + rc = trans_pgd_map_page(trans_pgd, page, dst_addr, + PAGE_KERNEL_EXEC); + if (rc) + return rc; /* * Load our new page tables.
A strict BBM approach requires that we @@ -269,13 +270,12 @@ static int create_safe_exec_page(void *src_start, size_t length, */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1); + write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1); isb(); - *phys_dst_addr = virt_to_phys((void *)dst); + *phys_dst_addr = virt_to_phys(page); -out: - return rc; + return 0; } #define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) @@ -450,7 +450,7 @@ static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, return -ENOMEM; } else { set_pud(dst_pudp, - __pud(pud_val(pud) & ~PMD_SECT_RDONLY)); + __pud(pud_val(pud) & ~PUD_SECT_RDONLY)); } } while (dst_pudp++, src_pudp++, addr = next, addr != end); @@ -476,6 +476,24 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, return 0; } +static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, + unsigned long end) +{ + int rc; + pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC); + + if (!trans_pgd) { + pr_err("Failed to allocate memory for temporary page tables.\n"); + return -ENOMEM; + } + + rc = copy_page_tables(trans_pgd, start, end); + if (!rc) + *dst_pgdp = trans_pgd; + + return rc; +} + /* * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). * @@ -484,7 +502,7 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, */ int swsusp_arch_resume(void) { - int rc = 0; + int rc; void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; @@ -497,15 +515,9 @@ int swsusp_arch_resume(void) * Create a second copy of just the linear map, and use this when * restoring. */ - tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); - if (!tmp_pg_dir) { - pr_err("Failed to allocate memory for temporary page tables.\n"); - rc = -ENOMEM; - goto out; - } - rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END); + rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END); if (rc) - goto out; + return rc; /* * We need a zero page that is zero before & after resume in order to @@ -514,8 +526,7 @@ int swsusp_arch_resume(void) zero_page = (void *)get_safe_page(GFP_ATOMIC); if (!zero_page) { pr_err("Failed to allocate zero page.\n"); - rc = -ENOMEM; - goto out; + return -ENOMEM; } /* @@ -530,11 +541,10 @@ int swsusp_arch_resume(void) */ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, (unsigned long)hibernate_exit, - &phys_hibernate_exit, - (void *)get_safe_page, GFP_ATOMIC); + &phys_hibernate_exit); if (rc) { pr_err("Failed to create safe executable page for hibernate_exit code.\n"); - goto out; + return rc; } /* @@ -561,8 +571,7 @@ int swsusp_arch_resume(void) resume_hdr.reenter_kernel, restore_pblist, resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); -out: - return rc; + return 0; } int hibernate_resume_nonboot_cpu_disable(void) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 2a11a962e571..53b8a4ee64ff 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -120,6 +120,17 @@ u64 __init kaslr_early_init(u64 dt_phys) return 0; } + /* + * Mix in any entropy obtainable architecturally, open coded + * since this runs extremely early. 
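/*
 * [Editorial sketch, not part of the patch] The kind of open-coded entropy
 * read the kaslr_early_init() comment above alludes to (the patch's own
 * __early_cpu_has_rndr()/__arm64_rndr() calls follow below). RNDR is the
 * system register S3_3_C2_C4_0 (ARMv8.5-RNG); it sets the Z flag when no
 * random number could be produced, hence the cset-on-ne idiom. Assumed to
 * mirror what __arm64_rndr() does; RNG-capable hardware is required:
 */
static inline bool rndr_sketch(unsigned long *v)
{
	bool ok;

	asm volatile("mrs	%0, s3_3_c2_c4_0\n\t"
		     "cset	%w1, ne"		/* ok = (Z == 0) */
		     : "=r" (*v), "=r" (ok) : : "cc");
	return ok;
}
/* Usage, as in kaslr_early_init(): if (rndr_sketch(&raw)) seed ^= raw; */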
+ */ + if (__early_cpu_has_rndr()) { + unsigned long raw; + + if (__arm64_rndr(&raw)) + seed ^= raw; + } + if (!seed) { kaslr_status = KASLR_DISABLED_NO_SEED; return 0; diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 29a9428486a5..af9987c154ca 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -47,10 +47,6 @@ static void *image_load(struct kimage *image, struct kexec_segment *kernel_segment; int ret; - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) - return ERR_PTR(-EOPNOTSUPP); - /* * We require a kernel with an unambiguous Image header. Per * Documentation/arm64/booting.rst, this is the case when image_size diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 0df8493624e0..8e9c924423b4 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -160,18 +160,6 @@ void machine_kexec(struct kimage *kimage) kexec_image_info(kimage); - pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__, - kimage->control_code_page); - pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__, - &reboot_code_buffer_phys); - pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__, - reboot_code_buffer); - pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__, - arm64_relocate_new_kernel); - pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n", - __func__, __LINE__, arm64_relocate_new_kernel_size, - arm64_relocate_new_kernel_size); - /* * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use * after the kernel is shut down. diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 7b08bf9499b6..dd3ae8081b38 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -17,12 +17,15 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/random.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> #include <linux/vmalloc.h> #include <asm/byteorder.h> /* relevant device tree properties */ +#define FDT_PROP_KEXEC_ELFHDR "linux,elfcorehdr" +#define FDT_PROP_MEM_RANGE "linux,usable-memory-range" #define FDT_PROP_INITRD_START "linux,initrd-start" #define FDT_PROP_INITRD_END "linux,initrd-end" #define FDT_PROP_BOOTARGS "bootargs" @@ -40,6 +43,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) vfree(image->arch.dtb); image->arch.dtb = NULL; + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + image->arch.elf_headers_sz = 0; + return kexec_image_post_load_cleanup_default(image); } @@ -55,6 +62,31 @@ static int setup_dtb(struct kimage *image, off = ret; + ret = fdt_delprop(dtb, off, FDT_PROP_KEXEC_ELFHDR); + if (ret && ret != -FDT_ERR_NOTFOUND) + goto out; + ret = fdt_delprop(dtb, off, FDT_PROP_MEM_RANGE); + if (ret && ret != -FDT_ERR_NOTFOUND) + goto out; + + if (image->type == KEXEC_TYPE_CRASH) { + /* add linux,elfcorehdr */ + ret = fdt_appendprop_addrrange(dtb, 0, off, + FDT_PROP_KEXEC_ELFHDR, + image->arch.elf_headers_mem, + image->arch.elf_headers_sz); + if (ret) + return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL); + + /* add linux,usable-memory-range */ + ret = fdt_appendprop_addrrange(dtb, 0, off, + FDT_PROP_MEM_RANGE, + crashk_res.start, + crashk_res.end - crashk_res.start + 1); + if (ret) + return (ret == -FDT_ERR_NOSPACE ? 
-ENOMEM : -EINVAL); + } + /* add bootargs */ if (cmdline) { ret = fdt_setprop_string(dtb, off, FDT_PROP_BOOTARGS, cmdline); @@ -125,8 +157,8 @@ out: } /* - * More space needed so that we can add initrd, bootargs, kaslr-seed, and - * rng-seed. + * More space needed so that we can add initrd, bootargs, kaslr-seed, + * rng-seed, usable-memory-range and elfcorehdr. */ #define DTB_EXTRA_SPACE 0x1000 @@ -174,6 +206,43 @@ static int create_dtb(struct kimage *image, } } +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + u64 i; + phys_addr_t start, end; + + nr_ranges = 1; /* for exclusion of crashkernel region */ + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) + nr_ranges++; + + cmem = kmalloc(sizeof(struct crash_mem) + + sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + cmem->ranges[cmem->nr_ranges].start = start; + cmem->ranges[cmem->nr_ranges].end = end - 1; + cmem->nr_ranges++; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + + kfree(cmem); + return ret; +} + int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, @@ -181,14 +250,43 @@ int load_other_segments(struct kimage *image, char *cmdline) { struct kexec_buf kbuf; - void *dtb = NULL; - unsigned long initrd_load_addr = 0, dtb_len; + void *headers, *dtb = NULL; + unsigned long headers_sz, initrd_load_addr = 0, dtb_len; int ret = 0; kbuf.image = image; /* not allocate anything below the kernel */ kbuf.buf_min = kernel_load_addr + kernel_size; + /* load elf core header */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out_err; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = SZ_64K; /* largest supported page size */ + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out_err; + } + image->arch.elf_headers = headers; + image->arch.elf_headers_mem = kbuf.mem; + image->arch.elf_headers_sz = headers_sz; + + pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->arch.elf_headers_mem, headers_sz, headers_sz); + } + /* load initrd */ if (initrd) { kbuf.buffer = initrd; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 71f788cd2b18..bbb0f0c145f6 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -360,8 +360,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) asmlinkage void ret_from_fork(void) asm("ret_from_fork"); -int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long stack_start, + unsigned long stk_sz, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -394,11 +394,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } /* - * If a TLS pointer was passed to clone (4th
argument), use it - * for the new thread. + * If a TLS pointer was passed to clone, use it for the new + * thread. */ if (clone_flags & CLONE_SETTLS) - p->thread.uw.tp_value = childregs->regs[3]; + p->thread.uw.tp_value = tls; } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; @@ -646,6 +646,6 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void) * Only allow a task to be preempted once cpufeatures have been * enabled. */ - if (static_branch_likely(&arm64_const_caps_ready)) + if (system_capabilities_finalized()) preempt_schedule_irq(); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6771c399d40c..cd6e5fa48b9c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -615,6 +615,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -637,6 +644,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -676,6 +686,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, { int ret; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0); if (ret) return ret; @@ -1134,6 +1147,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -1348,6 +1362,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; if (target == current) @@ -1381,6 +1398,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1438,6 +1458,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 56f664561754..b6f9455d7ca3 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -285,6 +285,13 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + /* + * If we know now that we are going to need KPTI then use non-global + * mappings from the start, avoiding the cost of rewriting + * everything later.
+ */ + arm64_use_ng_mappings = kaslr_requires_kpti(); + early_fixmap_init(); early_ioremap_init(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index dd2cdc0d5be2..339882db5a91 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -371,6 +371,8 @@ static int parse_user_sigframe(struct user_ctxs *user, goto done; case FPSIMD_MAGIC: + if (!system_supports_fpsimd()) + goto invalid; if (user->fpsimd) goto invalid; @@ -506,7 +508,7 @@ static int restore_sigframe(struct pt_regs *regs, if (err == 0) err = parse_user_sigframe(&user, sf); - if (err == 0) { + if (err == 0 && system_supports_fpsimd()) { if (!user.fpsimd) return -EINVAL; @@ -623,7 +625,7 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); - if (err == 0) { + if (err == 0 && system_supports_fpsimd()) { struct fpsimd_context __user *fpsimd_ctx = apply_user_offset(user, user->fpsimd_offset); err |= preserve_fpsimd_context(fpsimd_ctx); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 12a585386c2f..82feca6f7052 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -223,7 +223,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, err |= !valid_user_regs(®s->user_regs, current); aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; - if (err == 0) + if (err == 0 && system_supports_fpsimd()) err |= compat_restore_vfp_context(&aux->vfp); return err; @@ -419,7 +419,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; - if (err == 0) + if (err == 0 && system_supports_fpsimd()) err |= compat_preserve_vfp_context(&aux->vfp); __put_user_error(0, &aux->end_magic, err); diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 52cfc6148355..b26955f56750 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -37,7 +37,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) /* Unsupported */ if (state == ARM64_SSBD_UNKNOWN) - return -EINVAL; + return -ENODEV; /* Treat the unaffected/mitigated state separately */ if (state == ARM64_SSBD_MITIGATED) { @@ -102,7 +102,7 @@ static int ssbd_prctl_get(struct task_struct *task) { switch (arm64_get_ssbd_state()) { case ARM64_SSBD_UNKNOWN: - return -EINVAL; + return -ENODEV; case ARM64_SSBD_FORCE_ENABLE: return PR_SPEC_DISABLE; case ARM64_SSBD_KERNEL: diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 9a9d98a443fc..a12c0c88d345 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -154,14 +154,14 @@ static inline void sve_user_discard(void) sve_user_disable(); } -void el0_svc_handler(struct pt_regs *regs) +void do_el0_svc(struct pt_regs *regs) { sve_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } #ifdef CONFIG_COMPAT -void el0_svc_compat_handler(struct pt_regs *regs) +void do_el0_svc_compat(struct pt_regs *regs) { el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, compat_sys_call_table); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 73caf35c2262..cf402be5c573 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -144,9 +144,12 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" +#elif defined(CONFIG_PREEMPT_RT) +#define S_PREEMPT " PREEMPT_RT" #else #define S_PREEMPT "" #endif + #define 
S_SMP " SMP" static int __die(const char *str, int err, struct pt_regs *regs) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e5cc8d66bf53..0c6832ec52b1 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -22,7 +22,12 @@ .text .pushsection .hyp.text, "ax" +/* + * We treat x18 as callee-saved as the host may use it as a platform + * register (e.g. for shadow call stack). + */ .macro save_callee_saved_regs ctxt + str x18, [\ctxt, #CPU_XREG_OFFSET(18)] stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -32,6 +37,8 @@ .endm .macro restore_callee_saved_regs ctxt + // We require \ctxt is not x18-x28 + ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -48,7 +55,7 @@ ENTRY(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros - // x18: guest context + // x29: guest context // Store the host regs save_callee_saved_regs x1 @@ -67,31 +74,28 @@ alternative_else_nop_endif ret 1: - add x18, x0, #VCPU_CONTEXT + add x29, x0, #VCPU_CONTEXT // Macro ptrauth_switch_to_guest format: // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) // The below macro to restore guest keys is not implemented in C code // as it may cause Pointer Authentication key signing mismatch errors // when this feature is enabled for kernel code. - ptrauth_switch_to_guest x18, x0, x1, x2 + ptrauth_switch_to_guest x29, x0, x1, x2 // Restore guest regs x0-x17 - ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] - ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] - ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] - ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] - ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] - ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] - ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] - ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] - ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] - - // Restore guest regs x19-x29, lr - restore_callee_saved_regs x18 - - // Restore guest reg x18 - ldr x18, [x18, #CPU_XREG_OFFSET(18)] + ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)] + + // Restore guest regs x18-x29, lr + restore_callee_saved_regs x29 // Do not touch any register after this! 
eret @@ -114,7 +118,7 @@ ENTRY(__guest_exit) // Retrieve the guest regs x0-x1 from the stack ldp x2, x3, [sp], #16 // x0, x1 - // Store the guest regs x0-x1 and x4-x18 + // Store the guest regs x0-x1 and x4-x17 stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] @@ -123,9 +127,8 @@ ENTRY(__guest_exit) stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] - str x18, [x1, #CPU_XREG_OFFSET(18)] - // Store the guest regs x19-x29, lr + // Store the guest regs x18-x29, lr save_callee_saved_regs x1 get_host_ctxt x2, x3 diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 72fbbd86eb5e..dfe8dd172512 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -28,7 +28,15 @@ /* Check whether the FP regs were dirtied while in the host-side run loop: */ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) { - if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST); @@ -119,7 +127,7 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) write_sysreg(val, cptr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; isb(); @@ -158,11 +166,11 @@ static void deactivate_traps_vhe(void) write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); /* - * ARM erratum 1165522 requires the actual execution of the above - * before we can switch to the EL2/EL0 translation regime used by + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL2/EL0 translation regime used by * the host. 
*/ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); write_sysreg(vectors, vbar_el1); @@ -173,7 +181,7 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { u64 val; /* diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 22b8128d19f6..7672a978926c 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -118,7 +118,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (!cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); } else if (!ctxt->__hyp_running_vcpu) { @@ -149,7 +149,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) && + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) && ctxt->__hyp_running_vcpu) { /* * Must only be done for host registers, hence the context diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index c2bc17ca6430..92f560e3e1aa 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -23,10 +23,10 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, local_irq_save(cxt->flags); - if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { /* - * For CPUs that are affected by ARM erratum 1165522, we - * cannot trust stage-1 to be in a correct state at that + * For CPUs that are affected by ARM errata 1165522 or 1530923, + * we cannot trust stage-1 to be in a correct state at that * point. Since we do not want to force a full load of the * vcpu state, we prevent the EL1 page-table walker to * allocate new TLBs. 
This is done by setting the EPD bits @@ -63,7 +63,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { u64 val; /* @@ -103,7 +103,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); isb(); - if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { /* Restore the registers to what they were */ write_sysreg_el1(cxt->tcr, SYS_TCR); write_sysreg_el1(cxt->sctlr, SYS_SCTLR); @@ -117,7 +117,7 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, { write_sysreg(0, vttbr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { /* Ensure write of the host VMID */ isb(); /* Restore the host's TCR_EL1 */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 46822afc57e0..3e909b117f0c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1424,7 +1424,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_ISAR4_EL1), ID_SANITISED(ID_ISAR5_EL1), ID_SANITISED(ID_MMFR4_EL1), - ID_UNALLOCATED(2,7), + ID_SANITISED(ID_ISAR6_EL1), /* CRm=3 */ ID_SANITISED(MVFR0_EL1), @@ -2098,9 +2098,9 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, WARN_ON(1); } - kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n", - cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); - print_sys_reg_instr(params); + print_sys_reg_msg(params, + "Unsupported guest CP%d access at: %08lx [%08lx]\n", + cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); kvm_inject_undefined(vcpu); } @@ -2233,6 +2233,12 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) NULL, 0); } +static bool is_imp_def_sys_reg(struct sys_reg_params *params) +{ + // See ARM DDI 0487E.a, section D12.3.2 + return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011; +} + static int emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { @@ -2248,10 +2254,12 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, if (likely(r)) { perform_access(vcpu, params, r); + } else if (is_imp_def_sys_reg(params)) { + kvm_inject_undefined(vcpu); } else { - kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n", - *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); - print_sys_reg_instr(params); + print_sys_reg_msg(params, + "Unsupported guest sys_reg access at: %lx [%08lx]\n", + *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); kvm_inject_undefined(vcpu); } return 1; @@ -2360,8 +2368,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) return NULL; + if (!index_to_params(id, ¶ms)) + return NULL; + table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg_by_id(id, ¶ms, table, num); + r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 9bca0312d798..5a6fc30f5989 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -62,11 +62,24 @@ struct sys_reg_desc { #define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */ #define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */ -static inline void print_sys_reg_instr(const struct sys_reg_params *p) +static 
__printf(2, 3) +inline void print_sys_reg_msg(const struct sys_reg_params *p, + char *fmt, ...) { + va_list va; + + va_start(va, fmt); /* Look, we even formatted it for you to paste into the table! */ - kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + &(struct va_format){ fmt, &va }, p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); + va_end(va); +} + +static inline void print_sys_reg_instr(const struct sys_reg_params *p) +{ + /* GCC warns on an empty format string */ + print_sys_reg_msg(p, "%s", ""); } static inline bool ignore_write(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index c21b936dc01d..2fc253466dbf 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 lib-y := clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_in_user.o copy_page.o \ - clear_page.o memchr.o memcpy.o memmove.o memset.o \ - memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ - strchr.o strrchr.o tishift.o + clear_page.o csum.o memchr.o memcpy.o memmove.o \ + memset.o memcmp.o strcmp.o strncmp.o strlen.o \ + strnlen.o strchr.o strrchr.o tishift.o ifeq ($(CONFIG_KERNEL_MODE_NEON), y) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 78a9ef66288a..073acbf02a7c 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -ENTRY(clear_page) +SYM_FUNC_START(clear_page) mrs x1, dczid_el0 and w1, w1, #0xf mov x2, #4 @@ -25,5 +25,5 @@ ENTRY(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 1b ret -ENDPROC(clear_page) +SYM_FUNC_END(clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index aeafc03e961a..48a3a26eff66 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -19,7 +19,7 @@ * * Alignment fixed up by hardware. 
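/*
 * [Editorial sketch, not part of the patch] The %pV idiom that
 * print_sys_reg_msg() above relies on: printk's %pV expands a nested
 * format/va_list pair (struct va_format, handled in lib/vsprintf.c), so a
 * helper can prefix caller-supplied text without assembling a buffer.
 * Kernel-context code:
 */
static void vaf_demo(const char *fmt, ...)
{
	struct va_format vaf;
	va_list va;

	va_start(va, fmt);
	vaf.fmt = fmt;
	vaf.va = &va;
	printk(KERN_WARNING "sysreg: %pV\n", &vaf);	/* nested expansion */
	va_end(va);
}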
*/ -ENTRY(__arch_clear_user) +SYM_FUNC_START(__arch_clear_user) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -40,7 +40,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 ret -ENDPROC(__arch_clear_user) +SYM_FUNC_END(__arch_clear_user) EXPORT_SYMBOL(__arch_clear_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index ebb3c06cbb5d..8e25e89ad01f 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -53,12 +53,12 @@ .endm end .req x5 -ENTRY(__arch_copy_from_user) +SYM_FUNC_START(__arch_copy_from_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 // Nothing to copy ret -ENDPROC(__arch_copy_from_user) +SYM_FUNC_END(__arch_copy_from_user) EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 3d8153a1ebce..667139013ed1 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -55,12 +55,12 @@ end .req x5 -ENTRY(__arch_copy_in_user) +SYM_FUNC_START(__arch_copy_in_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_in_user) +SYM_FUNC_END(__arch_copy_in_user) EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index bbb8562396af..e7a793961408 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -ENTRY(copy_page) +SYM_FUNC_START(copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -34,46 +34,46 @@ alternative_else_nop_endif ldp x14, x15, [x1, #96] ldp x16, x17, [x1, #112] - mov x18, #(PAGE_SIZE - 128) + add x0, x0, #256 add x1, x1, #128 1: - subs x18, x18, #128 + tst x0, #(PAGE_SIZE - 1) alternative_if ARM64_HAS_NO_HW_PREFETCH prfm pldl1strm, [x1, #384] alternative_else_nop_endif - stnp x2, x3, [x0] + stnp x2, x3, [x0, #-256] ldp x2, x3, [x1] - stnp x4, x5, [x0, #16] + stnp x4, x5, [x0, #16 - 256] ldp x4, x5, [x1, #16] - stnp x6, x7, [x0, #32] + stnp x6, x7, [x0, #32 - 256] ldp x6, x7, [x1, #32] - stnp x8, x9, [x0, #48] + stnp x8, x9, [x0, #48 - 256] ldp x8, x9, [x1, #48] - stnp x10, x11, [x0, #64] + stnp x10, x11, [x0, #64 - 256] ldp x10, x11, [x1, #64] - stnp x12, x13, [x0, #80] + stnp x12, x13, [x0, #80 - 256] ldp x12, x13, [x1, #80] - stnp x14, x15, [x0, #96] + stnp x14, x15, [x0, #96 - 256] ldp x14, x15, [x1, #96] - stnp x16, x17, [x0, #112] + stnp x16, x17, [x0, #112 - 256] ldp x16, x17, [x1, #112] add x0, x0, #128 add x1, x1, #128 - b.gt 1b + b.ne 1b - stnp x2, x3, [x0] - stnp x4, x5, [x0, #16] - stnp x6, x7, [x0, #32] - stnp x8, x9, [x0, #48] - stnp x10, x11, [x0, #64] - stnp x12, x13, [x0, #80] - stnp x14, x15, [x0, #96] - stnp x16, x17, [x0, #112] + stnp x2, x3, [x0, #-256] + stnp x4, x5, [x0, #16 - 256] + stnp x6, x7, [x0, #32 - 256] + stnp x8, x9, [x0, #48 - 256] + stnp x10, x11, [x0, #64 - 256] + stnp x12, x13, [x0, #80 - 256] + stnp x14, x15, [x0, #96 - 256] + stnp x16, x17, [x0, #112 - 256] ret -ENDPROC(copy_page) +SYM_FUNC_END(copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 357eae2c18eb..1a104d0089f3 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -52,12 +52,12 @@ .endm end .req x5 -ENTRY(__arch_copy_to_user) +SYM_FUNC_START(__arch_copy_to_user) add end, x0, x2 
#include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_to_user) +SYM_FUNC_END(__arch_copy_to_user) EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index e6135f16649b..243e107e9896 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -85,17 +85,17 @@ CPU_BE( rev16 w3, w3 ) .endm .align 5 -ENTRY(crc32_le) +SYM_FUNC_START(crc32_le) alternative_if_not ARM64_HAS_CRC32 b crc32_le_base alternative_else_nop_endif __crc32 -ENDPROC(crc32_le) +SYM_FUNC_END(crc32_le) .align 5 -ENTRY(__crc32c_le) +SYM_FUNC_START(__crc32c_le) alternative_if_not ARM64_HAS_CRC32 b __crc32c_le_base alternative_else_nop_endif __crc32 c -ENDPROC(__crc32c_le) +SYM_FUNC_END(__crc32c_le) diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c new file mode 100644 index 000000000000..1f82c66b32ea --- /dev/null +++ b/arch/arm64/lib/csum.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2019-2020 Arm Ltd. + +#include <linux/compiler.h> +#include <linux/kasan-checks.h> +#include <linux/kernel.h> + +#include <net/checksum.h> + +/* Looks dumb, but generates nice-ish code */ +static u64 accumulate(u64 sum, u64 data) +{ + __uint128_t tmp = (__uint128_t)sum + data; + return tmp + (tmp >> 64); +} + +unsigned int do_csum(const unsigned char *buff, int len) +{ + unsigned int offset, shift, sum; + const u64 *ptr; + u64 data, sum64 = 0; + + if (unlikely(len == 0)) + return 0; + + offset = (unsigned long)buff & 7; + /* + * This is to all intents and purposes safe, since rounding down cannot + * result in a different page or cache line being accessed, and @buff + * should absolutely not be pointing to anything read-sensitive. We do, + * however, have to be careful not to piss off KASAN, which means using + * unchecked reads to accommodate the head and tail, for which we'll + * compensate with an explicit check up-front. + */ + kasan_check_read(buff, len); + ptr = (u64 *)(buff - offset); + len = len + offset - 8; + + /* + * Head: zero out any excess leading bytes. Shifting back by the same + * amount should be at least as fast as any other way of handling the + * odd/even alignment, and means we can ignore it until the very end. + */ + shift = offset * 8; + data = READ_ONCE_NOCHECK(*ptr++); +#ifdef __LITTLE_ENDIAN + data = (data >> shift) << shift; +#else + data = (data << shift) >> shift; +#endif + + /* + * Body: straightforward aligned loads from here on (the paired loads + * underlying the quadword type still only need dword alignment). The + * main loop strictly excludes the tail, so the second loop will always + * run at least once. 
+ */ + while (unlikely(len > 64)) { + __uint128_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = READ_ONCE_NOCHECK(*(__uint128_t *)ptr); + tmp2 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 2)); + tmp3 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 4)); + tmp4 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 6)); + + len -= 64; + ptr += 8; + + /* This is the "don't dump the carry flag into a GPR" idiom */ + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp2 += (tmp2 >> 64) | (tmp2 << 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp4 += (tmp4 >> 64) | (tmp4 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp1 = ((tmp1 >> 64) << 64) | sum64; + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + sum64 = tmp1 >> 64; + } + while (len > 8) { + __uint128_t tmp; + + sum64 = accumulate(sum64, data); + tmp = READ_ONCE_NOCHECK(*(__uint128_t *)ptr); + + len -= 16; + ptr += 2; + +#ifdef __LITTLE_ENDIAN + data = tmp >> 64; + sum64 = accumulate(sum64, tmp); +#else + data = tmp; + sum64 = accumulate(sum64, tmp >> 64); +#endif + } + if (len > 0) { + sum64 = accumulate(sum64, data); + data = READ_ONCE_NOCHECK(*ptr); + len -= 8; + } + /* + * Tail: zero any over-read bytes similarly to the head, again + * preserving odd/even alignment. + */ + shift = len * -8; +#ifdef __LITTLE_ENDIAN + data = (data << shift) >> shift; +#else + data = (data >> shift) << shift; +#endif + sum64 = accumulate(sum64, data); + + /* Finally, folding */ + sum64 += (sum64 >> 32) | (sum64 << 32); + sum = sum64 >> 32; + sum += (sum >> 16) | (sum << 16); + if (offset & 1) + return (u16)swab32(sum); + + return sum >> 16; +} diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 48a3ab636e4f..edf6b970a277 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -19,7 +19,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(memchr) +SYM_FUNC_START_WEAK_PI(memchr) and w1, w1, #0xff 1: subs x2, x2, #1 b.mi 2f @@ -30,5 +30,5 @@ WEAK(memchr) ret 2: mov x0, #0 ret -ENDPIPROC(memchr) +SYM_FUNC_END_PI(memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index b297bdaaf549..c0671e793ea9 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -46,7 +46,7 @@ pos .req x11 limit_wd .req x12 mask .req x13 -WEAK(memcmp) +SYM_FUNC_START_WEAK_PI(memcmp) cbz limit, .Lret0 eor tmp1, src1, src2 tst tmp1, #7 @@ -243,5 +243,5 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPIPROC(memcmp) +SYM_FUNC_END_PI(memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index d79f48994dbb..9f382adfa88a 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,11 +57,11 @@ .endm .weak memcpy -ENTRY(__memcpy) -ENTRY(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) +SYM_FUNC_START_PI(memcpy) #include "copy_template.S" ret -ENDPIPROC(memcpy) +SYM_FUNC_END_PI(memcpy) EXPORT_SYMBOL(memcpy) -ENDPROC(__memcpy) +SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 784775136480..02cda2e33bde 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -46,8 +46,8 @@ D_l .req x13 D_h .req x14 .weak memmove -ENTRY(__memmove) -ENTRY(memmove) +SYM_FUNC_START_ALIAS(__memmove) +SYM_FUNC_START_PI(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count @@ -184,7 
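The tail of do_csum() above folds the 64-bit ones'-complement accumulator down to 16 bits with rotate-and-add steps, each addition feeding the carry-out back into the low bits. A standalone sketch of that folding, using the same steps as the kernel code:

#include <stdint.h>
#include <stdio.h>

/* Fold a 64-bit ones'-complement sum to 16 bits, as at the end of do_csum() */
static uint16_t csum_fold64(uint64_t sum64)
{
	uint32_t sum;

	sum64 += (sum64 >> 32) | (sum64 << 32);	/* 64 -> 32, end-around carry */
	sum = sum64 >> 32;
	sum += (sum >> 16) | (sum << 16);	/* 32 -> 16, end-around carry */
	return sum >> 16;
}

int main(void)
{
	printf("%#x\n", csum_fold64(0x1234567890abcdefULL));
	return 0;
}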
+184,7 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPIPROC(memmove) +SYM_FUNC_END_PI(memmove) EXPORT_SYMBOL(memmove) -ENDPROC(__memmove) +SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 9fb97e6bc560..77c3c7ba0084 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -43,8 +43,8 @@ tmp3w .req w9 tmp3 .req x9 .weak memset -ENTRY(__memset) -ENTRY(memset) +SYM_FUNC_START_ALIAS(__memset) +SYM_FUNC_START_PI(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -203,7 +203,7 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPIPROC(memset) +SYM_FUNC_END_PI(memset) EXPORT_SYMBOL(memset) -ENDPROC(__memset) +SYM_FUNC_END_ALIAS(__memset) EXPORT_SYMBOL(__memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index ca3ec18171a4..1f47eae3b0d6 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(strchr) +SYM_FUNC_START_WEAK(strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,5 @@ WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -ENDPROC(strchr) +SYM_FUNC_END(strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index e9aefbe0b740..4767540d1b94 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -48,7 +48,7 @@ tmp3 .req x9 zeroones .req x10 pos .req x11 -WEAK(strcmp) +SYM_FUNC_START_WEAK_PI(strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -219,5 +219,5 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPIPROC(strcmp) +SYM_FUNC_END_PI(strcmp) EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 87b0cb066915..ee3ed882dd79 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -44,7 +44,7 @@ pos .req x12 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strlen) +SYM_FUNC_START_WEAK_PI(strlen) mov zeroones, #REP8_01 bic src, srcin, #15 ands tmp1, srcin, #15 @@ -111,5 +111,5 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPIPROC(strlen) +SYM_FUNC_END_PI(strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index f571581888fa..2a7ee949ed47 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -52,7 +52,7 @@ limit_wd .req x13 mask .req x14 endloop .req x15 -WEAK(strncmp) +SYM_FUNC_START_WEAK_PI(strncmp) cbz limit, .Lret0 eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -295,5 +295,5 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPIPROC(strncmp) +SYM_FUNC_END_PI(strncmp) EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index c0bac9493c68..b72913a99038 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strnlen) +SYM_FUNC_START_WEAK_PI(strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,5 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). 
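The .weak memset plus __memset pairing converted above follows the usual pattern: the architecture exports a strong __memset and a weak memset at the same address, so instrumented builds (e.g. KASAN) can override the weak name with a checking version while explicit callers keep the raw one. A C-level sketch of the same arrangement with GCC attributes (names are illustrative):

/* strong implementation, always available under its private name */
void *__memset_sketch(void *s, int c, unsigned long n)
{
	unsigned char *p = s;

	while (n--)
		*p++ = (unsigned char)c;
	return s;
}

/* weak public alias that an instrumented build may replace */
void *memset_sketch(void *, int, unsigned long)
	__attribute__((weak, alias("__memset_sketch")));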
*/ .Lhit_limit: mov len, limit ret -ENDPIPROC(strnlen) +SYM_FUNC_END_PI(strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 794ac49ea433..13132d1ed6d1 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -WEAK(strrchr) +SYM_FUNC_START_WEAK_PI(strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,5 @@ WEAK(strrchr) b 1b 2: mov x0, x3 ret -ENDPIPROC(strrchr) +SYM_FUNC_END_PI(strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index 047622536535..a88613834fb0 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -7,7 +7,7 @@ #include <asm/assembler.h> -ENTRY(__ashlti3) +SYM_FUNC_START(__ashlti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -26,10 +26,10 @@ ENTRY(__ashlti3) lsl x1, x0, x1 mov x0, x2 ret -ENDPROC(__ashlti3) +SYM_FUNC_END(__ashlti3) EXPORT_SYMBOL(__ashlti3) -ENTRY(__ashrti3) +SYM_FUNC_START(__ashrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -48,10 +48,10 @@ ENTRY(__ashrti3) asr x0, x1, x0 mov x1, x2 ret -ENDPROC(__ashrti3) +SYM_FUNC_END(__ashrti3) EXPORT_SYMBOL(__ashrti3) -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -70,5 +70,5 @@ ENTRY(__lshrti3) lsr x0, x1, x0 mov x1, x2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__lshrti3) EXPORT_SYMBOL(__lshrti3) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index db767b072601..2d881f34dd9d 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -24,7 +24,7 @@ * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(__flush_icache_range) +SYM_FUNC_START(__flush_icache_range) /* FALLTHROUGH */ /* @@ -37,7 +37,7 @@ ENTRY(__flush_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(__flush_cache_user_range) +SYM_FUNC_START(__flush_cache_user_range) uaccess_ttbr0_enable x2, x3, x4 alternative_if ARM64_HAS_CACHE_IDC dsb ishst @@ -66,8 +66,8 @@ alternative_else_nop_endif 9: mov x0, #-EFAULT b 1b -ENDPROC(__flush_icache_range) -ENDPROC(__flush_cache_user_range) +SYM_FUNC_END(__flush_icache_range) +SYM_FUNC_END(__flush_cache_user_range) /* * invalidate_icache_range(start,end) @@ -77,7 +77,7 @@ ENDPROC(__flush_cache_user_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(invalidate_icache_range) +SYM_FUNC_START(invalidate_icache_range) alternative_if ARM64_HAS_CACHE_DIC mov x0, xzr isb @@ -94,7 +94,7 @@ alternative_else_nop_endif 2: mov x0, #-EFAULT b 1b -ENDPROC(invalidate_icache_range) +SYM_FUNC_END(invalidate_icache_range) /* * __flush_dcache_area(kaddr, size) @@ -105,10 +105,10 @@ ENDPROC(invalidate_icache_range) * - kaddr - kernel address * - size - size in question */ -ENTRY(__flush_dcache_area) +SYM_FUNC_START_PI(__flush_dcache_area) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__flush_dcache_area) +SYM_FUNC_END_PI(__flush_dcache_area) /* * __clean_dcache_area_pou(kaddr, size) @@ -119,14 +119,14 @@ ENDPIPROC(__flush_dcache_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_pou) +SYM_FUNC_START(__clean_dcache_area_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret -ENDPROC(__clean_dcache_area_pou) +SYM_FUNC_END(__clean_dcache_area_pou) /* * __inval_dcache_area(kaddr, size) @@ -138,7 
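The __ashlti3/__ashrti3/__lshrti3 helpers above implement the 128-bit shifts the compiler emits calls to for __int128 arithmetic. The n == 0 case returns the input unchanged (the cbz at the top of each routine); for 0 < n < 128 the two 64-bit halves combine as in this C sketch of the logical left shift:

typedef unsigned long long u64;

/* 128-bit logical left shift of a (hi, lo) pair; caller handles n == 0 */
static void shl128(u64 *hi, u64 *lo, unsigned int n)
{
	if (n >= 64) {				/* the whole low word moves up */
		*hi = *lo << (n - 64);
		*lo = 0;
	} else {				/* bits spill from lo into hi */
		*hi = (*hi << n) | (*lo >> (64 - n));
		*lo <<= n;
	}
}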
+138,8 @@ ENDPROC(__clean_dcache_area_pou) * - kaddr - kernel address * - size - size in question */ -ENTRY(__inval_dcache_area) +SYM_FUNC_START_LOCAL(__dma_inv_area) +SYM_FUNC_START_PI(__inval_dcache_area) /* FALLTHROUGH */ /* @@ -146,7 +147,6 @@ ENTRY(__inval_dcache_area) * - start - virtual start address of region * - size - size in question */ -__dma_inv_area: add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 @@ -165,8 +165,8 @@ __dma_inv_area: b.lo 2b dsb sy ret -ENDPIPROC(__inval_dcache_area) -ENDPROC(__dma_inv_area) +SYM_FUNC_END_PI(__inval_dcache_area) +SYM_FUNC_END(__dma_inv_area) /* * __clean_dcache_area_poc(kaddr, size) @@ -177,7 +177,8 @@ ENDPROC(__dma_inv_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_poc) +SYM_FUNC_START_LOCAL(__dma_clean_area) +SYM_FUNC_START_PI(__clean_dcache_area_poc) /* FALLTHROUGH */ /* @@ -185,11 +186,10 @@ ENTRY(__clean_dcache_area_poc) * - start - virtual start address of region * - size - size in question */ -__dma_clean_area: dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__clean_dcache_area_poc) -ENDPROC(__dma_clean_area) +SYM_FUNC_END_PI(__clean_dcache_area_poc) +SYM_FUNC_END(__dma_clean_area) /* * __clean_dcache_area_pop(kaddr, size) @@ -200,13 +200,13 @@ ENDPROC(__dma_clean_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_pop) +SYM_FUNC_START_PI(__clean_dcache_area_pop) alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -ENDPIPROC(__clean_dcache_area_pop) +SYM_FUNC_END_PI(__clean_dcache_area_pop) /* * __dma_flush_area(start, size) @@ -216,10 +216,10 @@ ENDPIPROC(__clean_dcache_area_pop) * - start - virtual start address of region * - size - size in question */ -ENTRY(__dma_flush_area) +SYM_FUNC_START_PI(__dma_flush_area) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__dma_flush_area) +SYM_FUNC_END_PI(__dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -227,11 +227,11 @@ ENDPIPROC(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -ENTRY(__dma_map_area) +SYM_FUNC_START_PI(__dma_map_area) cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area b __dma_clean_area -ENDPIPROC(__dma_map_area) +SYM_FUNC_END_PI(__dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -239,8 +239,8 @@ ENDPIPROC(__dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(__dma_unmap_area) +SYM_FUNC_START_PI(__dma_unmap_area) cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_area ret -ENDPIPROC(__dma_unmap_area) +SYM_FUNC_END_PI(__dma_unmap_area) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b5e329fde2dd..8ef73e89d514 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -29,15 +29,9 @@ static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1) -#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1) -#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK) -#else -#define NUM_USER_ASIDS (ASID_FIRST_VERSION) +#define NUM_USER_ASIDS ASID_FIRST_VERSION #define asid2idx(asid) ((asid) & ~ASID_MASK) #define idx2asid(idx) asid2idx(idx) -#endif /* Get the ASIDBits supported by the current CPU */ static u32 get_cpu_asid_bits(void) @@ -77,13 +71,33 @@ void verify_cpu_asid_bits(void) } } +static void set_kpti_asid_bits(void) +{ + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * 
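Splitting __dma_inv_area out with SYM_FUNC_START_LOCAL above turns a bare local label into a proper local symbol while preserving the fallthrough from __inval_dcache_area. The body is the standard by-line cache walk: round the start down to a line boundary, then issue one dc operation per line up to the end address. Roughly, in C (line size assumed 64 bytes here; the real code reads it from CTR_EL0 via dcache_line_size, and the first/last partial lines get clean+invalidate rather than plain invalidate):

/* dc_civac() is a hypothetical stand-in for the dc instruction */
static void dc_civac(unsigned long va) { (void)va; }

static void dcache_op_sketch(unsigned long kaddr, unsigned long size)
{
	unsigned long line = 64;		/* assumed; really from CTR_EL0 */
	unsigned long end = kaddr + size;

	for (kaddr &= ~(line - 1); kaddr < end; kaddr += line)
		dc_civac(kaddr);		/* maintain one line by VA */
	/* the real routine ends with a dsb barrier */
}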
sizeof(unsigned long); + /* + * In case of KPTI kernel/user ASIDs are allocated in + * pairs, the bottom bit distinguishes the two: if it + * is set, then the ASID will map only userspace. Thus + * mark even as reserved for kernel. + */ + memset(asid_map, 0xaa, len); +} + +static void set_reserved_asid_bits(void) +{ + if (arm64_kernel_unmapped_at_el0()) + set_kpti_asid_bits(); + else + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); +} + static void flush_context(void) { int i; u64 asid; /* Update the list of reserved ASIDs and the ASID bitmap. */ - bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + set_reserved_asid_bits(); for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); @@ -261,6 +275,14 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); + /* + * We cannot call set_reserved_asid_bits() here because CPU + * caps are not finalized yet, so it is safer to assume KPTI + * and reserve kernel ASID's from beginning. + */ + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) + set_kpti_asid_bits(); + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); return 0; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 077b02a2d4d3..85566d32958f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -445,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (kprobe_page_fault(regs, esr)) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 5a3b15a14a7f..40797cbfba2d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1070,7 +1070,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; /* * FIXME: Cleanup page tables (also in arch_add_memory() in case @@ -1079,7 +1078,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be * unlocked yet. 
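set_kpti_asid_bits() above leans on 0xaa (binary 10101010): memset() pre-sets every second bit of the allocation bitmap, so the allocator can only ever hand out one ASID of each KPTI kernel/user pair while the sibling stays permanently busy. A quick standalone check of which indices that reserves:

#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned long map = 0;

	memset(&map, 0xaa, sizeof(map));	/* as in set_kpti_asid_bits() */
	for (int i = 0; i < 8; i++)
		printf("asid %d: %s\n", i,
		       (map >> i) & 1 ? "reserved" : "allocatable");
	return 0;
}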
*/ - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 9ce7bd9d4d9c..250c49008d73 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -54,7 +54,7 @@ static int change_memory_common(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { unsigned long start = addr; - unsigned long size = PAGE_SIZE*numpages; + unsigned long size = PAGE_SIZE * numpages; unsigned long end = start + size; struct vm_struct *area; int i; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a1e0592d1fbc..aafed6902411 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -42,7 +42,14 @@ #define TCR_KASAN_FLAGS 0 #endif -#define MAIR(attr, mt) ((attr) << ((mt) * 8)) +/* Default MAIR_EL1 */ +#define MAIR_EL1_SET \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) #ifdef CONFIG_CPU_PM /** @@ -50,7 +57,7 @@ * * x0: virtual address of context pointer */ -ENTRY(cpu_do_suspend) +SYM_FUNC_START(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 @@ -74,7 +81,7 @@ alternative_endif stp x10, x11, [x0, #64] stp x12, x13, [x0, #80] ret -ENDPROC(cpu_do_suspend) +SYM_FUNC_END(cpu_do_suspend) /** * cpu_do_resume - restore CPU register context @@ -82,7 +89,7 @@ ENDPROC(cpu_do_suspend) * x0: Address of context pointer */ .pushsection ".idmap.text", "awx" -ENTRY(cpu_do_resume) +SYM_FUNC_START(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] ldp x6, x8, [x0, #32] @@ -131,7 +138,7 @@ alternative_else_nop_endif isb ret -ENDPROC(cpu_do_resume) +SYM_FUNC_END(cpu_do_resume) .popsection #endif @@ -142,7 +149,7 @@ ENDPROC(cpu_do_resume) * * - pgd_phys - physical address of new TTB */ -ENTRY(cpu_do_switch_mm) +SYM_FUNC_START(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 @@ -161,7 +168,7 @@ alternative_else_nop_endif msr ttbr0_el1, x3 // now update TTBR0 isb b post_ttbr_update_workaround // Back to C code... -ENDPROC(cpu_do_switch_mm) +SYM_FUNC_END(cpu_do_switch_mm) .pushsection ".idmap.text", "awx" @@ -182,7 +189,7 @@ ENDPROC(cpu_do_switch_mm) * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. */ -ENTRY(idmap_cpu_replace_ttbr1) +SYM_FUNC_START(idmap_cpu_replace_ttbr1) save_and_disable_daif flags=x2 __idmap_cpu_set_reserved_ttbr1 x1, x3 @@ -194,7 +201,7 @@ ENTRY(idmap_cpu_replace_ttbr1) restore_daif x2 ret -ENDPROC(idmap_cpu_replace_ttbr1) +SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -222,7 +229,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) */ __idmap_kpti_flag: .long 1 -ENTRY(idmap_kpti_install_ng_mappings) +SYM_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 num_cpus .req w1 swapper_pa .req x2 @@ -250,15 +257,15 @@ ENTRY(idmap_kpti_install_ng_mappings) /* We're the boot CPU. Wait for the others to catch up */ sevl 1: wfe - ldaxr w18, [flag_ptr] - eor w18, w18, num_cpus - cbnz w18, 1b + ldaxr w17, [flag_ptr] + eor w17, w17, num_cpus + cbnz w17, 1b /* We need to walk swapper, so turn off the MMU. 
*/ pre_disable_mmu_workaround - mrs x18, sctlr_el1 - bic x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + bic x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 isb /* Everybody is enjoying the idmap, so we can rewrite swapper. */ @@ -281,9 +288,9 @@ skip_pgd: isb /* We're done: fire up the MMU again */ - mrs x18, sctlr_el1 - orr x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + orr x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 isb /* @@ -353,47 +360,48 @@ skip_pte: b.ne do_pte b next_pmd + .unreq cpu + .unreq num_cpus + .unreq swapper_pa + .unreq cur_pgdp + .unreq end_pgdp + .unreq pgd + .unreq cur_pudp + .unreq end_pudp + .unreq pud + .unreq cur_pmdp + .unreq end_pmdp + .unreq pmd + .unreq cur_ptep + .unreq end_ptep + .unreq pte + /* Secondary CPUs end up here */ __idmap_kpti_secondary: /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x18, x17 + __idmap_cpu_set_reserved_ttbr1 x16, x17 /* Increment the flag to let the boot CPU we're ready */ -1: ldxr w18, [flag_ptr] - add w18, w18, #1 - stxr w17, w18, [flag_ptr] +1: ldxr w16, [flag_ptr] + add w16, w16, #1 + stxr w17, w16, [flag_ptr] cbnz w17, 1b /* Wait for the boot CPU to finish messing around with swapper */ sevl 1: wfe - ldxr w18, [flag_ptr] - cbnz w18, 1b + ldxr w16, [flag_ptr] + cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x18 + offset_ttbr1 swapper_ttb, x16 msr ttbr1_el1, swapper_ttb isb ret - .unreq cpu - .unreq num_cpus - .unreq swapper_pa .unreq swapper_ttb .unreq flag_ptr - .unreq cur_pgdp - .unreq end_pgdp - .unreq pgd - .unreq cur_pudp - .unreq end_pudp - .unreq pud - .unreq cur_pmdp - .unreq end_pmdp - .unreq pmd - .unreq cur_ptep - .unreq end_ptep - .unreq pte -ENDPROC(idmap_kpti_install_ng_mappings) +SYM_FUNC_END(idmap_kpti_install_ng_mappings) .popsection #endif @@ -404,7 +412,7 @@ ENDPROC(idmap_kpti_install_ng_mappings) * value of the SCTLR_EL1 register. 
*/ .pushsection ".idmap.text", "awx" -ENTRY(__cpu_setup) +SYM_FUNC_START(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh @@ -416,23 +424,9 @@ ENTRY(__cpu_setup) enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* - * Memory region attributes for LPAE: - * - * n = AttrIndx[2:0] - * n MAIR - * DEVICE_nGnRnE 000 00000000 - * DEVICE_nGnRE 001 00000100 - * DEVICE_GRE 010 00001100 - * NORMAL_NC 011 01000100 - * NORMAL 100 11111111 - * NORMAL_WT 101 10111011 + * Memory region attributes */ - ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ - MAIR(0x04, MT_DEVICE_nGnRE) | \ - MAIR(0x0c, MT_DEVICE_GRE) | \ - MAIR(0x44, MT_NORMAL_NC) | \ - MAIR(0xff, MT_NORMAL) | \ - MAIR(0xbb, MT_NORMAL_WT) + mov_q x5, MAIR_EL1_SET msr mair_el1, x5 /* * Prepare SCTLR @@ -475,4 +469,4 @@ ENTRY(__cpu_setup) #endif /* CONFIG_ARM64_HW_AFDBM */ msr tcr_el1, x10 ret // return to head.S -ENDPROC(__cpu_setup) +SYM_FUNC_END(__cpu_setup) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index c5f05c4a4d00..5b09aca55108 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -56,11 +56,11 @@ #define XEN_IMM 0xEA1 #define HYPERCALL_SIMPLE(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ +SYM_FUNC_START(HYPERVISOR_##hypercall) \ mov x16, #__HYPERVISOR_##hypercall; \ hvc XEN_IMM; \ ret; \ -ENDPROC(HYPERVISOR_##hypercall) +SYM_FUNC_END(HYPERVISOR_##hypercall) #define HYPERCALL0 HYPERCALL_SIMPLE #define HYPERCALL1 HYPERCALL_SIMPLE @@ -86,7 +86,7 @@ HYPERCALL2(multicall); HYPERCALL2(vm_assist); HYPERCALL3(dm_op); -ENTRY(privcmd_call) +SYM_FUNC_START(privcmd_call) mov x16, x0 mov x0, x1 mov x1, x2 @@ -109,4 +109,4 @@ ENTRY(privcmd_call) */ uaccess_ttbr0_disable x6, x7 ret -ENDPROC(privcmd_call); +SYM_FUNC_END(privcmd_call); diff --git a/arch/c6x/include/asm/vmalloc.h b/arch/c6x/include/asm/vmalloc.h new file mode 100644 index 000000000000..26c6c6696bbd --- /dev/null +++ b/arch/c6x/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_C6X_VMALLOC_H +#define _ASM_C6X_VMALLOC_H + +#endif /* _ASM_C6X_VMALLOC_H */ diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S index 4332a10aec6c..fb154d19625b 100644 --- a/arch/c6x/kernel/entry.S +++ b/arch/c6x/kernel/entry.S @@ -18,7 +18,7 @@ #define DP B14 #define SP B15 -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION #define resume_kernel restore_all #endif @@ -287,7 +287,7 @@ work_notifysig: ;; is a little bit different ;; ENTRY(ret_from_exception) -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION MASK_INT B2 #endif @@ -557,7 +557,7 @@ ENDPROC(_nmi_handler) ;; ;; Jump to schedule() then return to ret_from_isr ;; -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION resume_kernel: GET_THREAD_INFO A12 LDW .D1T1 *+A12(THREAD_INFO_PREEMPT_COUNT),A1 @@ -582,7 +582,7 @@ preempt_schedule: B .S2 preempt_schedule_irq #endif ADDKPC .S2 preempt_schedule,B3,4 -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ ENTRY(enable_exception) DINT diff --git a/arch/csky/include/asm/vmalloc.h b/arch/csky/include/asm/vmalloc.h new file mode 100644 index 000000000000..43dca6336b4c --- /dev/null +++ b/arch/csky/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_CSKY_VMALLOC_H +#define _ASM_CSKY_VMALLOC_H + +#endif /* _ASM_CSKY_VMALLOC_H */ diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a7a5b67df898..007706328000 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -277,7 +277,7 @@ ENTRY(csky_irq) zero_fp psrset ee -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION mov r9, sp /* Get 
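The mov_q MAIR_EL1_SET change in __cpu_setup above builds the constant with mov/movk instead of loading it from a literal pool; the value itself is unchanged. Assuming the usual definition MAIR_ATTRIDX(attr, idx) == (attr) << ((idx) * 8), the macro packs one attribute byte per memory type, using exactly the encodings from the comment table the hunk removes:

#include <stdio.h>

#define MAIR_ATTRIDX(attr, idx)	((unsigned long long)(attr) << ((idx) * 8))

int main(void)
{
	unsigned long long mair =
		MAIR_ATTRIDX(0x00, 0) |	/* MT_DEVICE_nGnRnE */
		MAIR_ATTRIDX(0x04, 1) |	/* MT_DEVICE_nGnRE  */
		MAIR_ATTRIDX(0x0c, 2) |	/* MT_DEVICE_GRE    */
		MAIR_ATTRIDX(0x44, 3) |	/* MT_NORMAL_NC     */
		MAIR_ATTRIDX(0xff, 4) |	/* MT_NORMAL        */
		MAIR_ATTRIDX(0xbb, 5);	/* MT_NORMAL_WT     */

	printf("MAIR_EL1 = %#llx\n", mair);	/* prints 0xbbff440c0400 */
	return 0;
}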
current stack pointer */ bmaski r10, THREAD_SHIFT andn r9, r10 /* Get thread_info */ @@ -294,7 +294,7 @@ ENTRY(csky_irq) mov a0, sp jbsr csky_do_IRQ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION subi r12, 1 stw r12, (r9, TINFO_PREEMPT) cmpnei r12, 0 diff --git a/arch/h8300/include/asm/vmalloc.h b/arch/h8300/include/asm/vmalloc.h new file mode 100644 index 000000000000..08a55c1dfa23 --- /dev/null +++ b/arch/h8300/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_H8300_VMALLOC_H +#define _ASM_H8300_VMALLOC_H + +#endif /* _ASM_H8300_VMALLOC_H */ diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S index 4ade5f8299ba..c6e289b5f1f2 100644 --- a/arch/h8300/kernel/entry.S +++ b/arch/h8300/kernel/entry.S @@ -284,12 +284,12 @@ badsys: mov.l er0,@(LER0:16,sp) bra resume_userspace -#if !defined(CONFIG_PREEMPT) +#if !defined(CONFIG_PREEMPTION) #define resume_kernel restore_all #endif ret_from_exception: -#if defined(CONFIG_PREEMPT) +#if defined(CONFIG_PREEMPTION) orc #0xc0,ccr #endif ret_from_interrupt: @@ -319,7 +319,7 @@ work_resched: restore_all: RESTORE_ALL /* Does RTE */ -#if defined(CONFIG_PREEMPT) +#if defined(CONFIG_PREEMPTION) resume_kernel: mov.l @(TI_PRE_COUNT:16,er4),er0 bne restore_all:8 diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 12cd9231c4b8..0231d69c8bf2 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -91,7 +91,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -124,7 +124,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%2);\n" \ " %1 = "#op "(%0,%3);\n" \ " memw_locked(%2,P3)=%1;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output), "=&r" (val) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -173,7 +173,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) " }" " memw_locked(%2, p3) = %1;" " {" - " if !p3 jump 1b;" + " if (!p3) jump 1b;" " }" "2:" : "=&r" (__oldval), "=&r" (tmp) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 47384b094b94..71429f756af0 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -38,7 +38,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -62,7 +62,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -88,7 +88,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr) 
"1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -223,7 +223,7 @@ static inline int ffs(int x) int r; asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n" - "{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n" + "{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n" : "=&r" (r) : "r" (x) : "p0"); diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index 6091322c3af9..92b8a02e588a 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -30,7 +30,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, __asm__ __volatile__ ( "1: %0 = memw_locked(%1);\n" /* load into retval */ " memw_locked(%1,P0) = %2;\n" /* store into memory */ - " if !P0 jump 1b;\n" + " if (!P0) jump 1b;\n" : "=&r" (retval) : "r" (ptr), "r" (x) : "memory", "p0" diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index cb635216a732..0191f7c7193e 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h @@ -16,7 +16,7 @@ /* For example: %1 = %4 */ \ insn \ "2: memw_locked(%3,p2) = %1;\n" \ - " if !p2 jump 1b;\n" \ + " if (!p2) jump 1b;\n" \ " %1 = #0;\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ @@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, "1: %1 = memw_locked(%3)\n" " {\n" " p2 = cmp.eq(%1,%4)\n" - " if !p2.new jump:NT 3f\n" + " if (!p2.new) jump:NT 3f\n" " }\n" "2: memw_locked(%3,p2) = %5\n" - " if !p2 jump 1b\n" + " if (!p2) jump 1b\n" "3:\n" ".section .fixup,\"ax\"\n" "4: %0 = #%6\n" diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index 539e3efcf39c..bda2a9c2df78 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -172,7 +172,7 @@ static inline void writel(u32 data, volatile void __iomem *addr) #define writel_relaxed __raw_writel void __iomem *ioremap(unsigned long phys_addr, unsigned long size); -#define ioremap_nocache ioremap +#define ioremap_uc(X, Y) ioremap((X), (Y)) #define __raw_writel writel diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index bfe07d842ff3..ef103b73bec8 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -30,9 +30,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " { P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -46,7 +46,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) "1: R6 = memw_locked(%0);\n" " R6 = add(R6,#-1);\n" " memw_locked(%0,P3) = R6\n" - " if !P3 jump 1b;\n" + " if (!P3) jump 1b;\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -61,7 +61,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " { %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " { %0 = P3 }\n" "1:\n" @@ -78,9 +78,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0)\n" " { P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 
jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -94,7 +94,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1)\n" " { %0 = #0; P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" @@ -117,9 +117,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1b; R6 = #1; }\n" + " { if (!P3) jump 1b; R6 = #1; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -139,7 +139,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1f; R6 = #1; %0 = #0; }\n" + " { if (!P3) jump 1f; R6 = #1; %0 = #0; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" diff --git a/arch/hexagon/include/asm/vmalloc.h b/arch/hexagon/include/asm/vmalloc.h new file mode 100644 index 000000000000..7b04609e525c --- /dev/null +++ b/arch/hexagon/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_HEXAGON_VMALLOC_H +#define _ASM_HEXAGON_VMALLOC_H + +#endif /* _ASM_HEXAGON_VMALLOC_H */ diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index 35f29423fda8..5ed02f699479 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -11,8 +11,6 @@ #include <linux/thread_info.h> #include <linux/module.h> -register unsigned long current_frame_pointer asm("r30"); - struct stackframe { unsigned long fp; unsigned long rets; @@ -30,7 +28,7 @@ void save_stack_trace(struct stack_trace *trace) low = (unsigned long)task_stack_page(current); high = low + THREAD_SIZE; - fp = current_frame_pointer; + fp = (unsigned long)__builtin_frame_address(0); while (fp >= low && fp <= (high - sizeof(*frame))) { frame = (struct stackframe *)fp; diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S index 12242c27e2df..554371d92bed 100644 --- a/arch/hexagon/kernel/vm_entry.S +++ b/arch/hexagon/kernel/vm_entry.S @@ -265,12 +265,12 @@ event_dispatch: * should be in the designated register (usually R19) * * If we were in kernel mode, we don't need to check scheduler - * or signals if CONFIG_PREEMPT is not set. If set, then it has + * or signals if CONFIG_PREEMPTION is not set. If set, then it has * to jump to a need_resched kind of block. - * BTW, CONFIG_PREEMPT is not supported yet. + * BTW, CONFIG_PREEMPTION is not supported yet. */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION R0 = #VM_INT_DISABLE trap1(#HVM_TRAP1_VMSETIE) #endif @@ -369,7 +369,7 @@ ret_from_fork: R26.L = #LO(do_work_pending); R0 = #VM_INT_DISABLE; } - if P0 jump check_work_pending + if (P0) jump check_work_pending { R0 = R25; callr R24 diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index f886d4dc9d55..b66ba907019c 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -38,7 +38,10 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); /* Low-level suspend routine. */ extern int acpi_suspend_lowlevel(void); -extern unsigned long acpi_wakeup_address; +static inline unsigned long acpi_get_wakeup_address(void) +{ + return 0; +} /* * Record the cpei override flag and current logical cpu. 
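Dropping the global register variable in stacktrace.c in favour of __builtin_frame_address(0) is the portable way to read the current frame pointer. A userspace sketch of the same walk over hexagon-style {fp, rets} frame records (the kernel clamps fp to the task stack; here a depth cap stands in for those bounds checks):

#include <stdio.h>

struct stackframe {		/* record layout from the file above */
	unsigned long fp;	/* caller's frame pointer */
	unsigned long rets;	/* return address */
};

static void dump_frames(void)
{
	unsigned long fp = (unsigned long)__builtin_frame_address(0);

	for (int depth = 0; fp && depth < 16; depth++) {
		struct stackframe *frame = (struct stackframe *)fp;

		printf("ret = %#lx\n", frame->rets);
		fp = frame->fp;
	}
}

int main(void)
{
	dump_frames();
	return 0;
}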
This is diff --git a/arch/ia64/include/asm/vga.h b/arch/ia64/include/asm/vga.h index 30cb373f3de8..64ce0b971a0a 100644 --- a/arch/ia64/include/asm/vga.h +++ b/arch/ia64/include/asm/vga.h @@ -18,7 +18,7 @@ extern unsigned long vga_console_iobase; extern unsigned long vga_console_membase; -#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap_nocache(vga_console_membase + (x), s)) +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(vga_console_membase + (x), s)) #define vga_readb(x) (*(x)) #define vga_writeb(x,y) (*(y) = (x)) diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h new file mode 100644 index 000000000000..a2b51141ad28 --- /dev/null +++ b/arch/ia64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_IA64_VMALLOC_H +#define _ASM_IA64_VMALLOC_H + +#endif /* _ASM_IA64_VMALLOC_H */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 70d1587ddcd4..a5636524af76 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -42,8 +42,6 @@ int acpi_lapic; unsigned int acpi_cpei_override; unsigned int acpi_cpei_phys_cpuid; -unsigned long acpi_wakeup_address = 0; - #define ACPI_MAX_PLATFORM_INTERRUPTS 256 /* Array to record platform interrupt vectors for generic interrupt routing. */ diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index f80eb7fb544d..258d7b70c0f3 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -50,7 +50,7 @@ int __init init_cyclone_clock(void) /* find base address */ offset = (CYCLONE_CBAR_ADDR); - reg = ioremap_nocache(offset, sizeof(u64)); + reg = ioremap(offset, sizeof(u64)); if(!reg){ printk(KERN_ERR "Summit chipset: Could not find valid CBAR" " register.\n"); @@ -68,7 +68,7 @@ int __init init_cyclone_clock(void) /* setup PMCC */ offset = (base + CYCLONE_PMCC_OFFSET); - reg = ioremap_nocache(offset, sizeof(u64)); + reg = ioremap(offset, sizeof(u64)); if(!reg){ printk(KERN_ERR "Summit chipset: Could not find valid PMCC" " register.\n"); @@ -80,7 +80,7 @@ int __init init_cyclone_clock(void) /* setup MPCS */ offset = (base + CYCLONE_MPCS_OFFSET); - reg = ioremap_nocache(offset, sizeof(u64)); + reg = ioremap(offset, sizeof(u64)); if(!reg){ printk(KERN_ERR "Summit chipset: Could not find valid MPCS" " register.\n"); @@ -92,7 +92,7 @@ int __init init_cyclone_clock(void) /* map in cyclone_timer */ offset = (base + CYCLONE_MPMC_OFFSET); - cyclone_timer = ioremap_nocache(offset, sizeof(u32)); + cyclone_timer = ioremap(offset, sizeof(u32)); if(!cyclone_timer){ printk(KERN_ERR "Summit chipset: Could not find valid MPMC" " register.\n"); diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index a9992be5718b..2ac926331500 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -670,12 +670,12 @@ GLOBAL_ENTRY(ia64_leave_syscall) * * p6 controls whether current_thread_info()->flags needs to be check for * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count * is 0. After extra work processing has been completed, execution * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check * needs to be redone. 
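The ia64 conversions above belong to the tree-wide removal of ioremap_nocache(): on these architectures it was already an alias for ioremap(), which returns an uncached mapping, so callers convert mechanically. The post-conversion pattern, sketched as kernel code (addresses and names are illustrative):

#include <linux/errno.h>
#include <linux/io.h>

static void __iomem *regs;

static int map_regs_sketch(unsigned long phys)
{
	regs = ioremap(phys, 0x100);	/* was: ioremap_nocache(phys, 0x100) */
	if (!regs)
		return -ENOMEM;

	(void)readl(regs);		/* uncached MMIO read */
	return 0;
}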
*/ -#ifdef CONFIG_PREEMPT RSM_PSR_I(p0, r2, r18) // disable interrupts cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 @@ -685,7 +685,7 @@ GLOBAL_ENTRY(ia64_leave_syscall) (pUStk) mov r21=0 // r21 <- 0 ;; cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else /* !CONFIG_PREEMPT */ +#else /* !CONFIG_PREEMPTION */ RSM_PSR_I(pUStk, r2, r18) cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk @@ -814,12 +814,12 @@ GLOBAL_ENTRY(ia64_leave_kernel) * * p6 controls whether current_thread_info()->flags needs to be check for * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count * is 0. After extra work processing has been completed, execution * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check * needs to be redone. */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION RSM_PSR_I(p0, r17, r31) // disable interrupts cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 @@ -1120,7 +1120,7 @@ skip_rbs_switch: /* * On entry: - * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT) + * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPTION) * r31 = current->thread_info->flags * On exit: * p6 = TRUE if work-pending-check needs to be redone diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index b8356edbde65..a6d6a0556f08 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -841,7 +841,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) return 1; } -#if !defined(CONFIG_PREEMPT) +#if !defined(CONFIG_PREEMPTION) if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ ia64_psr(regs)->ri = p->ainsn.slot; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 58fd67068bac..b01d68a2d5d9 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 6663f1741798..6ad6cdac74b3 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -14,6 +14,7 @@ config M68K select HAVE_AOUT if MMU select HAVE_ASM_MODVERSIONS select HAVE_DEBUG_BUGVERBOSE + select HAVE_COPY_THREAD_TLS select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 select HAVE_UID16 diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 619d30d663a2..e1134c3e0b69 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -562,6 +562,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -574,7 +575,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -612,6 +613,9 @@
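Every CONFIG_PREEMPT -> CONFIG_PREEMPTION hunk in this series is mechanical: CONFIG_PREEMPTION is the umbrella symbol selected by both CONFIG_PREEMPT and CONFIG_PREEMPT_RT, so code that must cope with a preemptible kernel now tests the umbrella. A toy model of the relationship, with the Kconfig symbols played by macros:

#include <stdio.h>

#define CONFIG_PREEMPT 1			/* pick either flavour... */

#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_RT)
#define CONFIG_PREEMPTION 1			/* ...and the umbrella follows */
#endif

int main(void)
{
#ifdef CONFIG_PREEMPTION
	puts("entry code must check preempt_count before returning to kernel");
#else
	puts("resume_kernel can alias restore_all");
#endif
	return 0;
}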
CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -620,6 +624,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -651,4 +656,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index caa0558abcdb..484cb1643df1 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -518,6 +518,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -530,7 +531,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -568,6 +569,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -576,6 +580,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -607,4 +612,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 2551c7e9ac54..eb6a46b6d135 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -540,6 +540,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -552,7 +553,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -590,6 +591,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -598,6 +602,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -629,4 +634,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 4ffc1e5646d5..bee9263a409c 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -511,6 +511,7 @@ 
CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -523,7 +524,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -561,6 +562,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -569,6 +573,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -600,4 +605,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 806da3d97ca4..c8847a8bcbd6 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -520,6 +520,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -532,7 +533,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -570,6 +571,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -578,6 +582,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -609,4 +614,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 250da20e291c..303ffafd9cad 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -542,6 +542,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -554,7 +555,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -592,6 +593,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -600,6 +604,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m 
CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -631,4 +636,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b764a0368a56..89a704226cd9 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -628,6 +628,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -640,7 +641,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -678,6 +679,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -686,6 +690,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -717,4 +722,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7800d3a8d46e..f62c1f4d03a0 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -510,6 +510,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -522,7 +523,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -560,6 +561,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -568,6 +572,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -599,4 +604,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index c32dc2d2058d..58dcad26a751 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -511,6 +511,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -523,7 +524,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -561,6 +562,9 @@ CONFIG_CRYPTO_USER_API_HASH=m 
CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -569,6 +573,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -600,4 +605,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index bf0a65ce57e0..5d3c28d1d545 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -529,6 +529,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -541,7 +542,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -579,6 +580,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -587,6 +591,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -618,4 +623,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5f3cfa2926d2..5ef9e17dcd51 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -513,6 +513,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -525,7 +526,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -563,6 +564,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 58354d2018d5..22e1accc60a3 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -512,6 +512,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -524,7 +525,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -562,6 +563,9 
@@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -570,6 +574,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -601,4 +606,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h index 559cb91bede1..dec05743d426 100644 --- a/arch/m68k/include/asm/kmap.h +++ b/arch/m68k/include/asm/kmap.h @@ -27,7 +27,6 @@ static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size) return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); } -#define ioremap_nocache ioremap #define ioremap_uc ioremap #define ioremap_wt ioremap_wt static inline void __iomem *ioremap_wt(unsigned long physaddr, diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 2e0047cf86f8..4ae52414cd9d 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -30,5 +30,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_CLONE3 #endif /* _ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/include/asm/vmalloc.h b/arch/m68k/include/asm/vmalloc.h new file mode 100644 index 000000000000..bc1dca6cf134 --- /dev/null +++ b/arch/m68k/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_M68K_VMALLOC_H +#define _ASM_M68K_VMALLOC_H + +#endif /* _ASM_M68K_VMALLOC_H */ diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 97cd3ea5f10b..9dd76fbb7c6b 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -69,6 +69,13 @@ ENTRY(__sys_vfork) lea %sp@(24),%sp rts +ENTRY(__sys_clone3) + SAVE_SWITCH_STACK + pea %sp@(SWITCH_STACK_SIZE) + jbsr m68k_clone3 + lea %sp@(28),%sp + rts + ENTRY(sys_sigreturn) SAVE_SWITCH_STACK movel %sp,%sp@- | switch_stack pointer diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 4e77a06735c1..8f0d9140700f 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -30,8 +30,9 @@ #include <linux/init_task.h> #include <linux/mqueue.h> #include <linux/rcupdate.h> - +#include <linux/syscalls.h> #include <linux/uaccess.h> + #include <asm/traps.h> #include <asm/machdep.h> #include <asm/setup.h> @@ -107,20 +108,43 @@ void flush_thread(void) * on top of pt_regs, which means that sys_clone() arguments would be * buried. We could, of course, copy them, but it's too costly for no * good reason - generic clone() would have to copy them *again* for - * do_fork() anyway. So in this case it's actually better to pass pt_regs * - * and extract arguments for do_fork() from there. Eventually we might - * go for calling do_fork() directly from the wrapper, but only after we - * are finished with do_fork() prototype conversion. + * _do_fork() anyway. So in this case it's actually better to pass pt_regs * + * and extract arguments for _do_fork() from there. Eventually we might + * go for calling _do_fork() directly from the wrapper, but only after we + * are finished with _do_fork() prototype conversion. 
*/ asmlinkage int m68k_clone(struct pt_regs *regs) { /* regs will be equal to current_pt_regs() */ - return do_fork(regs->d1, regs->d2, 0, - (int __user *)regs->d3, (int __user *)regs->d4); + struct kernel_clone_args args = { + .flags = regs->d1 & ~CSIGNAL, + .pidfd = (int __user *)regs->d3, + .child_tid = (int __user *)regs->d4, + .parent_tid = (int __user *)regs->d3, + .exit_signal = regs->d1 & CSIGNAL, + .stack = regs->d2, + .tls = regs->d5, + }; + + if (!legacy_clone_args_valid(&args)) + return -EINVAL; + + return _do_fork(&args); +} + +/* + * Because extra registers are saved on the stack after the sys_clone3() + * arguments, this C wrapper extracts them from pt_regs * and then calls the + * generic sys_clone3() implementation. + */ +asmlinkage int m68k_clone3(struct pt_regs *regs) +{ + return sys_clone3((struct clone_args __user *)regs->d1, regs->d2); } -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long arg, struct task_struct *p, + unsigned long tls) { struct fork_frame { struct switch_stack sw; @@ -155,7 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.usp = usp ?: rdusp(); if (clone_flags & CLONE_SETTLS) - task_thread_info(p)->tp_value = frame->regs.d5; + task_thread_info(p)->tp_value = tls; #ifdef CONFIG_FPU if (!FPU_IS_EMU) { diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index a88a285a0e5f..a00a5d0db602 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -434,4 +434,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 common clone3 __sys_clone3 diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 5f46ebe7bfe3..a105f113fd67 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -11,7 +11,7 @@ config MICROBLAZE select ARCH_HAS_UNCACHED_SEGMENT if !MMU select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK diff --git a/arch/microblaze/include/asm/vmalloc.h b/arch/microblaze/include/asm/vmalloc.h new file mode 100644 index 000000000000..04013a42b0fe --- /dev/null +++ b/arch/microblaze/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MICROBLAZE_VMALLOC_H +#define _ASM_MICROBLAZE_VMALLOC_H + +#endif /* _ASM_MICROBLAZE_VMALLOC_H */ diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index de7083bd1d24..f6ded356394a 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -728,7 +728,7 @@ no_intr_resched: bri 6f; /* MS: Return to kernel state. 
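[Review note] With syscall 435 wired into the table above, userspace can exercise clone3() directly. A minimal test sketch, assuming a libc that exposes raw syscall(2) and the uapi struct clone_args; this program is illustrative only, not part of the patch:

	#include <linux/sched.h>	/* struct clone_args (uapi) */
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <signal.h>
	#include <string.h>
	#include <stdio.h>

	#ifndef __NR_clone3
	#define __NR_clone3 435		/* the slot allocated above */
	#endif

	int main(void)
	{
		struct clone_args args;
		long pid;

		memset(&args, 0, sizeof(args));
		args.exit_signal = SIGCHLD;	/* fork-like child */

		pid = syscall(__NR_clone3, &args, sizeof(args));
		if (pid == 0)
			_exit(0);		/* child */
		printf("clone3() returned %ld\n", pid);
		return pid < 0;
	}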
*/ 2: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION lwi r11, CURRENT_TASK, TS_THREAD_INFO; /* MS: get preempt_count from thread info */ lwi r5, r11, TI_PREEMPT_COUNT; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index add388236f4e..a2739a34bb12 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -15,7 +15,7 @@ config MIPS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select CPU_NO_EFFICIENT_FFS if (TARGET_ISA_REV < 1) select CPU_PM if CPU_IDLE @@ -47,7 +47,7 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if (!CPU_MICROMIPS) + select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c index 7de162432d7f..95def949c971 100644 --- a/arch/mips/ar7/clock.c +++ b/arch/mips/ar7/clock.c @@ -236,9 +236,9 @@ static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock, static void __init tnetd7300_init_clocks(void) { - u32 *bootcr = (u32 *)ioremap_nocache(AR7_REGS_DCL, 4); + u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); struct tnetd7300_clocks *clocks = - ioremap_nocache(UR8_REGS_CLOCKS, + ioremap(UR8_REGS_CLOCKS, sizeof(struct tnetd7300_clocks)); bus_clk.rate = tnetd7300_get_clock(BUS_PLL_SOURCE_SHIFT, @@ -320,9 +320,9 @@ static int tnetd7200_get_clock_base(int clock_id, u32 *bootcr) static void __init tnetd7200_init_clocks(void) { - u32 *bootcr = (u32 *)ioremap_nocache(AR7_REGS_DCL, 4); + u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); struct tnetd7200_clocks *clocks = - ioremap_nocache(AR7_REGS_CLOCKS, + ioremap(AR7_REGS_CLOCKS, sizeof(struct tnetd7200_clocks)); int cpu_base, cpu_mul, cpu_prediv, cpu_postdiv; int dsp_base, dsp_mul, dsp_prediv, dsp_postdiv; diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index 2292e55c12e2..8b006addd6ba 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c @@ -308,7 +308,7 @@ int __init ar7_gpio_init(void) size = 0x1f; } - gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size); + gpch->regs = ioremap(AR7_REGS_GPIO, size); if (!gpch->regs) { printk(KERN_ERR "%s: failed to ioremap regs\n", gpch->chip.label); diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 1f2028266493..215149a85d83 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -702,7 +702,7 @@ static int __init ar7_register_devices(void) pr_warn("unable to register usb slave: %d\n", res); /* Register watchdog only if enabled in hardware */ - bootcr = ioremap_nocache(AR7_REGS_DCL, 4); + bootcr = ioremap(AR7_REGS_DCL, 4); val = readl(bootcr); iounmap(bootcr); if (val & AR7_WDT_HW_ENA) { diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index 8da996142d6a..24f619199ee7 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -262,7 +262,7 @@ void __init ar2315_plat_mem_setup(void) u32 config; /* Detect memory size */ - sdram_base = ioremap_nocache(AR2315_SDRAMCTL_BASE, + sdram_base = ioremap(AR2315_SDRAMCTL_BASE, AR2315_SDRAMCTL_SIZE); memcfg = __raw_readl(sdram_base + AR2315_MEM_CFG); memsize = 1 + ATH25_REG_MS(memcfg, AR2315_MEM_CFG_DATA_WIDTH); @@ -272,7 +272,7 @@ void __init ar2315_plat_mem_setup(void) add_memory_region(0, memsize, BOOT_MEM_RAM); iounmap(sdram_base); - ar2315_rst_base 
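[Review note] The CONFIG_PREEMPT to CONFIG_PREEMPTION conversions that recur through these entry paths are mechanical: PREEMPTION is the umbrella symbol selected by both PREEMPT and PREEMPT_RT, so code that must run on every preemptible kernel keys off it. The C-side shape, for reference:

	/* Guard irq-exit rescheduling on the umbrella symbol so it fires
	 * for both CONFIG_PREEMPT and CONFIG_PREEMPT_RT kernels. */
	#ifdef CONFIG_PREEMPTION
		if (!preempt_count() && need_resched())
			preempt_schedule_irq();
	#endif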
= ioremap_nocache(AR2315_RST_BASE, AR2315_RST_SIZE); + ar2315_rst_base = ioremap(AR2315_RST_BASE, AR2315_RST_SIZE); /* Detect the hardware based on the device ID */ devid = ar2315_rst_reg_read(AR2315_SREV) & AR2315_REV_CHIP; diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index acd55a9cffe3..47f3e98974fc 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -185,7 +185,7 @@ static void __init ar5312_flash_init(void) void __iomem *flashctl_base; u32 ctl; - flashctl_base = ioremap_nocache(AR5312_FLASHCTL_BASE, + flashctl_base = ioremap(AR5312_FLASHCTL_BASE, AR5312_FLASHCTL_SIZE); ctl = __raw_readl(flashctl_base + AR5312_FLASHCTL0); @@ -358,7 +358,7 @@ void __init ar5312_plat_mem_setup(void) u32 devid; /* Detect memory size */ - sdram_base = ioremap_nocache(AR5312_SDRAMCTL_BASE, + sdram_base = ioremap(AR5312_SDRAMCTL_BASE, AR5312_SDRAMCTL_SIZE); memcfg = __raw_readl(sdram_base + AR5312_MEM_CFG1); bank0_ac = ATH25_REG_MS(memcfg, AR5312_MEM_CFG1_AC0); @@ -369,7 +369,7 @@ void __init ar5312_plat_mem_setup(void) add_memory_region(0, memsize, BOOT_MEM_RAM); iounmap(sdram_base); - ar5312_rst_base = ioremap_nocache(AR5312_RST_BASE, AR5312_RST_SIZE); + ar5312_rst_base = ioremap(AR5312_RST_BASE, AR5312_RST_SIZE); devid = ar5312_rst_reg_read(AR5312_REV); devid >>= AR5312_REV_WMAC_MIN_S; diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c index 989e71015ee6..cb99f9739910 100644 --- a/arch/mips/ath25/board.c +++ b/arch/mips/ath25/board.c @@ -111,7 +111,7 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size) u8 *mac_addr; u32 offset; - flash_base = ioremap_nocache(base, size); + flash_base = ioremap(base, size); flash_limit = flash_base + size; ath25_board.config = NULL; diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index 63eacb8b0eb5..137abbc65c60 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -41,7 +41,7 @@ static void __iomem *ath79_ddr_pci_win_base; void ath79_ddr_ctrl_init(void) { - ath79_ddr_base = ioremap_nocache(AR71XX_DDR_CTRL_BASE, + ath79_ddr_base = ioremap(AR71XX_DDR_CTRL_BASE, AR71XX_DDR_CTRL_SIZE); if (soc_is_ar913x() || soc_is_ar724x() || soc_is_ar933x()) { ath79_ddr_wb_flush_base = ath79_ddr_base + 0x7c; diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index ea385a865781..484ee28922a9 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -226,9 +226,9 @@ void __init plat_mem_setup(void) else if (fw_passed_dtb) __dt_setup_arch((void *)KSEG0ADDR(fw_passed_dtb)); - ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE, + ath79_reset_base = ioremap(AR71XX_RESET_BASE, AR71XX_RESET_SIZE); - ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE, + ath79_pll_base = ioremap(AR71XX_PLL_BASE, AR71XX_PLL_SIZE); ath79_detect_sys_type(); ath79_ddr_ctrl_init(); diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 172801ed35b8..d859f079b771 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. 
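[Review note] The tree-wide ioremap_nocache() to ioremap() conversions in this series are one-for-one: on every architecture touched here ioremap() already returns an uncached mapping, so only the spelling changes. A driver-style sketch of the before/after (REG_BASE and REG_SIZE are made-up values, not from this patch):

	#include <linux/io.h>

	#define REG_BASE	0x1f000000	/* illustrative */
	#define REG_SIZE	0x1000		/* illustrative */

	static void __iomem *regs;

	static int __init example_map(void)
	{
		regs = ioremap(REG_BASE, REG_SIZE);	/* was ioremap_nocache() */
		if (!regs)
			return -ENOMEM;
		writel(0x1, regs + 0x10);		/* MMIO accessors unchanged */
		return 0;
	}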
+KCOV_INSTRUMENT := n + # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c index ba8f82a29a81..e794b2d53adf 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c +++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c @@ -45,13 +45,6 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; /* See header file for descriptions of functions */ /** - * This macro returns the size of a member of a structure. - * Logically it is the same as "sizeof(s::field)" in C++, but - * C lacks the "::" operator. - */ -#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field) - -/** * This macro returns a member of the * cvmx_bootmem_named_block_desc_t structure. These members can't * be directly addressed as they might be in memory not directly @@ -65,7 +58,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; #define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field) \ __cvmx_bootmem_desc_get(addr, \ offsetof(struct cvmx_bootmem_named_block_desc, field), \ - SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field)) + sizeof_field(struct cvmx_bootmem_named_block_desc, field)) /** * This function is the implementation of the get macros defined diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 1f742c32a883..4f34d92b52f9 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -357,7 +357,7 @@ static void octeon_write_lcd(const char *s) { if (octeon_bootinfo->led_display_base_addr) { void __iomem *lcd_address = - ioremap_nocache(octeon_bootinfo->led_display_base_addr, + ioremap(octeon_bootinfo->led_display_base_addr, 8); int i; for (i = 0; i < 8; i++, s++) { diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c index 06d92fb37769..c238e95190ac 100644 --- a/arch/mips/generic/board-ocelot.c +++ b/arch/mips/generic/board-ocelot.c @@ -51,7 +51,7 @@ static void __init ocelot_earlyprintk_init(void) { void __iomem *uart_base; - uart_base = ioremap_nocache(UART_UART, 0x20); + uart_base = ioremap(UART_UART, 0x20); setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000); } diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index feb069cbf44e..655f40ddb6d1 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -63,7 +63,7 @@ .endm .macro local_irq_disable reg=t0 -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION lw \reg, TI_PRE_COUNT($28) addi \reg, \reg, 1 sw \reg, TI_PRE_COUNT($28) @@ -73,7 +73,7 @@ xori \reg, \reg, 1 mtc0 \reg, CP0_STATUS irq_disable_hazard -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION lw \reg, TI_PRE_COUNT($28) addi \reg, \reg, -1 sw \reg, TI_PRE_COUNT($28) diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index c46c59b0f1b4..49f0061a6051 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -15,7 +15,8 @@ static inline int __pure __get_cpu_type(const int cpu_type) { switch (cpu_type) { -#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF) +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ + defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) case CPU_LOONGSON2EF: #endif diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 3f6ce74335b4..cf1f2a4a2418 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -227,29 +227,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, */ 
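[Review note] The local SIZEOF_FIELD macro dropped from cvmx-bootmem.c is replaced by the generic sizeof_field() from include/linux/stddef.h, which has the same expansion:

	#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))

	/* Example: member size without needing an instance of the struct. */
	struct example {
		u32 flags;
		u64 base_addr;
	};
	/* sizeof_field(struct example, base_addr) == 8 */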
#define ioremap(offset, size) \ __ioremap_mode((offset), (size), _CACHE_UNCACHED) - -/* - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - */ -#define ioremap_nocache(offset, size) \ - __ioremap_mode((offset), (size), _CACHE_UNCACHED) -#define ioremap_uc ioremap_nocache +#define ioremap_uc ioremap /* * ioremap_cache - map bus memory into CPU space diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 4993db40482c..ee26f9a4575d 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -49,8 +49,26 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -/* How to get the thread information struct from C. */ +/* + * A pointer to the struct thread_info for the currently executing thread is + * held in register $28/$gp. + * + * We declare __current_thread_info as a global register variable rather than a + * local register variable within current_thread_info() because clang doesn't + * support explicit local register variables. + * + * When building the VDSO we take care not to declare the global register + * variable because this causes GCC to not preserve the value of $28/$gp in + * functions that change its value (which is common in the PIC VDSO when + * accessing the GOT). Since the VDSO shouldn't be accessing + * __current_thread_info anyway we declare it extern in order to cause a link + * failure if it's referenced. + */ +#ifdef __VDSO__ +extern struct thread_info *__current_thread_info; +#else register struct thread_info *__current_thread_info __asm__("$28"); +#endif static inline struct thread_info *current_thread_info(void) { diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index b08825531e9f..a58687e26c5d 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -26,8 +26,6 @@ #define __VDSO_USE_SYSCALL ULLONG_MAX -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - static __always_inline long gettimeofday_fallback( struct __kernel_old_timeval *_tv, struct timezone *_tz) @@ -48,17 +46,6 @@ static __always_inline long gettimeofday_fallback( return error ? 
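[Review note] The new thread_info.h comment is worth unpacking. The global form below pins a variable to $28 for the whole translation unit, so current_thread_info() compiles to a plain register move; the local form, sketched second, is the one the comment says clang cannot handle:

	/* Global form (what the kernel uses): accepted by GCC and clang. */
	register struct thread_info *__current_thread_info __asm__("$28");

	static inline struct thread_info *current_thread_info(void)
	{
		return __current_thread_info;
	}

	/* Local form: GCC-only, per the comment above. */
	static inline struct thread_info *current_thread_info_local(void)
	{
		register struct thread_info *ti __asm__("$28");
		return ti;
	}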
-ret : ret; } -#else - -static __always_inline long gettimeofday_fallback( - struct __kernel_old_timeval *_tv, - struct timezone *_tz) -{ - return -1; -} - -#endif - static __always_inline long clock_gettime_fallback( clockid_t _clkid, struct __kernel_timespec *_ts) @@ -109,8 +96,6 @@ static __always_inline int clock_getres_fallback( #if _MIPS_SIM != _MIPS_SIM_ABI64 -#define VDSO_HAS_32BIT_FALLBACK 1 - static __always_inline long clock_gettime32_fallback( clockid_t _clkid, struct old_timespec32 *_ts) diff --git a/arch/mips/include/asm/vmalloc.h b/arch/mips/include/asm/vmalloc.h new file mode 100644 index 000000000000..25dc09b25eaf --- /dev/null +++ b/arch/mips/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MIPS_VMALLOC_H +#define _ASM_MIPS_VMALLOC_H + +#endif /* _ASM_MIPS_VMALLOC_H */ diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index f777e44653d5..47312c529410 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -50,6 +50,25 @@ static int __init_cache_level(unsigned int cpu) return 0; } +static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + + for_each_possible_cpu(cpu1) + if (cpus_are_siblings(cpu, cpu1)) + cpumask_set_cpu(cpu1, cpu_map); +} + +static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + int cluster = cpu_cluster(&cpu_data[cpu]); + + for_each_possible_cpu(cpu1) + if (cpu_cluster(&cpu_data[cpu1]) == cluster) + cpumask_set_cpu(cpu1, cpu_map); +} + static int __populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; @@ -57,14 +76,20 @@ static int __populate_cache_leaves(unsigned int cpu) struct cacheinfo *this_leaf = this_cpu_ci->info_list; if (c->icache.waysize) { + /* L1 caches are per core */ + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA); + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST); } else { populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED); } - if (c->scache.waysize) + if (c->scache.waysize) { + /* L2 cache is per cluster */ + fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map); populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED); + } if (c->tcache.waysize) populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED); diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 5469d43b6966..4849a48afc0f 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -19,7 +19,7 @@ #include <asm/thread_info.h> #include <asm/war.h> -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION #define resume_kernel restore_all #else #define __ret_from_irq ret_from_exception @@ -27,7 +27,7 @@ .text .align 5 -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION FEXPORT(ret_from_exception) local_irq_disable # preempt stop b __ret_from_irq @@ -53,7 +53,7 @@ resume_userspace: bnez t0, work_pending j restore_all -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION resume_kernel: local_irq_disable lw t0, TI_PRE_COUNT($28) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index e5ea3db23d6b..cdb93ed91cde 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -194,7 +194,7 @@ static void mips_cm_probe_l2sync(void) write_gcr_l2_only_sync_base(addr | CM_GCR_L2_ONLY_SYNC_BASE_SYNCEN); /* Map the region */ - mips_cm_l2sync_base = ioremap_nocache(addr, MIPS_CM_L2SYNC_SIZE); + mips_cm_l2sync_base = ioremap(addr, MIPS_CM_L2SYNC_SIZE); } int mips_cm_probe(void) @@ -215,7 +215,7 @@ 
int mips_cm_probe(void) if (!addr) return -ENODEV; - mips_gcr_base = ioremap_nocache(addr, MIPS_CM_GCR_SIZE); + mips_gcr_base = ioremap(addr, MIPS_CM_GCR_SIZE); if (!mips_gcr_base) return -ENXIO; diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 69e3e0b556bf..8d2535123f11 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -78,7 +78,7 @@ int mips_cpc_probe(void) if (!addr) return -ENODEV; - mips_cpc_base = ioremap_nocache(addr, 0x8000); + mips_cpc_base = ioremap(addr, 0x8000); if (!mips_cpc_base) return -ENXIO; diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 037b08f3257e..42222f849bd2 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -221,16 +221,16 @@ void __init ltq_soc_init(void) res_sys[2].name) < 0)) pr_err("Failed to request core resources"); - status_membase = ioremap_nocache(res_status.start, + status_membase = ioremap(res_status.start, resource_size(&res_status)); - ltq_ebu_membase = ioremap_nocache(res_ebu.start, + ltq_ebu_membase = ioremap(res_ebu.start, resource_size(&res_ebu)); if (!status_membase || !ltq_ebu_membase) panic("Failed to remap core resources"); for (i = 0; i < 3; i++) { - sysctl_membase[i] = ioremap_nocache(res_sys[i].start, + sysctl_membase[i] = ioremap(res_sys[i].start, resource_size(&res_sys[i])); if (!sysctl_membase[i]) panic("Failed to remap sysctrl resources"); diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 115b417dfb8e..df8eed3875f6 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -349,7 +349,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) res.name)) pr_err("Failed to request icu%i memory\n", vpe); - ltq_icu_membase[vpe] = ioremap_nocache(res.start, + ltq_icu_membase[vpe] = ioremap(res.start, resource_size(&res)); if (!ltq_icu_membase[vpe]) @@ -402,7 +402,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) res.name)) pr_err("Failed to request eiu memory"); - ltq_eiu_membase = ioremap_nocache(res.start, + ltq_eiu_membase = ioremap(res.start, resource_size(&res)); if (!ltq_eiu_membase) panic("Failed to remap eiu memory"); diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 156a95ac5c72..aa37545ebe8f 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -431,10 +431,10 @@ void __init ltq_soc_init(void) res_ebu.name)) pr_err("Failed to request core resources"); - pmu_membase = ioremap_nocache(res_pmu.start, resource_size(&res_pmu)); - ltq_cgu_membase = ioremap_nocache(res_cgu.start, + pmu_membase = ioremap(res_pmu.start, resource_size(&res_pmu)); + ltq_cgu_membase = ioremap(res_cgu.start, resource_size(&res_cgu)); - ltq_ebu_membase = ioremap_nocache(res_ebu.start, + ltq_ebu_membase = ioremap(res_ebu.start, resource_size(&res_ebu)); if (!pmu_membase || !ltq_cgu_membase || !ltq_ebu_membase) panic("Failed to remap core resources"); diff --git a/arch/mips/loongson2ef/common/reset.c b/arch/mips/loongson2ef/common/reset.c index e7c87161ce00..e49c40646995 100644 --- a/arch/mips/loongson2ef/common/reset.c +++ b/arch/mips/loongson2ef/common/reset.c @@ -17,11 +17,11 @@ static inline void loongson_reboot(void) { #ifndef CONFIG_CPU_JUMP_WORKAROUNDS - ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); + ((void (*)(void))ioremap(LOONGSON_BOOT_BASE, 4)) (); #else void (*func)(void); - func = (void *)ioremap_nocache(LOONGSON_BOOT_BASE, 4); + func = (void 
*)ioremap(LOONGSON_BOOT_BASE, 4); __asm__ __volatile__( " .set noat \n" diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c index 73dd25142484..fd76114fa3b0 100644 --- a/arch/mips/loongson32/common/prom.c +++ b/arch/mips/loongson32/common/prom.c @@ -26,13 +26,13 @@ void __init prom_init(void) memsize = DEFAULT_MEMSIZE; if (strstr(arcs_cmdline, "console=ttyS3")) - uart_base = ioremap_nocache(LS1X_UART3_BASE, 0x0f); + uart_base = ioremap(LS1X_UART3_BASE, 0x0f); else if (strstr(arcs_cmdline, "console=ttyS2")) - uart_base = ioremap_nocache(LS1X_UART2_BASE, 0x0f); + uart_base = ioremap(LS1X_UART2_BASE, 0x0f); else if (strstr(arcs_cmdline, "console=ttyS1")) - uart_base = ioremap_nocache(LS1X_UART1_BASE, 0x0f); + uart_base = ioremap(LS1X_UART1_BASE, 0x0f); else - uart_base = ioremap_nocache(LS1X_UART0_BASE, 0x0f); + uart_base = ioremap(LS1X_UART0_BASE, 0x0f); setup_8250_early_printk_port((unsigned long)uart_base, 0, 0); } diff --git a/arch/mips/loongson32/common/reset.c b/arch/mips/loongson32/common/reset.c index 6c36a414dde7..0c7399b303fb 100644 --- a/arch/mips/loongson32/common/reset.c +++ b/arch/mips/loongson32/common/reset.c @@ -37,7 +37,7 @@ static void ls1x_power_off(void) static int __init ls1x_reboot_setup(void) { - wdt_reg_base = ioremap_nocache(LS1X_WDT_BASE, (SZ_4 + SZ_8)); + wdt_reg_base = ioremap(LS1X_WDT_BASE, (SZ_4 + SZ_8)); if (!wdt_reg_base) panic("Failed to remap watchdog registers"); diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c index f97662045c73..4cc73f7ac0d4 100644 --- a/arch/mips/loongson32/common/time.c +++ b/arch/mips/loongson32/common/time.c @@ -49,7 +49,7 @@ static inline void ls1x_pwmtimer_restart(void) void __init ls1x_pwmtimer_init(void) { - timer_reg_base = ioremap_nocache(LS1X_TIMER_BASE, SZ_16); + timer_reg_base = ioremap(LS1X_TIMER_BASE, SZ_16); if (!timer_reg_base) panic("Failed to remap timer registers"); diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index 88b3bd5fed25..bc7671079f0c 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -17,7 +17,7 @@ static inline void loongson_reboot(void) { - ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); + ((void (*)(void))ioremap(LOONGSON_BOOT_BASE, 4)) (); } static void loongson_restart(char *command) diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c index 98a063093b69..0ddf03df6268 100644 --- a/arch/mips/mti-malta/malta-dtshim.c +++ b/arch/mips/mti-malta/malta-dtshim.c @@ -240,7 +240,7 @@ static void __init remove_gic(void *fdt) * On systems using the RocIT system controller a GIC may be * present without a CM. Detect whether that is the case. 
*/ - biu_base = ioremap_nocache(MSC01_BIU_REG_BASE, + biu_base = ioremap(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ); sc_cfg = __raw_readl(biu_base + MSC01_SC_CFG_OFS); if (sc_cfg & MSC01_SC_CFG_GICPRES_MSK) { diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 46b76751f3a5..561154cbcc40 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -604,6 +604,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) { int off, b_off; + int tcc_reg; ctx->flags |= EBPF_SEEN_TC; /* @@ -616,14 +617,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) b_off = b_imm(this_idx + 1, ctx); emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); /* - * if (--TCC < 0) + * if (TCC-- < 0) * goto out; */ /* Delay slot */ - emit_instr(ctx, daddiu, MIPS_R_T5, - (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; + emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, MIPS_R_T5, b_off); + emit_instr(ctx, bltz, tcc_reg, b_off); /* * prog = array->ptrs[index]; * if (prog == NULL) @@ -1803,7 +1804,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!prog->jit_requested || MIPS_ISA_REV < 2) + if (!prog->jit_requested) return prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index 4f2411f489af..01a2af8215c8 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -409,7 +409,7 @@ static int alchemy_pci_probe(struct platform_device *pdev) goto out6; } - ctx->regs = ioremap_nocache(r->start, resource_size(r)); + ctx->regs = ioremap(r->start, resource_size(r)); if (!ctx->regs) { dev_err(&pdev->dev, "cannot map pci regs\n"); ret = -ENODEV; diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 0fed6fc17fe4..490953f51528 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -441,7 +441,7 @@ static int ar2315_pci_probe(struct platform_device *pdev) apc->mem_res.flags = IORESOURCE_MEM; /* Remap PCI config space */ - apc->cfg_mem = devm_ioremap_nocache(dev, res->start, + apc->cfg_mem = devm_ioremap(dev, res->start, AR2315_PCI_CFG_SIZE); if (!apc->cfg_mem) { dev_err(dev, "failed to remap PCI config space\n"); diff --git a/arch/mips/pci/pci-bcm63xx.c b/arch/mips/pci/pci-bcm63xx.c index 151d9b5870bb..5548365605c0 100644 --- a/arch/mips/pci/pci-bcm63xx.c +++ b/arch/mips/pci/pci-bcm63xx.c @@ -221,7 +221,7 @@ static int __init bcm63xx_register_pci(void) * a spinlock for each io access, so this is currently kind of * broken on SMP. 
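[Review note] The ebpf_jit.c hunk fixes an off-by-one in the tail-call budget: the old code branched on the already-decremented temporary, the new code branches on the pre-decrement value, exactly as the updated comment says. In C terms (register allocation elided; TCC is seeded at program entry):

	int tcc;	/* tail-call counter, counts down */

	/* old, off by one: the decrement lands before the test */
	if (--tcc < 0)
		goto out;

	/* new: test the current count; the decremented copy in T5 still
	 * becomes the TCC handed to the next program */
	if (tcc-- < 0)
		goto out;

The companion Kconfig change above moves the ISA-revision requirement into HAVE_EBPF_JIT itself, which is why the runtime MIPS_ISA_REV check in bpf_int_jit_compile() can go.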
*/ - pci_iospace_start = ioremap_nocache(BCM_PCI_IO_BASE_PA, 4); + pci_iospace_start = ioremap(BCM_PCI_IO_BASE_PA, 4); if (!pci_iospace_start) return -ENOMEM; diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index c9f4d4ba058a..e1f12e398136 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -218,7 +218,7 @@ static int rt288x_pci_probe(struct platform_device *pdev) { void __iomem *io_map_base; - rt2880_pci_base = ioremap_nocache(RT2880_PCI_BASE, PAGE_SIZE); + rt2880_pci_base = ioremap(RT2880_PCI_BASE, PAGE_SIZE); io_map_base = ioremap(RT2880_PCI_IO_BASE, RT2880_PCI_IO_SIZE); rt2880_pci_controller.io_map_base = (unsigned long) io_map_base; diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c index 8c236738b5ee..25372e62783b 100644 --- a/arch/mips/pic32/pic32mzda/early_console.c +++ b/arch/mips/pic32/pic32mzda/early_console.c @@ -135,7 +135,7 @@ void __init fw_init_early_console(char port) char *arch_cmdline = pic32_getcmdline(); int baud = -1; - uart_base = ioremap_nocache(PIC32_BASE_UART, 0xc00); + uart_base = ioremap(PIC32_BASE_UART, 0xc00); baud = get_baud_from_cmdline(arch_cmdline); if (port == -1) diff --git a/arch/mips/pic32/pic32mzda/early_pin.c b/arch/mips/pic32/pic32mzda/early_pin.c index 504e6ab399b5..f2822632b017 100644 --- a/arch/mips/pic32/pic32mzda/early_pin.c +++ b/arch/mips/pic32/pic32mzda/early_pin.c @@ -122,7 +122,7 @@ static const struct void pic32_pps_input(int function, int pin) { - void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0xF4); + void __iomem *pps_base = ioremap(PPS_BASE, 0xF4); int i; for (i = 0; i < ARRAY_SIZE(input_pin_reg); i++) { @@ -252,7 +252,7 @@ static const struct void pic32_pps_output(int function, int pin) { - void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x170); + void __iomem *pps_base = ioremap(PPS_BASE, 0x170); int i; for (i = 0; i < ARRAY_SIZE(output_pin_reg); i++) { diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c index 8e6e8db8dd5f..940c684f6921 100644 --- a/arch/mips/pmcs-msp71xx/msp_serial.c +++ b/arch/mips/pmcs-msp71xx/msp_serial.c @@ -105,7 +105,7 @@ void __init msp_serial_setup(void) /* Initialize first serial port */ up.mapbase = MSP_UART0_BASE; - up.membase = ioremap_nocache(up.mapbase, MSP_UART_REG_LEN); + up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); up.irq = MSP_INT_UART0; up.uartclk = uartclk; up.regshift = 2; @@ -143,7 +143,7 @@ void __init msp_serial_setup(void) } up.mapbase = MSP_UART1_BASE; - up.membase = ioremap_nocache(up.mapbase, MSP_UART_REG_LEN); + up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); up.irq = MSP_INT_UART1; up.line = 1; up.private_data = (void*)UART1_STATUS_REG; diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index c945d76cfce5..220ca0cd7945 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -165,7 +165,7 @@ static int __init intc_of_init(struct device_node *node, res.name)) pr_err("Failed to request intc memory"); - rt_intc_membase = ioremap_nocache(res.start, + rt_intc_membase = ioremap(res.start, resource_size(&res)); if (!rt_intc_membase) panic("Failed to remap intc memory"); diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 59b23095bfbb..90c6d4a11c5d 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -43,7 +43,7 @@ __iomem void *plat_of_remap_node(const char *node) res.name)) panic("Failed to request resources for %s", node); - return ioremap_nocache(res.start, resource_size(&res)); + return 
ioremap(res.start, resource_size(&res)); } void __init device_tree_init(void) diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index c9ecf17f8660..dd34f1b32b79 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -286,7 +286,7 @@ static int __init plat_setup_devices(void) nand_slot0_res[0].end = nand_slot0_res[0].start + 0x1000; /* Read and map device controller 3 */ - dev3.base = ioremap_nocache(readl(IDT434_REG_BASE + DEV3BASE), 1); + dev3.base = ioremap(readl(IDT434_REG_BASE + DEV3BASE), 1); if (!dev3.base) { printk(KERN_ERR "rb532: cannot remap device controller 3\n"); diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index fdc704abc8d4..94f02ada4082 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -192,7 +192,7 @@ int __init rb532_gpio_init(void) struct resource *r; r = rb532_gpio_reg0_res; - rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r)); + rb532_gpio_chip->regbase = ioremap(r->start, resource_size(r)); if (!rb532_gpio_chip->regbase) { printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c index 26e957b21fbf..303cc3dc1749 100644 --- a/arch/mips/rb532/prom.c +++ b/arch/mips/rb532/prom.c @@ -110,7 +110,7 @@ void __init prom_init(void) phys_addr_t memsize; phys_addr_t ddrbase; - ddr = ioremap_nocache(ddr_reg[0].start, + ddr = ioremap(ddr_reg[0].start, ddr_reg[0].end - ddr_reg[0].start); if (!ddr) { diff --git a/arch/mips/rb532/setup.c b/arch/mips/rb532/setup.c index 1aa4df1385cb..51af9d374d66 100644 --- a/arch/mips/rb532/setup.c +++ b/arch/mips/rb532/setup.c @@ -49,7 +49,7 @@ void __init plat_mem_setup(void) set_io_port_base(KSEG1); - pci_reg = ioremap_nocache(pci0_res[0].start, + pci_reg = ioremap(pci0_res[0].start, pci0_res[0].end - pci0_res[0].start); if (!pci_reg) { printk(KERN_ERR "Could not remap PCI registers\n"); diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c index 160b88000b4b..f6fa9afcbfd3 100644 --- a/arch/mips/sni/rm200.c +++ b/arch/mips/sni/rm200.c @@ -399,10 +399,10 @@ void __init sni_rm200_i8259_irqs(void) { int i; - rm200_pic_master = ioremap_nocache(0x16000020, 4); + rm200_pic_master = ioremap(0x16000020, 4); if (!rm200_pic_master) return; - rm200_pic_slave = ioremap_nocache(0x160000a0, 4); + rm200_pic_slave = ioremap(0x160000a0, 4); if (!rm200_pic_slave) { iounmap(rm200_pic_master); return; diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index e05938997e69..b2a2e032dc99 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -18,6 +18,10 @@ ccflags-vdso := \ $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ -D__VDSO__ +ifndef CONFIG_64BIT +ccflags-vdso += -DBUILD_VDSO32 +endif + ifdef CONFIG_CC_IS_CLANG ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c index 6ebdc37c89fc..6b83b6376a4b 100644 --- a/arch/mips/vdso/vgettimeofday.c +++ b/arch/mips/vdso/vgettimeofday.c @@ -17,12 +17,22 @@ int __vdso_clock_gettime(clockid_t clock, return __cvdso_clock_gettime32(clock, ts); } +#ifdef CONFIG_MIPS_CLOCK_VSYSCALL + +/* + * This is behind the ifdef so that we don't provide the symbol when there's no + * possibility of there being a usable clocksource, because there's nothing we + * can do without it. When libc fails the symbol lookup it should fall back on + * the standard syscall path. 
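[Review note] The comment added to vgettimeofday.c (and repeated for the 64-bit variant below) leans on the loader-side contract. Hypothetically, the libc side looks like this; names are illustrative, not actual libc internals:

	#include <sys/syscall.h>
	#include <sys/time.h>
	#include <unistd.h>

	/* Resolved at startup from the vDSO image found via AT_SYSINFO_EHDR;
	 * stays NULL when the kernel did not export the symbol. */
	static int (*vdso_gtod)(struct timeval *, struct timezone *);

	int my_gettimeofday(struct timeval *tv, struct timezone *tz)
	{
		if (vdso_gtod)
			return vdso_gtod(tv, tz);
		return syscall(SYS_gettimeofday, tv, tz);	/* fallback */
	}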
+ */ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ + int __vdso_clock_getres(clockid_t clock_id, struct old_timespec32 *res) { @@ -43,12 +53,22 @@ int __vdso_clock_gettime(clockid_t clock, return __cvdso_clock_gettime(clock, ts); } +#ifdef CONFIG_MIPS_CLOCK_VSYSCALL + +/* + * This is behind the ifdef so that we don't provide the symbol when there's no + * possibility of there being a usable clocksource, because there's nothing we + * can do without it. When libc fails the symbol lookup it should fall back on + * the standard syscall path. + */ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ + int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 12c06a833b7c..e30298e99e1b 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -62,7 +62,7 @@ config GENERIC_HWEIGHT config GENERIC_LOCKBREAK def_bool y - depends on PREEMPT + depends on PREEMPTION config TRACE_IRQFLAGS_SUPPORT def_bool y diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index d9ac7e6408ef..caddded56e77 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -9,7 +9,11 @@ #define PG_dcache_dirty PG_arch_1 void flush_icache_range(unsigned long start, unsigned long end); +#define flush_icache_range flush_icache_range + void flush_icache_page(struct vm_area_struct *vma, struct page *page); +#define flush_icache_page flush_icache_page + #ifdef CONFIG_CPU_CACHE_ALIASING void flush_cache_mm(struct mm_struct *mm); void flush_cache_dup_mm(struct mm_struct *mm); @@ -40,12 +44,11 @@ void invalidate_kernel_vmap_range(void *addr, int size); #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages) #else -#include <asm-generic/cacheflush.h> -#undef flush_icache_range -#undef flush_icache_page -#undef flush_icache_user_range void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); +#define flush_icache_user_range flush_icache_user_range + +#include <asm-generic/cacheflush.h> #endif #endif /* __NDS32_CACHEFLUSH_H__ */ diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 0214e4150539..6abc58ac406d 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -195,7 +195,7 @@ extern void paging_init(void); #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) -#define pmd_off_k(address) pmd_offset(pgd_offset_k(address), address) +#define pmd_off_k(address) pmd_offset(pud_offset(p4d_offset(pgd_offset_k(address), (address)), (address)), (address)) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) /* diff --git a/arch/nds32/include/asm/vmalloc.h b/arch/nds32/include/asm/vmalloc.h new file mode 100644 index 000000000000..caeed3898419 --- /dev/null +++ b/arch/nds32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NDS32_VMALLOC_H +#define _ASM_NDS32_VMALLOC_H + +#endif /* _ASM_NDS32_VMALLOC_H */ diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S index 1df02a793364..6a2966c2d8c8 100644 --- a/arch/nds32/kernel/ex-exit.S +++ b/arch/nds32/kernel/ex-exit.S @@ -72,7 +72,7 @@ restore_user_regs_last .endm -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION .macro preempt_stop .endm #else @@ 
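[Review note] The nds32 pmd_off_k() rewrite in this hunk is the generic five-level page-table walk with the middle levels folded away; written out:

	static pmd_t *pmd_off_k(unsigned long addr)
	{
		pgd_t *pgd = pgd_offset_k(addr);	/* kernel pgd entry */
		p4d_t *p4d = p4d_offset(pgd, addr);	/* folded: same entry */
		pud_t *pud = pud_offset(p4d, addr);	/* folded: same entry */

		return pmd_offset(pud, addr);
	}

On two-level nds32 the p4d/pud steps compile away, so the spelled-out walk is cost-free.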
-158,7 +158,7 @@ no_work_pending: /* * preemptive kernel */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION resume_kernel: gie_disable lwi $t0, [tsk+#TSK_TI_PREEMPT] diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index fd2a54b8cd57..22ab77ea27ad 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -89,18 +89,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -int ftrace_arch_code_modify_prepare(void) -{ - set_all_modules_text_rw(); - return 0; -} - -int ftrace_arch_code_modify_post_process(void) -{ - set_all_modules_text_ro(); - return 0; -} - static unsigned long gen_sethi_insn(unsigned long addr) { unsigned long opcode = 0x46000000; diff --git a/arch/nios2/include/asm/vmalloc.h b/arch/nios2/include/asm/vmalloc.h new file mode 100644 index 000000000000..ec7a9260090b --- /dev/null +++ b/arch/nios2/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NIOS2_VMALLOC_H +#define _ASM_NIOS2_VMALLOC_H + +#endif /* _ASM_NIOS2_VMALLOC_H */ diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S index 1e515ccd698e..3d8d1d0bcb64 100644 --- a/arch/nios2/kernel/entry.S +++ b/arch/nios2/kernel/entry.S @@ -365,7 +365,7 @@ ENTRY(ret_from_interrupt) ldw r1, PT_ESTATUS(sp) /* check if returning to kernel */ TSTBNZ r1, r1, ESTATUS_EU, Luser_return -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION GET_THREAD_INFO r1 ldw r4, TI_PREEMPT_COUNT(r1) bne r4, r0, restore_all diff --git a/arch/nios2/mm/ioremap.c b/arch/nios2/mm/ioremap.c index b56af759dcdf..819bdfcc2e71 100644 --- a/arch/nios2/mm/ioremap.c +++ b/arch/nios2/mm/ioremap.c @@ -138,6 +138,14 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size) return NULL; } + /* + * Map uncached objects in the low part of address space to + * CONFIG_NIOS2_IO_REGION_BASE + */ + if (IS_MAPPABLE_UNCACHEABLE(phys_addr) && + IS_MAPPABLE_UNCACHEABLE(last_addr)) + return (void __iomem *)(CONFIG_NIOS2_IO_REGION_BASE + phys_addr); + /* Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; phys_addr &= PAGE_MASK; diff --git a/arch/openrisc/include/asm/vmalloc.h b/arch/openrisc/include/asm/vmalloc.h new file mode 100644 index 000000000000..75435eceec32 --- /dev/null +++ b/arch/openrisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_OPENRISC_VMALLOC_H +#define _ASM_OPENRISC_VMALLOC_H + +#endif /* _ASM_OPENRISC_VMALLOC_H */ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b16237c95ea3..71034b54d74e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -18,7 +18,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_KERNEL_BZIP2 @@ -62,6 +62,7 @@ config PARISC select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE select HAVE_KPROBES_ON_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_COPY_THREAD_TLS help The PA-RISC microprocessor is designed by Hewlett-Packard and used @@ -81,7 +82,7 @@ config STACK_GROWSUP config GENERIC_LOCKBREAK bool default y - depends on SMP && PREEMPT + depends on SMP && PREEMPTION config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index f627c37dad9c..ab5c215cf46c 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) ** if (((unsigned long)p & 0xf) == 0) ** return __ldcw(p); */ -#define xchg(ptr, x) \ - 
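[Review note] The nios2 ioremap() addition above short-circuits page-table setup when the whole physical range sits inside the fixed uncached window; conceptually (constants are illustrative, not the real nios2 values):

	#define IO_WINDOW_BASE	0xe0000000UL	/* illustrative */
	#define IO_WINDOW_SPAN	0x20000000UL	/* illustrative */

	static void __iomem *window_ioremap(unsigned long phys, unsigned long size)
	{
		if (phys < IO_WINDOW_SPAN && phys + size - 1 < IO_WINDOW_SPAN)
			return (void __iomem *)(IO_WINDOW_BASE + phys);
		return NULL;	/* caller falls back to a page-table mapping */
	}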
((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _x_ = (x); \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)_x_, (ptr), sizeof(*(ptr))); \ + __ret; \ +}) /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 46212b52c23e..cab8f64ca4a2 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -128,9 +128,8 @@ static inline void gsc_writeq(unsigned long long val, unsigned long addr) * The standard PCI ioremap interfaces */ void __iomem *ioremap(unsigned long offset, unsigned long size); -#define ioremap_nocache(off, sz) ioremap((off), (sz)) -#define ioremap_wc ioremap_nocache -#define ioremap_uc ioremap_nocache +#define ioremap_wc ioremap +#define ioremap_uc ioremap extern void iounmap(const volatile void __iomem *addr); diff --git a/arch/parisc/include/asm/kexec.h b/arch/parisc/include/asm/kexec.h index a99ea747d7ed..87e174006995 100644 --- a/arch/parisc/include/asm/kexec.h +++ b/arch/parisc/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ASM_PARISC_KEXEC_H #define _ASM_PARISC_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -32,6 +30,4 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ASM_PARISC_KEXEC_H */ diff --git a/arch/parisc/include/asm/vmalloc.h b/arch/parisc/include/asm/vmalloc.h new file mode 100644 index 000000000000..1088ae4e7af9 --- /dev/null +++ b/arch/parisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_PARISC_VMALLOC_H +#define _ASM_PARISC_VMALLOC_H + +#endif /* _ASM_PARISC_VMALLOC_H */ diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 2663c8f8be11..068d90950d93 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -37,5 +37,5 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_KEXEC) += kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 3b330e58a4f0..a5f3e50fe976 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -810,7 +810,7 @@ EXPORT_SYMBOL(device_to_hwpath); static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high, struct device *parent); -static void walk_lower_bus(struct parisc_device *dev) +static void __init walk_lower_bus(struct parisc_device *dev) { unsigned long io_io_low, io_io_high; @@ -889,8 +889,8 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - pr_info("%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", - ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, + pr_info("%d. 
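[Review note] The parisc xchg() rewrite swaps a bare cast for a GNU statement expression, which gives the macro a properly typed result and evaluates x exactly once. The same idiom in miniature:

	/* Side-effect-safe: 'var' and 'val' are each evaluated once, and
	 * the whole ({ ... }) yields __old with the right type. */
	#define fetch_and_set(var, val)			\
	({						\
		__typeof__(var) __old = (var);		\
		(var) = (val);				\
		__old;					\
	})

	/* usage: int prev = fetch_and_set(counter, 0); */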
%s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); if (dev->num_addrs) { diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index b96d74496977..9a03e29c8733 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -940,14 +940,14 @@ intr_restore: rfi nop -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION # define intr_do_preempt intr_restore -#endif /* !CONFIG_PREEMPT */ +#endif /* !CONFIG_PREEMPTION */ .import schedule,code intr_do_resched: /* Only call schedule on return to userspace. If we're returning - * to kernel space, we may schedule if CONFIG_PREEMPT, otherwise + * to kernel space, we may schedule if CONFIG_PREEMPTION, otherwise * we jump back to intr_restore. */ LDREG PT_IASQ0(%r16), %r20 @@ -979,7 +979,7 @@ intr_do_resched: * and preempt_count is 0. otherwise, we continue on * our merry way back to the current running task. */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION .import preempt_schedule_irq,code intr_do_preempt: rsm PSW_SM_I, %r0 /* disable interrupts */ @@ -999,7 +999,7 @@ intr_do_preempt: nop b,n intr_restore /* ssm PSW_SM_I done by intr_restore */ -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ /* * External interrupts. diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index 36434d4da381..749c4579db0d 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -327,8 +327,7 @@ static int pdt_mainloop(void *unused) ((pde & PDT_ADDR_SINGLE_ERR) == 0)) memory_failure(pde >> PAGE_SHIFT, 0); else - soft_offline_page( - pfn_to_page(pde >> PAGE_SHIFT), 0); + soft_offline_page(pde >> PAGE_SHIFT, 0); #else pr_crit("PDT: memory error at 0x%lx ignored.\n" "Rebuild kernel with CONFIG_MEMORY_FAILURE=y " diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 676683641d00..e1a8fee3ad49 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -792,7 +792,7 @@ static int perf_write_image(uint64_t *memaddr) return -1; } - runway = ioremap_nocache(cpu_device->hpa.start, 4096); + runway = ioremap(cpu_device->hpa.start, 4096); if (!runway) { pr_err("perf_write_image: ioremap failed!\n"); return -ENOMEM; diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ecc5c2771208..230a6422b99f 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -208,8 +208,8 @@ arch_initcall(parisc_idle_init); * Copy architecture-specific thread state */ int -copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p) +copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *cregs = &(p->thread.regs); void *stack = task_stack_page(p); @@ -254,9 +254,9 @@ copy_thread(unsigned long clone_flags, unsigned long usp, cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE; cregs->kpc = (unsigned long) &child_return; - /* Setup thread TLS area from the 4th parameter in clone */ + /* Setup thread TLS area */ if (clone_flags & CLONE_SETTLS) - cregs->cr27 = cregs->gr[23]; + cregs->cr27 = tls; } return 0; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ddca8287d43b..354cf060b67f 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -401,7 +401,7 @@ static void __init map_pages(unsigned long start_vaddr, pmd = (pmd_t *) __pa(pmd); } - pgd_populate(NULL, pg_dir, 
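[Review note] The print_parisc_device() change trades %px for %pap, the printk extension for physical addresses: it prints phys_addr_t/resource_size_t at its native width on 32- and 64-bit alike and, unlike %px, it takes a pointer to the value. Minimal sketch:

	static void show_region(struct resource *res)
	{
		/* note the address-of: %pap dereferences its argument */
		pr_info("region at %pap\n", &res->start);
	}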
__va(pmd)); + pud_populate(NULL, (pud_t *)pg_dir, __va(pmd)); #endif pg_dir++; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1ec34e16ed65..e7c607059f54 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -106,7 +106,7 @@ config LOCKDEP_SUPPORT config GENERIC_LOCKBREAK bool default y - depends on SMP && PREEMPT + depends on SMP && PREEMPTION config GENERIC_HWEIGHT bool @@ -149,7 +149,7 @@ config PPC select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DYNAMIC_FTRACE if FUNCTION_TRACER @@ -455,11 +455,7 @@ config PPC_TRANSACTIONAL_MEM config PPC_UV bool "Ultravisor support" depends on KVM_BOOK3S_HV_POSSIBLE - select ZONE_DEVICE - select DEV_PAGEMAP_OPS - select DEVICE_PRIVATE - select MEMORY_HOTPLUG - select MEMORY_HOTREMOVE + depends on DEVICE_PRIVATE default n help This option paravirtualizes the kernel to run in POWER platforms that diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi index e1a961f05dcd..baa0c503e741 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi index c288f3c6c637..93095600e808 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy6: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi index 94f3e7175012..ff4bd38f0645 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi index 94a76982d214..1fa38ed6f59e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy7: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi index b5ff5f71c6b8..a8cc9780c0c4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore 
read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi index ee44182c6348..8b8bd70c9382 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi index f05f0d775039..619c880b54d8 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy2: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi index a9114ec51075..d7ebb73a400d 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy3: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi index 44dd00ac7367..b151d696a069 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy4: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi index 5b1b84b58602..adc0ae0013a3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy5: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi index 0e1daaef9e74..435047e0e250 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy14: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi index 68c5ef779266..c098657cca0a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy15: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi 
b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi index 605363cc1117..9d06824815f3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy8: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi index 1955dfa13634..70e947730c4b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy9: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi index 2c1476454ee0..ad96e6529595 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy10: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi index b8b541ff5fb0..034bc4b71f7a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy11: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi index 4b2cfddd1b15..93ca23d82b39 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy12: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi index 0a52ddf7cc17..23b3117a2fd2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy13: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index fbe8df433019..123adcefd40f 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -18,8 +18,6 @@ * mb() prevents loads and stores being reordered across this point. * rmb() prevents loads being reordered across this point. * wmb() prevents stores being reordered across this point. - * read_barrier_depends() prevents data-dependent loads being reordered - * across this point (nop on PPC). * * *mb() variants without smp_ prefix must order all types of memory * operations with one another. 
sync is the only instruction sufficient diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 15b75005bc34..3fa1b962dc27 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -600,8 +600,11 @@ extern void slb_set_size(u16 size); * */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) + +// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel #define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ - MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT) + MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2) + /* * For platforms that support on 65bit VA we limit the context bits */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index a63ec938636d..635969b5b58e 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -691,8 +691,6 @@ static inline void iosync(void) * * ioremap_prot allows to specify the page flags as an argument and can * also be hooked by the platform via ppc_md. * - * * ioremap_nocache is identical to ioremap - * * * ioremap_wc enables write combining * * * ioremap_wt enables write through @@ -715,7 +713,6 @@ extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); -#define ioremap_nocache(addr, size) ioremap((addr), (size)) #define ioremap_uc(addr, size) ioremap((addr), (size)) #define ioremap_cache(addr, size) \ ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL)) diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index e9a960e28f3c..860228e917dc 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -15,6 +15,7 @@ * * (the type definitions are in asm/spinlock_types.h) */ +#include <linux/jump_label.h> #include <linux/irqflags.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -36,10 +37,12 @@ #endif #ifdef CONFIG_PPC_PSERIES +DECLARE_STATIC_KEY_FALSE(shared_processor); + #define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + if (!static_branch_unlikely(&shared_processor)) return false; return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); } @@ -110,13 +113,8 @@ static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; static inline bool is_shared_processor(void) { -/* - * LPPACA is only available on Pseries so guard anything LPPACA related to - * allow other platforms (which include this common header) to compile. 
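[Editor's aside — illustrative sketch, not part of the patch] The spinlock.h hunks above replace a firmware_has_feature() lookup on every vcpu_is_preempted()/is_shared_processor() call with the shared_processor static key, which pseries setup flips once at boot; after that the check compiles down to a runtime-patched branch instead of a load and compare. A minimal sketch of the pattern, where my_feature and platform_probe() are hypothetical names:

#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(my_feature);		/* off by default */

static void __init detect_my_feature(void)
{
	if (platform_probe())			/* hypothetical boot-time probe */
		static_branch_enable(&my_feature);
}

static inline bool my_feature_active(void)
{
	/* patched in place: a NOP (or short jump) on the common path */
	return static_branch_unlikely(&my_feature);
}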
- */ -#ifdef CONFIG_PPC_PSERIES - return (IS_ENABLED(CONFIG_PPC_SPLPAR) && - lppaca_shared_proc(local_paca->lppaca_ptr)); +#ifdef CONFIG_PPC_SPLPAR + return static_branch_unlikely(&shared_processor); #else return false; #endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 15002b51ff18..c92fe7fe9692 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -401,7 +401,7 @@ copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) return n; } -extern unsigned long __clear_user(void __user *addr, unsigned long size); +unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { @@ -409,12 +409,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(addr, size))) { allow_write_to_user(addr, size); - ret = __clear_user(addr, size); + ret = __arch_clear_user(addr, size); prevent_write_to_user(addr, size); } return ret; } +static inline unsigned long __clear_user(void __user *addr, unsigned long size) +{ + return clear_user(addr, size); +} + extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h new file mode 100644 index 000000000000..b992dfaaa161 --- /dev/null +++ b/arch/powerpc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_POWERPC_VMALLOC_H +#define _ASM_POWERPC_VMALLOC_H + +#endif /* _ASM_POWERPC_VMALLOC_H */ diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index f2dfcd50a2d3..33aee7490cbb 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -39,6 +39,7 @@ #define XIVE_ESB_VAL_P 0x2 #define XIVE_ESB_VAL_Q 0x1 +#define XIVE_ESB_INVALID 0xFF /* * Thread Management (aka "TM") registers diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d60908ea37fb..e1a4c39b83b8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -897,7 +897,7 @@ resume_kernel: bne- 0b 1: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION /* check current_thread_info->preempt_count */ lwz r0,TI_PREEMPT(r2) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ @@ -921,7 +921,7 @@ resume_kernel: */ bl trace_hardirqs_on #endif -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ restore_kuap: kuap_restore r1, r2, r9, r10, r0 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3fd3ef352e3f..a9a1d3cdb523 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -846,7 +846,7 @@ resume_kernel: bne- 0b 1: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION /* Check if we need to preempt */ andi. r0,r4,_TIF_NEED_RESCHED beq+ restore @@ -877,7 +877,7 @@ resume_kernel: li r10,MSR_RI mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ .globl fast_exc_return_irq fast_exc_return_irq: diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5645bc9cbc09..add67498c126 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs) trace_irq_entry(regs); - check_stack_overflow(); - /* * Query the platform PIC for the interrupt & ack it. 
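[Editor's aside — illustrative sketch, not part of the patch] The uaccess.h hunk above renames the raw assembly helper to __arch_clear_user() and reintroduces __clear_user() as a wrapper that goes through clear_user(), so no caller can bypass the access_ok() check or the allow/prevent_write_to_user() (KUAP) bracketing. A sketch of that guard pattern, with raw_zero_user(), open_user_access() and close_user_access() as hypothetical stand-ins for the arch-specific pieces:

#include <linux/uaccess.h>

/* unguarded helper, implemented in assembly elsewhere */
unsigned long raw_zero_user(void __user *addr, unsigned long size);

static inline unsigned long checked_zero_user(void __user *addr,
					      unsigned long size)
{
	unsigned long ret = size;		/* assume nothing was zeroed */

	might_fault();
	if (access_ok(addr, size)) {
		open_user_access(addr, size);	/* e.g. allow_write_to_user() */
		ret = raw_zero_user(addr, size);
		close_user_access(addr, size);	/* e.g. prevent_write_to_user() */
	}
	return ret;				/* bytes left un-zeroed */
}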
* @@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; + check_stack_overflow(); + /* Already there ? */ if (unlikely(cursp == irqsp || cursp == sirqsp)) { __do_irq(regs); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index dc53578193ee..6ff3f896d908 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4983,7 +4983,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); kvm->arch.process_table = 0; - uv_svm_terminate(kvm->arch.lpid); + if (kvm->arch.secure_guest) + uv_svm_terminate(kvm->arch.lpid); kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0496e66aaa56..c6fbbd29bd87 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1117,7 +1117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r7, VCPU_GPR(R7)(r4) bne ret_to_ultra - lwz r0, VCPU_CR(r4) + ld r0, VCPU_CR(r4) mtcr r0 ld r0, VCPU_GPR(R0)(r4) @@ -1137,7 +1137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * R3 = UV_RETURN */ ret_to_ultra: - lwz r0, VCPU_CR(r4) + ld r0, VCPU_CR(r4) mtcr r0 ld r0, VCPU_GPR(R3)(r4) diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfb..1ddb26394e8a 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 507b18b1660e..169872bc0892 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -17,7 +17,7 @@ PPC64_CACHES: .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. + * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -58,7 +58,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -181,4 +181,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9488b63dfc87..f5535eae637f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -151,10 +151,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); int ret; - __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); @@ -289,6 +288,14 @@ void __init mem_init(void) BUILD_BUG_ON(MMU_PAGE_COUNT > 16); #ifdef CONFIG_SWIOTLB + /* + * Some platforms (e.g. 85xx) limit DMA-able memory way below + * 4G. 
We force memblock to bottom-up mode to ensure that the + * memory allocated in swiotlb_init() is DMA-able. + * As it's the last memblock allocation, no need to reset it + * back to top-down. + */ + memblock_set_bottom_up(true); swiotlb_init(0); #endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 090af2d2d3e4..96eb8e43f39b 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -103,7 +103,7 @@ static void mmu_patch_addis(s32 *site, long simm) patch_instruction_site(site, instr); } -void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) +static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) { unsigned long s = offset; unsigned long v = PAGE_OFFSET + s; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 42bbcd47cc85..dffe1a45b6ed 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) { #endif -static inline bool slice_addr_is_low(unsigned long addr) +static inline notrace bool slice_addr_is_low(unsigned long addr) { u64 tmp = (u64)addr; @@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, mm_ctx_user_psize(&current->mm->context), 1); } -unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) { unsigned char *psizes; int index, mask_index; diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 6e5a2a4faeab..4ec2a9f14f84 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -97,12 +97,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #ifdef CONFIG_SMP #ifdef CONFIG_PPC64 #define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2); \ + do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \ PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ } while (0) #else #define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4); \ + do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \ PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \ } while(0) #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index d57b46e0dd60..0acc9d5fb19e 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -321,7 +321,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ @@ -333,16 +333,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /*** Ancillary info loads ***/ case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2); PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, protocol)); break; case BPF_ANC | SKF_AD_IFINDEX: case BPF_ANC | SKF_AD_HATYPE: - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + BUILD_BUG_ON(sizeof_field(struct
net_device, type) != 2); PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); @@ -365,17 +365,17 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ANC | SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, mark)); break; case BPF_ANC | SKF_AD_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, hash)); break; case BPF_ANC | SKF_AD_VLAN_TAG: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); @@ -388,7 +388,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_ANDI(r_A, r_A, 1); break; case BPF_ANC | SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, queue_mapping)); diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 91571841df8a..9dba7e880885 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -539,6 +539,16 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info, /* balloon page list reference */ get_page(newpage); + /* + * When we migrate a page to a different zone, we have to fixup the + * count of both involved zones as we adjusted the managed page count + * when inflating. + */ + if (page_zone(page) != page_zone(newpage)) { + adjust_managed_page_count(page, 1); + adjust_managed_page_count(newpage, -1); + } + spin_lock_irqsave(&b_dev_info->pages_lock, flags); balloon_page_insert(b_dev_info, newpage); balloon_page_delete(page); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0a40201f315f..0c8421dd01ab 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -74,6 +74,9 @@ #include "pseries.h" #include "../../../../drivers/pci/pci.h" +DEFINE_STATIC_KEY_FALSE(shared_processor); +EXPORT_SYMBOL_GPL(shared_processor); + int CMO_PrPSP = -1; int CMO_SecPSP = -1; unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); @@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); + + if (lppaca_shared_proc(get_lppaca())) + static_branch_enable(&shared_processor); + ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index f5fadbd2533a..9651ca061828 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -972,12 +972,21 @@ static int xive_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(data); + u8 pq; switch (which) { case IRQCHIP_STATE_ACTIVE: - *state = !xd->stale_p && - (xd->saved_p || - !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P)); + pq = xive_esb_read(xd, XIVE_ESB_GET); + + /* + * The esb value being all 1's means we couldn't get + * the PQ state of the interrupt through mmio. It may + * happen, for example when querying a PHB interrupt + * while the PHB is in an error state. 
We consider the + * interrupt to be inactive in that case. + */ + *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && + (xd->saved_p || !!(pq & XIVE_ESB_VAL_P)); return 0; default: return -EINVAL; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 759ffb00267c..fa7dc03459e7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -64,6 +64,8 @@ config RISCV select SPARSEMEM_STATIC if 32BIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select HAVE_ARCH_MMAP_RND_BITS if MMU + select ARCH_HAS_GCOV_PROFILE_ALL + select HAVE_COPY_THREAD_TLS config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -154,7 +156,7 @@ config GENERIC_HWEIGHT def_bool y config FIX_EARLYCON_MEM - def_bool CONFIG_MMU + def_bool MMU config PGTABLE_LEVELS int diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 634759ac8c71..d325b67d00df 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -2,8 +2,8 @@ menu "SoC selection" config SOC_SIFIVE bool "SiFive SoCs" - select SERIAL_SIFIVE - select SERIAL_SIFIVE_CONSOLE + select SERIAL_SIFIVE if TTY + select SERIAL_SIFIVE_CONSOLE if TTY select CLK_SIFIVE select CLK_SIFIVE_FU540_PRCI select SIFIVE_PLIC diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index a474f98ce4fa..36db8145f9f4 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -24,7 +24,7 @@ $(obj)/Image: vmlinux FORCE $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) -loader.o: $(src)/loader.S $(obj)/Image +$(obj)/loader.o: $(src)/loader.S $(obj)/Image $(obj)/loader: $(obj)/loader.o $(obj)/Image $(obj)/loader.lds FORCE $(Q)$(LD) -T $(obj)/loader.lds -o $@ $(obj)/loader.o diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 70a1891e7cd0..a2e3d54e830c 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -54,6 +54,7 @@ reg = <1>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu1_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -77,6 +78,7 @@ reg = <2>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu2_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -100,6 +102,7 @@ reg = <3>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu3_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -123,6 +126,7 @@ reg = <4>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu4_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -253,6 +257,17 @@ #pwm-cells = <3>; status = "disabled"; }; + l2cache: cache-controller@2010000 { + compatible = "sifive,fu540-c000-ccache", "cache"; + cache-block-size = <64>; + cache-level = <2>; + cache-sets = <1024>; + cache-size = <2097152>; + cache-unified; + interrupt-parent = <&plic0>; + interrupts = <1 2 3>; + reg = <0x0 0x2010000 0x0 0x1000>; + }; }; }; diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index dd62b691c443..27e005fca584 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -5,4 +5,8 @@ #include <linux/ftrace.h> #include <asm-generic/asm-prototypes.h> +long long __lshrti3(long long a, int b); +long long __ashrti3(long long a, int b); +long long __ashlti3(long long a, int b); + #endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/include/asm/csr.h 
b/arch/riscv/include/asm/csr.h index 0a62d2d68455..435b65532e29 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -116,9 +116,9 @@ # define SR_PIE SR_MPIE # define SR_PP SR_MPP -# define IRQ_SOFT IRQ_M_SOFT -# define IRQ_TIMER IRQ_M_TIMER -# define IRQ_EXT IRQ_M_EXT +# define RV_IRQ_SOFT IRQ_M_SOFT +# define RV_IRQ_TIMER IRQ_M_TIMER +# define RV_IRQ_EXT IRQ_M_EXT #else /* CONFIG_RISCV_M_MODE */ # define CSR_STATUS CSR_SSTATUS # define CSR_IE CSR_SIE @@ -133,15 +133,15 @@ # define SR_PIE SR_SPIE # define SR_PP SR_SPP -# define IRQ_SOFT IRQ_S_SOFT -# define IRQ_TIMER IRQ_S_TIMER -# define IRQ_EXT IRQ_S_EXT +# define RV_IRQ_SOFT IRQ_S_SOFT +# define RV_IRQ_TIMER IRQ_S_TIMER +# define RV_IRQ_EXT IRQ_S_EXT #endif /* CONFIG_RISCV_M_MODE */ /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */ -#define IE_SIE (_AC(0x1, UL) << IRQ_SOFT) -#define IE_TIE (_AC(0x1, UL) << IRQ_TIMER) -#define IE_EIE (_AC(0x1, UL) << IRQ_EXT) +#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT) +#define IE_TIE (_AC(0x1, UL) << RV_IRQ_TIMER) +#define IE_EIE (_AC(0x1, UL) << RV_IRQ_EXT) #ifndef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ff0ed4f292e..36ae01761352 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -90,6 +90,27 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +/* + * Roughly size the vmemmap space to be large enough to fit enough + * struct pages to map half the virtual address space. Then + * position vmemmap directly below the VMALLOC region. + */ +#define VMEMMAP_SHIFT \ + (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) +#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) +#define VMEMMAP_END (VMALLOC_START - 1) +#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) + +/* + * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. + */ +#define vmemmap ((struct page *)VMEMMAP_START) + static inline int pmd_present(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -400,23 +421,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -/* - * Roughly size the vmemmap space to be large enough to fit enough - * struct pages to map half the virtual address space. Then - * position vmemmap directly below the VMALLOC region. 
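[Editor's aside — illustrative sketch, not part of the patch] Moving these riscv definitions up matters because, with vmemmap pinned at VMEMMAP_START, pfn_to_page() and page_to_pfn() become pure pointer arithmetic, and the macro has to be visible before the page-table helpers that use it. A rough worked example under assumed Sv39 values (CONFIG_VA_BITS = 39, PAGE_SHIFT = 12) and a struct page of at most 64 bytes (STRUCT_PAGE_MAX_SHIFT = 6):

/* VMEMMAP_SHIFT = 39 - 12 - 1 + 6 = 32, so VMEMMAP_SIZE = 4 GiB of
 * struct pages - enough to describe half of the 39-bit address space. */

/* with vmemmap fixed, the generic SPARSEMEM_VMEMMAP model reduces to: */
#define sketch_pfn_to_page(pfn)		(vmemmap + (pfn))
#define sketch_page_to_pfn(page)	((unsigned long)((page) - (vmemmap)))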
- */ -#define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) -#define VMEMMAP_END (VMALLOC_START - 1) -#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) - -#define vmemmap ((struct page *)VMEMMAP_START) - #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) diff --git a/arch/riscv/include/asm/sifive_l2_cache.h b/arch/riscv/include/asm/sifive_l2_cache.h deleted file mode 100644 index 04f6748fc50b..000000000000 --- a/arch/riscv/include/asm/sifive_l2_cache.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * SiFive L2 Cache Controller header file - * - */ - -#ifndef _ASM_RISCV_SIFIVE_L2_CACHE_H -#define _ASM_RISCV_SIFIVE_L2_CACHE_H - -extern int register_sifive_l2_error_notifier(struct notifier_block *nb); -extern int unregister_sifive_l2_error_notifier(struct notifier_block *nb); - -#define SIFIVE_L2_ERR_TYPE_CE 0 -#define SIFIVE_L2_ERR_TYPE_UE 1 - -#endif /* _ASM_RISCV_SIFIVE_L2_CACHE_H */ diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h new file mode 100644 index 000000000000..ff9abc00d139 --- /dev/null +++ b/arch/riscv/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_RISCV_VMALLOC_H +#define _ASM_RISCV_VMALLOC_H + +#endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index a1349ca64669..bad4d85b5e91 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -155,7 +155,7 @@ _save_context: REG_L x2, PT_SP(sp) .endm -#if !IS_ENABLED(CONFIG_PREEMPT) +#if !IS_ENABLED(CONFIG_PREEMPTION) .set resume_kernel, restore_all #endif @@ -246,6 +246,7 @@ check_syscall_nr: */ li t1, -1 beq a7, t1, ret_from_syscall_rejected + blt a7, t1, 1f /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR @@ -304,7 +305,7 @@ restore_all: sret #endif -#if IS_ENABLED(CONFIG_PREEMPT) +#if IS_ENABLED(CONFIG_PREEMPTION) resume_kernel: REG_L s0, TASK_TI_PREEMPT_COUNT(tp) bnez s0, restore_all diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index b94d8db5ddcc..c40fdcdeb950 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -142,7 +142,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - if (function_graph_enter(old, self_addr, frame_pointer, parent)) + if (!function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = return_hooker; } diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 84a6f0a4b120..a4242be66966 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -80,7 +80,9 @@ _start_kernel: #ifdef CONFIG_SMP li t0, CONFIG_NR_CPUS - bgeu a0, t0, .Lsecondary_park + blt a0, t0, .Lgood_cores + tail .Lsecondary_park +.Lgood_cores: #endif /* Pick one hart to run the main boot sequence */ @@ -209,11 +211,6 @@ relocate: tail smp_callin #endif -.align 2 -.Lsecondary_park: - /* We lack SMP support or have too many harts, so park this hart */ - wfi - j .Lsecondary_park END(_start) #ifdef CONFIG_RISCV_M_MODE @@ -246,12 +243,12 @@ ENTRY(reset_regs) li t4, 0 li t5, 0 li t6, 0 - csrw sscratch, 0 + csrw CSR_SCRATCH, 0 #ifdef CONFIG_FPU csrr t0, CSR_MISA andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D) - bnez t0, .Lreset_regs_done + beqz t0, .Lreset_regs_done li t1, SR_FS csrs CSR_STATUS, t1 @@ -295,6 +292,13 @@ ENTRY(reset_regs) END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ +.section ".text", "ax",@progbits +.align 2 
+.Lsecondary_park: + /* We lack SMP support or have too many harts, so park this hart */ + wfi + j .Lsecondary_park + __PAGE_ALIGNED_BSS /* Empty zero page */ .balign PAGE_SIZE diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 3f07a91d5afb..345c4f2eba13 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -23,11 +23,11 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) irq_enter(); switch (regs->cause & ~CAUSE_IRQ_FLAG) { - case IRQ_TIMER: + case RV_IRQ_TIMER: riscv_timer_interrupt(); break; #ifdef CONFIG_SMP - case IRQ_SOFT: + case RV_IRQ_SOFT: /* * We only use software interrupts to pass IPIs, so if a non-SMP * system gets one, then we don't know what to do. @@ -35,7 +35,7 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) riscv_software_interrupt(); break; #endif - case IRQ_EXT: + case RV_IRQ_EXT: handle_arch_irq(regs); break; default: diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 95a3031e5c7c..817cf7b0974c 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -99,8 +99,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long arg, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -121,7 +121,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (usp) /* User fork */ childregs->sp = usp; if (clone_flags & CLONE_SETTLS) - childregs->tp = childregs->a5; + childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ p->thread.ra = (unsigned long)ret_from_fork; } diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 4800cf703186..2a02b7eebee0 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -9,8 +9,5 @@ /* * Assembly functions that may be used (directly or indirectly) by modules */ -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__asm_copy_to_user); -EXPORT_SYMBOL(__asm_copy_from_user); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 49a5852fd07d..33b16f4212f7 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -58,7 +58,8 @@ quiet_cmd_vdsold = VDSOLD $@ cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \ $(CROSS_COMPILE)objcopy \ - $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ + $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ + rm $@.tmp # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/riscv/lib/tishift.S b/arch/riscv/lib/tishift.S index 15f9d54c7db6..ef90075c4b0a 100644 --- a/arch/riscv/lib/tishift.S +++ b/arch/riscv/lib/tishift.S @@ -4,34 +4,73 @@ */ #include <linux/linkage.h> +#include <asm-generic/export.h> -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) beqz a2, .L1 li a5,64 sub a5,a5,a2 - addi sp,sp,-16 sext.w a4,a5 blez a5, .L2 sext.w a2,a2 - sll a4,a1,a4 srl a0,a0,a2 - srl a1,a1,a2 + sll a4,a1,a4 + srl a2,a1,a2 or a0,a0,a4 - sd a1,8(sp) - sd a0,0(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 - ret + mv a1,a2 .L1: ret .L2: - negw a4,a4 - srl a1,a1,a4 - sd a1,0(sp) - sd zero,8(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 + negw a0,a4 
+ li a2,0 + srl a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) + +SYM_FUNC_START(__ashrti3) + beqz a2, .L3 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L4 + sext.w a2,a2 + srl a0,a0,a2 + sll a4,a1,a4 + sra a2,a1,a2 + or a0,a0,a4 + mv a1,a2 +.L3: + ret +.L4: + negw a0,a4 + srai a2,a1,0x3f + sra a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__ashlti3) + beqz a2, .L5 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L6 + sext.w a2,a2 + sll a1,a1,a2 + srl a4,a0,a4 + sll a2,a0,a2 + or a1,a1,a4 + mv a0,a2 +.L5: + ret +.L6: + negw a1,a4 + li a2,0 + sll a1,a0,a1 + mv a0,a2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index fecd65657a6f..f29d2ba2c0a6 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -1,4 +1,5 @@ #include <linux/linkage.h> +#include <asm-generic/export.h> #include <asm/asm.h> #include <asm/csr.h> @@ -66,6 +67,8 @@ ENTRY(__asm_copy_from_user) j 3b ENDPROC(__asm_copy_to_user) ENDPROC(__asm_copy_from_user) +EXPORT_SYMBOL(__asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_from_user) ENTRY(__clear_user) @@ -108,6 +111,7 @@ ENTRY(__clear_user) bltu a0, a3, 5b j 3b ENDPROC(__clear_user) +EXPORT_SYMBOL(__clear_user) .section .fixup,"ax" .balign 4 diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 3c8b33258457..a1bd95c8047a 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -10,7 +10,6 @@ obj-y += extable.o obj-$(CONFIG_MMU) += fault.o obj-y += cacheflush.o obj-y += context.o -obj-y += sifive_l2_cache.o ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 8f1900686640..8930ab7278e6 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -22,6 +22,7 @@ void flush_icache_all(void) else on_each_cpu(ipi_remote_fence_i, NULL, 1); } +EXPORT_SYMBOL(flush_icache_all); /* * Performs an icache flush for the given MM context. 
RISC-V has no direct diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 69f6678db7f3..965a8cf4829c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -99,13 +99,13 @@ static void __init setup_initrd(void) pr_info("initrd not found or empty"); goto disable; } - if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { + if (__pa_symbol(initrd_end) > PFN_PHYS(max_low_pfn)) { pr_err("initrd extends beyond end of memory"); goto disable; } size = initrd_end - initrd_start; - memblock_reserve(__pa(initrd_start), size); + memblock_reserve(__pa_symbol(initrd_start), size); initrd_below_start_ok = 1; pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", @@ -124,8 +124,8 @@ void __init setup_bootmem(void) { struct memblock_region *reg; phys_addr_t mem_size = 0; - phys_addr_t vmlinux_end = __pa(&_end); - phys_addr_t vmlinux_start = __pa(&_start); + phys_addr_t vmlinux_end = __pa_symbol(&_end); + phys_addr_t vmlinux_start = __pa_symbol(&_start); /* Find the memory region containing the kernel */ for_each_memblock(memory, reg) { @@ -445,7 +445,7 @@ static void __init setup_vm_final(void) /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, - __pa(fixmap_pgd_next), + __pa_symbol(fixmap_pgd_next), PGDIR_SIZE, PAGE_TABLE); /* Map all memory banks */ @@ -474,7 +474,7 @@ static void __init setup_vm_final(void) clear_fixmap(FIX_PMD); /* Move to swapper page table */ - csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE); + csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); local_flush_tlb_all(); } #else diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c deleted file mode 100644 index a9ffff3277c7..000000000000 --- a/arch/riscv/mm/sifive_l2_cache.c +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * SiFive L2 cache controller Driver - * - * Copyright (C) 2018-2019 SiFive, Inc. 
- * - */ -#include <linux/debugfs.h> -#include <linux/interrupt.h> -#include <linux/of_irq.h> -#include <linux/of_address.h> -#include <asm/sifive_l2_cache.h> - -#define SIFIVE_L2_DIRECCFIX_LOW 0x100 -#define SIFIVE_L2_DIRECCFIX_HIGH 0x104 -#define SIFIVE_L2_DIRECCFIX_COUNT 0x108 - -#define SIFIVE_L2_DATECCFIX_LOW 0x140 -#define SIFIVE_L2_DATECCFIX_HIGH 0x144 -#define SIFIVE_L2_DATECCFIX_COUNT 0x148 - -#define SIFIVE_L2_DATECCFAIL_LOW 0x160 -#define SIFIVE_L2_DATECCFAIL_HIGH 0x164 -#define SIFIVE_L2_DATECCFAIL_COUNT 0x168 - -#define SIFIVE_L2_CONFIG 0x00 -#define SIFIVE_L2_WAYENABLE 0x08 -#define SIFIVE_L2_ECCINJECTERR 0x40 - -#define SIFIVE_L2_MAX_ECCINTR 3 - -static void __iomem *l2_base; -static int g_irq[SIFIVE_L2_MAX_ECCINTR]; - -enum { - DIR_CORR = 0, - DATA_CORR, - DATA_UNCORR, -}; - -#ifdef CONFIG_DEBUG_FS -static struct dentry *sifive_test; - -static ssize_t l2_write(struct file *file, const char __user *data, - size_t count, loff_t *ppos) -{ - unsigned int val; - - if (kstrtouint_from_user(data, count, 0, &val)) - return -EINVAL; - if ((val >= 0 && val < 0xFF) || (val >= 0x10000 && val < 0x100FF)) - writel(val, l2_base + SIFIVE_L2_ECCINJECTERR); - else - return -EINVAL; - return count; -} - -static const struct file_operations l2_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = l2_write -}; - -static void setup_sifive_debug(void) -{ - sifive_test = debugfs_create_dir("sifive_l2_cache", NULL); - - debugfs_create_file("sifive_debug_inject_error", 0200, - sifive_test, NULL, &l2_fops); -} -#endif - -static void l2_config_read(void) -{ - u32 regval, val; - - regval = readl(l2_base + SIFIVE_L2_CONFIG); - val = regval & 0xFF; - pr_info("L2CACHE: No. of Banks in the cache: %d\n", val); - val = (regval & 0xFF00) >> 8; - pr_info("L2CACHE: No. 
of ways per bank: %d\n", val); - val = (regval & 0xFF0000) >> 16; - pr_info("L2CACHE: Sets per bank: %llu\n", (uint64_t)1 << val); - val = (regval & 0xFF000000) >> 24; - pr_info("L2CACHE: Bytes per cache block: %llu\n", (uint64_t)1 << val); - - regval = readl(l2_base + SIFIVE_L2_WAYENABLE); - pr_info("L2CACHE: Index of the largest way enabled: %d\n", regval); -} - -static const struct of_device_id sifive_l2_ids[] = { - { .compatible = "sifive,fu540-c000-ccache" }, - { /* end of table */ }, -}; - -static ATOMIC_NOTIFIER_HEAD(l2_err_chain); - -int register_sifive_l2_error_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&l2_err_chain, nb); -} -EXPORT_SYMBOL_GPL(register_sifive_l2_error_notifier); - -int unregister_sifive_l2_error_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&l2_err_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_sifive_l2_error_notifier); - -static irqreturn_t l2_int_handler(int irq, void *device) -{ - unsigned int add_h, add_l; - - if (irq == g_irq[DIR_CORR]) { - add_h = readl(l2_base + SIFIVE_L2_DIRECCFIX_HIGH); - add_l = readl(l2_base + SIFIVE_L2_DIRECCFIX_LOW); - pr_err("L2CACHE: DirError @ 0x%08X.%08X\n", add_h, add_l); - /* Reading this register clears the DirError interrupt sig */ - readl(l2_base + SIFIVE_L2_DIRECCFIX_COUNT); - atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE, - "DirECCFix"); - } - if (irq == g_irq[DATA_CORR]) { - add_h = readl(l2_base + SIFIVE_L2_DATECCFIX_HIGH); - add_l = readl(l2_base + SIFIVE_L2_DATECCFIX_LOW); - pr_err("L2CACHE: DataError @ 0x%08X.%08X\n", add_h, add_l); - /* Reading this register clears the DataError interrupt sig */ - readl(l2_base + SIFIVE_L2_DATECCFIX_COUNT); - atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE, - "DatECCFix"); - } - if (irq == g_irq[DATA_UNCORR]) { - add_h = readl(l2_base + SIFIVE_L2_DATECCFAIL_HIGH); - add_l = readl(l2_base + SIFIVE_L2_DATECCFAIL_LOW); - pr_err("L2CACHE: DataFail @ 0x%08X.%08X\n", add_h, add_l); - /* Reading this register clears the DataFail interrupt sig */ - readl(l2_base + SIFIVE_L2_DATECCFAIL_COUNT); - atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_UE, - "DatECCFail"); - } - - return IRQ_HANDLED; -} - -static int __init sifive_l2_init(void) -{ - struct device_node *np; - struct resource res; - int i, rc; - - np = of_find_matching_node(NULL, sifive_l2_ids); - if (!np) - return -ENODEV; - - if (of_address_to_resource(np, 0, &res)) - return -ENODEV; - - l2_base = ioremap(res.start, resource_size(&res)); - if (!l2_base) - return -ENOMEM; - - for (i = 0; i < SIFIVE_L2_MAX_ECCINTR; i++) { - g_irq[i] = irq_of_parse_and_map(np, i); - rc = request_irq(g_irq[i], l2_int_handler, 0, "l2_ecc", NULL); - if (rc) { - pr_err("L2CACHE: Could not request IRQ %d\n", g_irq[i]); - return rc; - } - } - - l2_config_read(); - -#ifdef CONFIG_DEBUG_FS - setup_sifive_debug(); -#endif - return 0; -} -device_initcall(sifive_l2_init); diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 5451ef3845f2..7fbf56aab661 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -631,14 +631,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) return -1; emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx); - /* if (--TCC < 0) + /* if (TCC-- < 0) * goto out; */ emit(rv_addi(RV_REG_T1, tcc, -1), ctx); off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; if (is_13b_check(off, insn)) return -1; - emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx); + 
emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx); /* prog = array->ptrs[index]; * if (!prog) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d4051e88e625..287714d51b47 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -30,7 +30,7 @@ config GENERIC_BUG_RELATIVE_POINTERS def_bool y config GENERIC_LOCKBREAK - def_bool y if PREEMPT + def_bool y if PREEMPTION config PGSTE def_bool y if KVM @@ -110,7 +110,7 @@ config S390 select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS @@ -124,6 +124,7 @@ config S390 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN + select HAVE_ARCH_KASAN_VMALLOC select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b5ea9e14c017..6ede29907fbf 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -130,11 +130,11 @@ static inline bool should_resched(int preempt_offset) #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION extern asmlinkage void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() extern asmlinkage void preempt_schedule_notrace(void); #define __preempt_schedule_notrace() preempt_schedule_notrace() -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6dc6c4fbc8e2..69289e99cabd 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -27,7 +27,6 @@ #define MACHINE_FLAG_DIAG9C BIT(3) #define MACHINE_FLAG_ESOP BIT(4) #define MACHINE_FLAG_IDTE BIT(5) -#define MACHINE_FLAG_DIAG44 BIT(6) #define MACHINE_FLAG_EDAT1 BIT(7) #define MACHINE_FLAG_EDAT2 BIT(8) #define MACHINE_FLAG_TOPOLOGY BIT(10) @@ -94,7 +93,6 @@ extern unsigned long __swsusp_reset_dma; #define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) #define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP) #define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) #define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) #define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 6da8885251d6..670f14a228e5 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -194,9 +194,9 @@ static inline unsigned long long get_tod_clock_monotonic(void) { unsigned long long tod; - preempt_disable(); + preempt_disable_notrace(); tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; - preempt_enable(); + preempt_enable_notrace(); return tod; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index ef3c00b049ab..4093a2856929 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -86,7 +86,7 @@ static inline int share(unsigned long addr, u16 cmd) }; if (!is_prot_virt_guest()) - return -ENOTSUPP; + return -EOPNOTSUPP; /* * Sharing is page wise, if we encounter addresses that are * not page
aligned, we assume something went wrong. If diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h new file mode 100644 index 000000000000..3ba3a6bdca25 --- /dev/null +++ b/arch/s390/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_S390_VMALLOC_H +#define _ASM_S390_VMALLOC_H + +#endif /* _ASM_S390_VMALLOC_H */ diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index d306fe04489a..2c122d8bab93 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -195,6 +195,8 @@ void die(struct pt_regs *regs, const char *str) regs->int_code >> 17, ++die_counter); #ifdef CONFIG_PREEMPT pr_cont("PREEMPT "); +#elif defined(CONFIG_PREEMPT_RT) + pr_cont("PREEMPT_RT "); #endif pr_cont("SMP "); if (debug_pagealloc_enabled()) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index db32a55daaec..cd241ee66eff 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -204,21 +204,6 @@ static __init void detect_diag9c(void) S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; } -static __init void detect_diag44(void) -{ - int rc; - - diag_stat_inc(DIAG_STAT_X044); - asm volatile( - " diag 0,0,0x44\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; -} - static __init void detect_machine_facilities(void) { if (test_facility(8)) { @@ -331,7 +316,6 @@ void __init startup_init(void) setup_arch_string(); setup_boot_command_line(); detect_diag9c(); - detect_diag44(); detect_machine_facilities(); save_vector_registers(); setup_topology(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 270d1d145761..9205add8481d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -790,7 +790,7 @@ ENTRY(io_int_handler) .Lio_work: tm __PT_PSW+1(%r11),0x01 # returning to user ? jo .Lio_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION # check for preemptive scheduling icm %r0,15,__LC_PREEMPT_COUNT jnz .Lio_restore # preemption is disabled diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9e1660a6b9db..c3597d2e2ae0 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -35,6 +35,7 @@ EXPORT_SYMBOL(_mcount) ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller lgr %r1,%r15 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c07fdcd73726..77d93c534284 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1303,18 +1303,28 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled PERF_RECORD_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit.
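[Editor's aside — illustrative sketch, not part of the patch] The SAMPL_RATE() adjustment that follows lengthens the sampling interval by roughly ten percent each time the throttle limit is hit, and DIV_ROUND_UP keeps the increment at least 1, so even a tiny interval keeps backing off. The arithmetic in isolation:

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))	/* as in linux/kernel.h */

static unsigned long back_off_interval(unsigned long interval)
{
	/* ~10% longer interval => ~10% fewer samples per task tick */
	return interval + DIV_ROUND_UP(interval, 10);
}
/* e.g. 4000 -> 4400 -> 4840 -> ..., and even 1 -> 2, so progress is guaranteed */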
+ */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "%s: " "overflows: sample %llu event %llu" diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9cbf490fd162..d5fbd754f41a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1052,7 +1052,7 @@ static void __init log_component_list(void) if (!early_ipl_comp_list_addr) return; - if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR) + if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL) pr_info("Linux is running with Secure-IPL enabled\n"); else pr_info("Linux is running with Secure-IPL disabled\n"); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2794cad9312e..a08bd2522dd9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -413,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void smp_yield_cpu(int cpu) { - if (MACHINE_HAS_DIAG9C) { - diag_stat_inc_norecursion(DIAG_STAT_X09C); - asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); - } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { - diag_stat_inc_norecursion(DIAG_STAT_X044); - asm volatile("diag 0,0,0x44"); - } + if (!MACHINE_HAS_DIAG9C) + return; + diag_stat_inc_norecursion(DIAG_STAT_X09C); + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); } /* diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index da2d4d4c5b0e..707fd99f6734 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -36,10 +36,17 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp) return true; } -static inline bool is_task_pt_regs(struct unwind_state *state, - struct pt_regs *regs) +static inline bool is_final_pt_regs(struct unwind_state *state, + struct pt_regs *regs) { - return task_pt_regs(state->task) == regs; + /* user mode or kernel thread pt_regs at the bottom of task stack */ + if (task_pt_regs(state->task) == regs) + return true; + + /* user mode pt_regs at the bottom of irq stack */ + return state->stack_info.type == STACK_TYPE_IRQ && + state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs && + READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE; } bool unwind_next_frame(struct unwind_state *state) @@ -80,7 +87,7 @@ bool unwind_next_frame(struct unwind_state *state) if (!on_stack(info, sp, sizeof(struct pt_regs))) goto out_err; regs = (struct pt_regs *) sp; - if (is_task_pt_regs(state, regs)) + if (is_final_pt_regs(state, regs)) goto out_stop; ip = READ_ONCE_NOCHECK(regs->psw.addr); sp = READ_ONCE_NOCHECK(regs->gprs[15]); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ce1e4bbe53aa..9b2dab5a69f9 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -242,7 +242,6 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) void arch_spin_lock_wait(arch_spinlock_t *lp) { - /* Use classic spinlocks + niai if the steal time is >= 10% */ if (test_cpu_flag(CIF_DEDICATED_CPU)) arch_spin_lock_queued(lp); else diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index bda7ac0ddd29..32b7a30b2485 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -238,7 +238,7 @@ static int test_unwind_irq(struct unwindme *u) { preempt_disable(); if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) { - pr_info("Couldn't reqister external 
interrupt handler"); + pr_info("Couldn't register external interrupt handler"); return -1; } u->task = current; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f0ce22220565..ac44bd76db4b 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -292,10 +292,8 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); vmem_remove_mapping(start, size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 460f25572940..06345616a646 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -82,7 +82,8 @@ static pte_t * __init kasan_early_pte_alloc(void) enum populate_mode { POPULATE_ONE2ONE, POPULATE_MAP, - POPULATE_ZERO_SHADOW + POPULATE_ZERO_SHADOW, + POPULATE_SHALLOW }; static void __init kasan_early_vmemmap_populate(unsigned long address, unsigned long end, @@ -116,6 +117,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgd_populate(&init_mm, pg_dir, p4_dir); } + if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && + mode == POPULATE_SHALLOW) { + address = (address + P4D_SIZE) & P4D_MASK; + continue; + } + p4_dir = p4d_offset(pg_dir, address); if (p4d_none(*p4_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -130,6 +137,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, p4d_populate(&init_mm, p4_dir, pu_dir); } + if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && + mode == POPULATE_SHALLOW) { + address = (address + PUD_SIZE) & PUD_MASK; + continue; + } + pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -195,6 +208,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, page = kasan_early_shadow_page; pte_val(*pt_dir) = __pa(page) | pgt_prot_zero; break; + case POPULATE_SHALLOW: + /* should never happen */ + break; } } address += PAGE_SIZE; @@ -313,22 +329,50 @@ void __init kasan_early_init(void) init_mm.pgd = early_pg_dir; /* * Current memory layout: - * +- 0 -------------+ +- shadow start -+ - * | 1:1 ram mapping | /| 1/8 ram | - * +- end of ram ----+ / +----------------+ - * | ... gap ... |/ | kasan | - * +- shadow start --+ | zero | - * | 1/8 addr space | | page | - * +- shadow end -+ | mapping | - * | ... gap ... |\ | (untracked) | - * +- modules vaddr -+ \ +----------------+ - * | 2Gb | \| unmapped | allocated per module - * +-----------------+ +- shadow end ---+ + * +- 0 -------------+ +- shadow start -+ + * | 1:1 ram mapping | /| 1/8 ram | + * | | / | | + * +- end of ram ----+ / +----------------+ + * | ... gap ... | / | | + * | |/ | kasan | + * +- shadow start --+ | zero | + * | 1/8 addr space | | page | + * +- shadow end -+ | mapping | + * | ... gap ... |\ | (untracked) | + * +- vmalloc area -+ \ | | + * | vmalloc_size | \ | | + * +- modules vaddr -+ \ +----------------+ + * | 2Gb | \| unmapped | allocated per module + * +-----------------+ +- shadow end ---+ + * + * Current memory layout (KASAN_VMALLOC): + * +- 0 -------------+ +- shadow start -+ + * | 1:1 ram mapping | /| 1/8 ram | + * | | / | | + * +- end of ram ----+ / +----------------+ + * | ... gap ... | / | kasan | + * | |/ | zero | + * +- shadow start --+ | page | + * | 1/8 addr space | | mapping | + * +- shadow end -+ | (untracked) | + * | ... gap ... 
|\ | | + * +- vmalloc area -+ \ +- vmalloc area -+ + * | vmalloc_size | \ |shallow populate| + * +- modules vaddr -+ \ +- modules area -+ + * | 2Gb | \|shallow populate| + * +-----------------+ +- shadow end ---+ */ /* populate kasan shadow (for identity mapping and zero page mapping) */ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = vmax - MODULES_LEN; + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + untracked_mem_end = vmax - vmalloc_size - MODULES_LEN; + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_early_vmemmap_populate(__sha(untracked_mem_end), + __sha(vmax), POPULATE_SHALLOW); + } + /* populate kasan shadow for untracked memory */ kasan_early_vmemmap_populate(__sha(max_physmem_end), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore index 04a03433c720..c82157f46b18 100644 --- a/arch/s390/purgatory/.gitignore +++ b/arch/s390/purgatory/.gitignore @@ -1,3 +1,4 @@ purgatory +purgatory.chk purgatory.lds purgatory.ro diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bc0d7a0d0394..c57f8c40e992 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := head.o purgatory.o string.o sha256.o mem.o -targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro +targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE @@ -15,8 +15,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE - $(call if_changed_rule,cc_o_c) +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare @@ -26,15 +28,22 @@ KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) -LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T +# Since we link purgatory with -r unresolved symbols are not checked, so we +# also link a purgatory.chk binary without -r to check for unresolved symbols. 
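
The comment just added to the purgatory Makefile names the underlying linker behavior: a relocatable link (ld -r) keeps relocations and defers symbol resolution, so undefined references survive it silently and only a full link reports them — hence the extra purgatory.chk link step. A minimal stand-alone illustration of that behavior (file and symbol names here are invented for the example, not taken from the patch):

/* demo.c - why "ld -r" cannot flag unresolved symbols.
 *
 *   cc -c demo.c                 # demo.o references missing_helper
 *   ld -r -o demo_r.o demo.o     # succeeds: resolution is deferred
 *   ld -e entry -o demo demo.o   # fails: undefined reference to
 *                                #        'missing_helper'
 */
extern int missing_helper(void);	/* declared but never defined */

int entry(void)
{
	return missing_helper();	/* stays unresolved under -r */
}
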
+PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib +LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T +LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS) $(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE $(call if_changed,ld) +$(obj)/purgatory.chk: $(obj)/purgatory FORCE + $(call if_changed,ld) + OBJCOPYFLAGS_purgatory.ro := -O elf64-s390 OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*' OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment' OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*' -$(obj)/purgatory.ro: $(obj)/purgatory FORCE +$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE $(call if_changed,objcopy) $(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c new file mode 100644 index 000000000000..c98c22a72db7 --- /dev/null +++ b/arch/s390/purgatory/string.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define __HAVE_ARCH_MEMCMP /* arch function */ +#include "../lib/string.c" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f356ee674d89..9ece111b0254 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -108,7 +108,7 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_LOCKBREAK def_bool y - depends on SMP && PREEMPT + depends on SMP && PREEMPTION config ARCH_SUSPEND_POSSIBLE def_bool n diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c index d964c4d6b139..77dad1e511b4 100644 --- a/arch/sh/boards/board-sh7785lcr.c +++ b/arch/sh/boards/board-sh7785lcr.c @@ -341,7 +341,7 @@ static void __init sh7785lcr_setup(char **cmdline_p) pm_power_off = sh7785lcr_power_off; /* sm501 DRAM configuration */ - sm501_reg = ioremap_nocache(SM107_REG_ADDR, SM501_DRAM_CONTROL); + sm501_reg = ioremap(SM107_REG_ADDR, SM501_DRAM_CONTROL); if (!sm501_reg) { printk(KERN_ERR "%s: ioremap error.\n", __func__); return; diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c index 9108789fafef..3b6ea2d99013 100644 --- a/arch/sh/boards/mach-cayman/irq.c +++ b/arch/sh/boards/mach-cayman/irq.c @@ -137,7 +137,7 @@ void init_cayman_irq(void) { int i; - epld_virt = (unsigned long)ioremap_nocache(EPLD_BASE, 1024); + epld_virt = (unsigned long)ioremap(EPLD_BASE, 1024); if (!epld_virt) { printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n"); return; diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c index 4cec14700adc..8ef76e288da0 100644 --- a/arch/sh/boards/mach-cayman/setup.c +++ b/arch/sh/boards/mach-cayman/setup.c @@ -99,7 +99,7 @@ static int __init smsc_superio_setup(void) { unsigned char devid, devrev; - smsc_superio_virt = (unsigned long)ioremap_nocache(SMSC_SUPERIO_BASE, 1024); + smsc_superio_virt = (unsigned long)ioremap(SMSC_SUPERIO_BASE, 1024); if (!smsc_superio_virt) { panic("Unable to remap SMSC SuperIO\n"); } diff --git a/arch/sh/boards/mach-sdk7786/fpga.c b/arch/sh/boards/mach-sdk7786/fpga.c index 895576ff8376..a37e1e88c6b1 100644 --- a/arch/sh/boards/mach-sdk7786/fpga.c +++ b/arch/sh/boards/mach-sdk7786/fpga.c @@ -32,7 +32,7 @@ static void __iomem *sdk7786_fpga_probe(void) * is reserved. */ for (area = PA_AREA0; area < PA_AREA7; area += SZ_64M) { - base = ioremap_nocache(area + FPGA_REGS_OFFSET, FPGA_REGS_SIZE); + base = ioremap(area + FPGA_REGS_OFFSET, FPGA_REGS_SIZE); if (!base) { /* Failed to remap this area, move along. 
*/ continue; diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c index cf2fcccca812..24391b444b28 100644 --- a/arch/sh/drivers/heartbeat.c +++ b/arch/sh/drivers/heartbeat.c @@ -96,7 +96,7 @@ static int heartbeat_drv_probe(struct platform_device *pdev) return -ENOMEM; } - hd->base = ioremap_nocache(res->start, resource_size(res)); + hd->base = ioremap(res->start, resource_size(res)); if (unlikely(!hd->base)) { dev_err(&pdev->dev, "ioremap failed\n"); diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c index 49303fab187b..03225d27770b 100644 --- a/arch/sh/drivers/pci/pci-sh5.c +++ b/arch/sh/drivers/pci/pci-sh5.c @@ -115,12 +115,12 @@ static int __init sh5pci_init(void) return -EINVAL; } - pcicr_virt = (unsigned long)ioremap_nocache(SH5PCI_ICR_BASE, 1024); + pcicr_virt = (unsigned long)ioremap(SH5PCI_ICR_BASE, 1024); if (!pcicr_virt) { panic("Unable to remap PCICR\n"); } - PCI_IO_AREA = (unsigned long)ioremap_nocache(SH5PCI_IO_BASE, 0x10000); + PCI_IO_AREA = (unsigned long)ioremap(SH5PCI_IO_BASE, 0x10000); if (!PCI_IO_AREA) { panic("Unable to remap PCIIO\n"); } diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c index f6d148451dfc..f3dc3f25b3ff 100644 --- a/arch/sh/drivers/platform_early.c +++ b/arch/sh/drivers/platform_early.c @@ -325,9 +325,9 @@ int __init sh_early_platform_driver_probe(char *class_str, } /** - * sh_early_platform_cleanup - clean up early platform code + * early_platform_cleanup - clean up early platform code */ -static int __init sh_early_platform_cleanup(void) +void __init early_platform_cleanup(void) { struct platform_device *pd, *pd2; @@ -337,11 +337,4 @@ static int __init sh_early_platform_cleanup(void) list_del(&pd->dev.devres_head); memset(&pd->dev.devres_head, 0, sizeof(pd->dev.devres_head)); } - - return 0; } -/* - * This must happen once after all early devices are probed but before probing - * real platform devices. 
- */ -subsys_initcall(sh_early_platform_cleanup); diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 1495489225ac..39c9ead489e5 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -367,7 +367,6 @@ static inline void ioremap_fixed_init(void) { } static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; } #endif -#define ioremap_nocache ioremap #define ioremap_uc ioremap /* diff --git a/arch/sh/include/asm/vmalloc.h b/arch/sh/include/asm/vmalloc.h new file mode 100644 index 000000000000..716b77472646 --- /dev/null +++ b/arch/sh/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SH_VMALLOC_H +#define _ASM_SH_VMALLOC_H + +#endif /* _ASM_SH_VMALLOC_H */ diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c index 744f903b4df3..1b3050facda8 100644 --- a/arch/sh/kernel/cpu/irq/intc-sh5.c +++ b/arch/sh/kernel/cpu/irq/intc-sh5.c @@ -124,7 +124,7 @@ void __init plat_irq_setup(void) unsigned long reg; int i; - intc_virt = (unsigned long)ioremap_nocache(INTC_BASE, 1024); + intc_virt = (unsigned long)ioremap(INTC_BASE, 1024); if (!intc_virt) { panic("Unable to remap INTC\n"); } diff --git a/arch/sh/kernel/cpu/sh2/smp-j2.c b/arch/sh/kernel/cpu/sh2/smp-j2.c index ae44dc24c455..d0d5d81455ae 100644 --- a/arch/sh/kernel/cpu/sh2/smp-j2.c +++ b/arch/sh/kernel/cpu/sh2/smp-j2.c @@ -88,8 +88,8 @@ static void j2_start_cpu(unsigned int cpu, unsigned long entry_point) if (!np) return; if (of_property_read_u32_array(np, "cpu-release-addr", regs, 2)) return; - release = ioremap_nocache(regs[0], sizeof(u32)); - initpc = ioremap_nocache(regs[1], sizeof(u32)); + release = ioremap(regs[0], sizeof(u32)); + initpc = ioremap(regs[1], sizeof(u32)); __raw_writel(entry_point, initpc); __raw_writel(1, release); diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c index 43763c26a752..dee6be2c2344 100644 --- a/arch/sh/kernel/cpu/sh5/clock-sh5.c +++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c @@ -68,7 +68,7 @@ static struct sh_clk_ops *sh5_clk_ops[] = { void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx) { - cprc_base = (unsigned long)ioremap_nocache(CPRC_BASE, 1024); + cprc_base = (unsigned long)ioremap(CPRC_BASE, 1024); BUG_ON(!cprc_base); if (idx < ARRAY_SIZE(sh5_clk_ops)) diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index de68ffdfffbf..81c8b64b977f 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -86,7 +86,7 @@ andi r6, ~0xf0, r6; \ putcon r6, SR; -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION # define preempt_stop() CLI() #else # define preempt_stop() @@ -884,7 +884,7 @@ ret_from_exception: /* Check softirqs */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION pta ret_from_syscall, tr0 blink tr0, ZERO diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c index eeb25a4fa55f..d4811691b93c 100644 --- a/arch/sh/kernel/dma-coherent.c +++ b/arch/sh/kernel/dma-coherent.c @@ -28,7 +28,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, arch_sync_dma_for_device(virt_to_phys(ret), size, DMA_BIDIRECTIONAL); - ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size); + ret_nocache = (void __force *)ioremap(virt_to_phys(ret), size); if (!ret_nocache) { free_pages((unsigned long)ret, order); return NULL; diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index d31f66e82ce5..956a7a03b0c8 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ 
-41,7 +41,7 @@ */ #include <asm/dwarf.h> -#if defined(CONFIG_PREEMPT) +#if defined(CONFIG_PREEMPTION) # define preempt_stop() cli ; TRACE_IRQS_OFF #else # define preempt_stop() @@ -84,7 +84,7 @@ ENTRY(ret_from_irq) get_current_thread_info r8, r0 bt resume_kernel ! Yes, it's from kernel, go back soon -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION bra resume_userspace nop ENTRY(resume_kernel) diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index 6d61f8cf4c13..0d5f3c9d52f3 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -266,6 +266,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) linux_regs->pc = addr; + /* fallthrough */ case 'D': case 'k': atomic_set(&kgdb_cpu_doing_single_step, -1); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index dfdbaa50946e..d1b1ff2be17a 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index eb24cb1afc11..e8c3ea01c12f 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -277,7 +277,7 @@ config US3_MC config GENERIC_LOCKBREAK bool default y - depends on SPARC64 && SMP && PREEMPT + depends on SPARC64 && SMP && PREEMPTION config NUMA bool "NUMA support" diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index f4afa301954a..9bb27e5c22f1 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -406,7 +406,6 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) return (void __iomem *)offset; } -#define ioremap_nocache(X,Y) ioremap((X),(Y)) #define ioremap_uc(X,Y) ioremap((X),(Y)) #define ioremap_wc(X,Y) ioremap((X),(Y)) #define ioremap_wt(X,Y) ioremap((X),(Y)) diff --git a/arch/sparc/include/asm/vmalloc.h b/arch/sparc/include/asm/vmalloc.h new file mode 100644 index 000000000000..04b8ab9518b8 --- /dev/null +++ b/arch/sparc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SPARC_VMALLOC_H +#define _ASM_SPARC_VMALLOC_H + +#endif /* _ASM_SPARC_VMALLOC_H */ diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 29aa34f11720..c5fd4b450d9b 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -310,7 +310,7 @@ kern_rtt_restore: retry to_kernel: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION ldsw [%g6 + TI_PRE_COUNT], %l5 brnz %l5, kern_fpucheck ldx [%g6 + TI_FLAGS], %l5 diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index 84cc8f7f83e9..c8eabb973b86 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -180,19 +180,19 @@ do { \ #define emit_loadptr(BASE, STRUCT, FIELD, DEST) \ do { unsigned int _off = offsetof(STRUCT, FIELD); \ - BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *)); \ + BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(void *)); \ *prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST); \ } while (0) #define emit_load32(BASE, STRUCT, FIELD, DEST) \ do { unsigned int _off = offsetof(STRUCT, FIELD); \ - BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32)); \ + BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u32)); \ 
*prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST); \ } while (0) #define emit_load16(BASE, STRUCT, FIELD, DEST) \ do { unsigned int _off = offsetof(STRUCT, FIELD); \ - BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16)); \ + BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u16)); \ *prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST); \ } while (0) @@ -202,7 +202,7 @@ do { unsigned int _off = offsetof(STRUCT, FIELD); \ } while (0) #define emit_load8(BASE, STRUCT, FIELD, DEST) \ -do { BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8)); \ +do { BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u8)); \ __emit_load8(BASE, STRUCT, FIELD, DEST); \ } while (0) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 2a6d04fcb3e9..6f0edd0c0220 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -14,6 +14,7 @@ config UML select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE + select HAVE_COPY_THREAD_TLS select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h index 81c647ef9c6c..adf91ef553ae 100644 --- a/arch/um/include/asm/ptrace-generic.h +++ b/arch/um/include/asm/ptrace-generic.h @@ -36,7 +36,7 @@ extern long subarch_ptrace(struct task_struct *child, long request, extern unsigned long getreg(struct task_struct *child, int regno); extern int putreg(struct task_struct *child, int regno, unsigned long value); -extern int arch_copy_tls(struct task_struct *new); +extern int arch_set_tls(struct task_struct *new, unsigned long tls); extern void clear_flushed_tls(struct task_struct *task); extern int syscall_trace_enter(struct pt_regs *regs); extern void syscall_trace_leave(struct pt_regs *regs); diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h new file mode 100644 index 000000000000..9a7b9ed93733 --- /dev/null +++ b/arch/um/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UM_VMALLOC_H +#define _ASM_UM_VMALLOC_H + +#endif /* _ASM_UM_VMALLOC_H */ diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 263a8f069133..17045e7211bf 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -153,8 +153,8 @@ void fork_handler(void) userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); } -int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct * p) +int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + unsigned long arg, struct task_struct * p, unsigned long tls) { void (*handler)(void); int kthread = current->flags & PF_KTHREAD; @@ -188,7 +188,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, * Set a new TLS for the child thread? 
*/ if (clone_flags & CLONE_SETTLS) - ret = arch_copy_tls(p); + ret = arch_set_tls(p, tls); } return ret; diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h index 4b460e01acfa..3ca74e1cde7d 100644 --- a/arch/unicore32/include/asm/io.h +++ b/arch/unicore32/include/asm/io.h @@ -31,7 +31,6 @@ extern void __uc32_iounmap(volatile void __iomem *addr); * */ #define ioremap(cookie, size) __uc32_ioremap(cookie, size) -#define ioremap_nocache(cookie, size) __uc32_ioremap(cookie, size) #define iounmap(cookie) __uc32_iounmap(cookie) #define readb_relaxed readb diff --git a/arch/unicore32/include/asm/vmalloc.h b/arch/unicore32/include/asm/vmalloc.h new file mode 100644 index 000000000000..054435818a14 --- /dev/null +++ b/arch/unicore32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UNICORE32_VMALLOC_H +#define _ASM_UNICORE32_VMALLOC_H + +#endif /* _ASM_UNICORE32_VMALLOC_H */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5e8949953660..42bc6fb0d2be 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -96,7 +96,7 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANTS_THP_SWAP if X86_64 - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLKEVT_I8253 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG @@ -124,6 +124,7 @@ config X86 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select GUP_GET_PTE_LOW_HIGH if X86_PAE select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI @@ -439,8 +440,8 @@ config X86_MPPARSE (esp with 64bit cpus) with acpi support, MADT and DSDT will override it config GOLDFISH - def_bool y - depends on X86_GOLDFISH + def_bool y + depends on X86_GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" @@ -456,6 +457,7 @@ config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS + select PROC_CPU_RESCTRL if PROC_FS help Enable x86 CPU resource control support. @@ -477,7 +479,7 @@ config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" depends on SMP ---help--- - This option is needed for the systems that have more than 8 CPUs + This option is needed for the systems that have more than 8 CPUs. config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" @@ -561,9 +563,9 @@ config X86_UV # Please maintain the alphabetic order if and when there are additions config X86_GOLDFISH - bool "Goldfish (Virtual Platform)" - depends on X86_EXTENDED_PLATFORM - ---help--- + bool "Goldfish (Virtual Platform)" + depends on X86_EXTENDED_PLATFORM + ---help--- Enable support for the Goldfish virtual platform used primarily for Android development. Unless you are building for the Android Goldfish emulator say N here. @@ -806,9 +808,9 @@ config KVM_GUEST timing infrastructure such as time of day, and system time config ARCH_CPUIDLE_HALTPOLL - def_bool n - prompt "Disable host haltpoll when loading haltpoll driver" - help + def_bool n + prompt "Disable host haltpoll when loading haltpoll driver" + help If virtualized under KVM, disable host haltpoll. 
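
The sparc BPF JIT hunks above mechanically replace FIELD_SIZEOF() with sizeof_field(); both measure a struct member's size at compile time. A short sketch of the helper and the kind of guard the emit_load* macros rely on, assuming the conventional definition from include/linux/stddef.h:

typedef unsigned short u16;	/* stand-in for the kernel typedef */

/* Conventional definition (an assumption here, not quoted from this
 * series): size of MEMBER inside TYPE, without needing an object. */
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))

struct pkt_hdr { u16 proto; };

/* The JIT's BUILD_BUG_ON() guards reduce to assertions of this shape: */
_Static_assert(sizeof_field(struct pkt_hdr, proto) == sizeof(u16),
	       "emit_load16 requires a 16-bit field");
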
config PVH @@ -887,16 +889,16 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) config APB_TIMER - def_bool y if X86_INTEL_MID - prompt "Intel MID APB Timer Support" if X86_INTEL_MID - select DW_APB_TIMER - depends on X86_INTEL_MID && SFI - help - APB timer is the replacement for 8254, HPET on X86 MID platforms. - The APBT provides a stable time base on SMP - systems, unlike the TSC, but it is more expensive to access, - as it is off-chip. APB timers are always running regardless of CPU - C states, they are used as per CPU clockevent device when possible. + def_bool y if X86_INTEL_MID + prompt "Intel MID APB Timer Support" if X86_INTEL_MID + select DW_APB_TIMER + depends on X86_INTEL_MID && SFI + help + APB timer is the replacement for 8254, HPET on X86 MID platforms. + The APBT provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. APB timers are always running regardless of CPU + C states, they are used as per CPU clockevent device when possible. # Mark as expert because too many people got it wrong. # The code disables itself when not needed. @@ -1035,8 +1037,8 @@ config SCHED_MC_PRIO If unsure say Y here. config UP_LATE_INIT - def_bool y - depends on !SMP && X86_LOCAL_APIC + def_bool y + depends on !SMP && X86_LOCAL_APIC config X86_UP_APIC bool "Local APIC support on uniprocessors" if !PCI_MSI @@ -1185,8 +1187,8 @@ config X86_LEGACY_VM86 If unsure, say N here. config VM86 - bool - default X86_LEGACY_VM86 + bool + default X86_LEGACY_VM86 config X86_16BIT bool "Enable support for 16-bit segments" if EXPERT @@ -1207,10 +1209,10 @@ config X86_ESPFIX64 depends on X86_16BIT && X86_64 config X86_VSYSCALL_EMULATION - bool "Enable vsyscall emulation" if EXPERT - default y - depends on X86_64 - ---help--- + bool "Enable vsyscall emulation" if EXPERT + default y + depends on X86_64 + ---help--- This enables emulation of the legacy vsyscall page. Disabling it is roughly equivalent to booting with vsyscall=none, except that it will also disable the helpful warning if a program @@ -1512,7 +1514,7 @@ config X86_CPA_STATISTICS bool "Enable statistic for Change Page Attribute" depends on DEBUG_FS ---help--- - Expose statistics about the Change Page Attribute mechanims, which + Expose statistics about the Change Page Attribute mechanism, which helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. @@ -1543,12 +1545,12 @@ config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT # Common NUMA Features config NUMA - bool "Numa Memory Allocation and Scheduler Support" + bool "NUMA Memory Allocation and Scheduler Support" depends on SMP depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) default y if X86_BIGSMP ---help--- - Enable NUMA (Non Uniform Memory Access) support. + Enable NUMA (Non-Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the local memory controller of the CPU and add some more @@ -1648,9 +1650,9 @@ config ARCH_PROC_KCORE_TEXT depends on X86_64 && PROC_KCORE config ILLEGAL_POINTER_VALUE - hex - default 0 if X86_32 - default 0xdead000000000000 if X86_64 + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 config X86_PMEM_LEGACY_DEVICE bool @@ -1991,11 +1993,12 @@ config EFI platforms. 
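
Several hunks in this section (sh GENERIC_LOCKBREAK and entry code, sparc rtrap_64.S) retarget CONFIG_PREEMPT guards to CONFIG_PREEMPTION. The point of the rename, sketched below as an assumption about the Kconfig wiring rather than quoted from it: PREEMPT_RT also produces a preemptible kernel, so low-level code that must run for any preemptible configuration tests the shared symbol.

/* Assumed Kconfig relationship (illustrative only):
 *
 *   config PREEMPTION        # common symbol
 *           bool
 *   config PREEMPT           # classic preemptible kernel
 *           select PREEMPTION
 *   config PREEMPT_RT        # real-time preemption
 *           select PREEMPTION
 *
 * which is why the entry-code guard now reads, as in the sh hunk:
 */
#ifdef CONFIG_PREEMPTION	/* true for PREEMPT and for PREEMPT_RT */
# define preempt_stop()	cli ; TRACE_IRQS_OFF
#else
# define preempt_stop()
#endif
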
config EFI_STUB - bool "EFI stub support" - depends on EFI && !X86_USE_3DNOW - select RELOCATABLE - ---help--- - This kernel feature allows a bzImage to be loaded directly + bool "EFI stub support" + depends on EFI && !X86_USE_3DNOW + depends on $(cc-option,-mabi=ms) || X86_32 + select RELOCATABLE + ---help--- + This kernel feature allows a bzImage to be loaded directly by EFI firmware without the use of a bootloader. See Documentation/admin-guide/efi-stub.rst for more information. diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index af9c967782f6..bc3a497c029c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -387,6 +387,14 @@ config X86_DEBUGCTLMSR def_bool y depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML +config IA32_FEAT_CTL + def_bool y + depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN + +config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL && X86_FEATURE_NAMES + menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EXPERT ---help--- diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 95410d6ee2ff..748b6d28a91d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -88,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index aa976adb7094..56aa5fa0a66b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -89,7 +89,7 @@ vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone -vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ +vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o \ $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o @@ -103,7 +103,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ + $(READELF) -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" 
>&2; \ exit 1; \ } || true; \ diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 72b08fde6de6..287393d725f0 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -6,6 +6,8 @@ * * ----------------------------------------------------------------------- */ +#pragma GCC visibility push(hidden) + #include <linux/efi.h> #include <linux/pci.h> @@ -19,32 +21,18 @@ #include "eboot.h" static efi_system_table_t *sys_table; +extern const bool efi_is64; -static struct efi_config *efi_early; - -__pure const struct efi_config *__efi_early(void) +__pure efi_system_table_t *efi_system_table(void) { - return efi_early; + return sys_table; } -#define BOOT_SERVICES(bits) \ -static void setup_boot_services##bits(struct efi_config *c) \ -{ \ - efi_system_table_##bits##_t *table; \ - \ - table = (typeof(table))sys_table; \ - \ - c->runtime_services = table->runtime; \ - c->boot_services = table->boottime; \ - c->text_output = table->con_out; \ -} -BOOT_SERVICES(32); -BOOT_SERVICES(64); - -void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +__attribute_const__ bool efi_is_64bit(void) { - efi_call_proto(efi_simple_text_output_protocol, output_string, - efi_early->text_output, str); + if (IS_ENABLED(CONFIG_EFI_MIXED)) + return efi_is64; + return IS_ENABLED(CONFIG_X86_64); } static efi_status_t @@ -63,17 +51,17 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) * large romsize. The UEFI spec limits the size of option ROMs to 16 * MiB so we reject any ROMs over 16 MiB in size to catch this. */ - romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol, - romimage, pci); - romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci); + romimage = efi_table_attr(pci, romimage); + romsize = efi_table_attr(pci, romsize); if (!romimage || !romsize || romsize > SZ_16M) return EFI_INVALID_PARAMETER; size = romsize + sizeof(*rom); - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&rom); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'rom'\n"); + efi_printk("Failed to allocate memory for 'rom'\n"); return status; } @@ -85,27 +73,24 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) rom->pcilen = pci->romsize; *__rom = rom; - status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, - EfiPciIoWidthUint16, PCI_VENDOR_ID, 1, - &rom->vendor); + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_VENDOR_ID, 1, &rom->vendor); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->vendor\n"); + efi_printk("Failed to read rom->vendor\n"); goto free_struct; } - status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, - EfiPciIoWidthUint16, PCI_DEVICE_ID, 1, - &rom->devid); + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_DEVICE_ID, 1, &rom->devid); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->devid\n"); + efi_printk("Failed to read rom->devid\n"); goto free_struct; } - status = efi_call_proto(efi_pci_io_protocol, get_location, pci, - &rom->segment, &rom->bus, &rom->device, - &rom->function); + status = efi_call_proto(pci, get_location, &rom->segment, &rom->bus, + &rom->device, &rom->function); if (status != EFI_SUCCESS) goto free_struct; @@ -114,7 +99,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) return status; 
free_struct: - efi_call_early(free_pool, rom); + efi_bs_call(free_pool, rom); return status; } @@ -133,27 +118,24 @@ static void setup_efi_pci(struct boot_params *params) void **pci_handle = NULL; efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; unsigned long size = 0; - unsigned long nr_pci; struct setup_data *data; + efi_handle_t h; int i; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &pci_proto, NULL, &size, pci_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); if (status == EFI_BUFFER_TOO_SMALL) { - status = efi_call_early(allocate_pool, - EFI_LOADER_DATA, - size, (void **)&pci_handle); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&pci_handle); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n"); + efi_printk("Failed to allocate memory for 'pci_handle'\n"); return; } - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, &pci_proto, - NULL, &size, pci_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); } if (status != EFI_SUCCESS) @@ -164,15 +146,12 @@ static void setup_efi_pci(struct boot_params *params) while (data && data->next) data = (struct setup_data *)(unsigned long)data->next; - nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32)); - for (i = 0; i < nr_pci; i++) { + for_each_efi_handle(h, pci_handle, size, i) { efi_pci_io_protocol_t *pci = NULL; struct pci_setup_rom *rom; - status = efi_call_early(handle_protocol, - efi_is_64bit() ? ((u64 *)pci_handle)[i] - : ((u32 *)pci_handle)[i], - &pci_proto, (void **)&pci); + status = efi_bs_call(handle_protocol, h, &pci_proto, + (void **)&pci); if (status != EFI_SUCCESS || !pci) continue; @@ -189,7 +168,7 @@ static void setup_efi_pci(struct boot_params *params) } free_handle: - efi_call_early(free_pool, pci_handle); + efi_bs_call(free_pool, pci_handle); } static void retrieve_apple_device_properties(struct boot_params *boot_params) @@ -198,34 +177,34 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) struct setup_data *data, *new; efi_status_t status; u32 size = 0; - void *p; + apple_properties_protocol_t *p; - status = efi_call_early(locate_protocol, &guid, NULL, &p); + status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&p); if (status != EFI_SUCCESS) return; - if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) { - efi_printk(sys_table, "Unsupported properties proto version\n"); + if (efi_table_attr(p, version) != 0x10000) { + efi_printk("Unsupported properties proto version\n"); return; } - efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size); + efi_call_proto(p, get_all, NULL, &size); if (!size) return; do { - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size + sizeof(struct setup_data), &new); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + size + sizeof(struct setup_data), + (void **)&new); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'properties'\n"); + efi_printk("Failed to allocate memory for 'properties'\n"); return; } - status = efi_call_proto(apple_properties_protocol, get_all, p, - new->data, &size); + status = efi_call_proto(p, get_all, new->data, &size); if (status == EFI_BUFFER_TOO_SMALL) - efi_call_early(free_pool, new); + efi_bs_call(free_pool, new); } while (status == EFI_BUFFER_TOO_SMALL); new->type = SETUP_APPLE_PROPERTIES; @@ -247,7 +226,7 @@ static 
const efi_char16_t apple[] = L"Apple"; static void setup_quirks(struct boot_params *boot_params) { efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor, sys_table); + efi_table_attr(efi_system_table(), fw_vendor); if (!memcmp(fw_vendor, apple, sizeof(apple))) { if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) @@ -265,17 +244,16 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) u32 width, height; void **uga_handle = NULL; efi_uga_draw_protocol_t *uga = NULL, *first_uga; - unsigned long nr_ugas; + efi_handle_t handle; int i; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)&uga_handle); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&uga_handle); if (status != EFI_SUCCESS) return status; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - uga_proto, NULL, &size, uga_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + uga_proto, NULL, &size, uga_handle); if (status != EFI_SUCCESS) goto free_handle; @@ -283,24 +261,20 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) width = 0; first_uga = NULL; - nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32)); - for (i = 0; i < nr_ugas; i++) { + for_each_efi_handle(handle, uga_handle, size, i) { efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; u32 w, h, depth, refresh; void *pciio; - unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i] - : ((u32 *)uga_handle)[i]; - status = efi_call_early(handle_protocol, handle, - uga_proto, (void **)&uga); + status = efi_bs_call(handle_protocol, handle, uga_proto, + (void **)&uga); if (status != EFI_SUCCESS) continue; pciio = NULL; - efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); + efi_bs_call(handle_protocol, handle, &pciio_proto, &pciio); - status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga, - &w, &h, &depth, &refresh); + status = efi_call_proto(uga, get_mode, &w, &h, &depth, &refresh); if (status == EFI_SUCCESS && (!first_uga || pciio)) { width = w; height = h; @@ -336,7 +310,7 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) si->rsvd_pos = 24; free_handle: - efi_call_early(free_pool, uga_handle); + efi_bs_call(free_pool, uga_handle); return status; } @@ -355,37 +329,38 @@ void setup_graphics(struct boot_params *boot_params) memset(si, 0, sizeof(*si)); size = 0; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &graphics_proto, NULL, &size, gop_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) - status = efi_setup_gop(NULL, si, &graphics_proto, size); + status = efi_setup_gop(si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &uga_proto, NULL, &size, uga_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &uga_proto, NULL, &size, uga_handle); if (status == EFI_BUFFER_TOO_SMALL) setup_uga(si, &uga_proto, size); } } +void startup_32(struct boot_params *boot_params); + +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + /* * Because the x86 boot code expects to be passed a boot_params we * need to create one ourselves (usually the bootloader would create * one for us). 
- * - * The caller is responsible for filling out ->code32_start in the - * returned boot_params. */ -struct boot_params *make_boot_params(struct efi_config *c) +efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg) { struct boot_params *boot_params; struct apm_bios_info *bi; struct setup_header *hdr; efi_loaded_image_t *image; - void *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; int options_size = 0; efi_status_t status; @@ -393,31 +368,22 @@ struct boot_params *make_boot_params(struct efi_config *c) unsigned long ramdisk_addr; unsigned long ramdisk_size; - efi_early = c; - sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; - handle = (void *)(unsigned long)efi_early->image_handle; + sys_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) - return NULL; - - if (efi_is_64bit()) - setup_boot_services64(efi_early); - else - setup_boot_services32(efi_early); + return EFI_INVALID_PARAMETER; - status = efi_call_early(handle_protocol, handle, - &proto, (void *)&image); + status = efi_bs_call(handle_protocol, handle, &proto, (void *)&image); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); - return NULL; + efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); + return status; } - status = efi_low_alloc(sys_table, 0x4000, 1, - (unsigned long *)&boot_params); + status = efi_low_alloc(0x4000, 1, (unsigned long *)&boot_params); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate lowmem for boot params\n"); - return NULL; + efi_printk("Failed to allocate lowmem for boot params\n"); + return status; } memset(boot_params, 0x0, 0x4000); @@ -439,7 +405,7 @@ struct boot_params *make_boot_params(struct efi_config *c) hdr->type_of_loader = 0x21; /* Convert unicode cmdline to ascii */ - cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size); + cmdline_ptr = efi_convert_cmdline(image, &options_size); if (!cmdline_ptr) goto fail; @@ -457,15 +423,15 @@ struct boot_params *make_boot_params(struct efi_config *c) if (status != EFI_SUCCESS) goto fail2; - status = handle_cmdline_files(sys_table, image, + status = handle_cmdline_files(image, (char *)(unsigned long)hdr->cmd_line_ptr, "initrd=", hdr->initrd_addr_max, &ramdisk_addr, &ramdisk_size); if (status != EFI_SUCCESS && hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) { - efi_printk(sys_table, "Trying to load files to higher address\n"); - status = handle_cmdline_files(sys_table, image, + efi_printk("Trying to load files to higher address\n"); + status = handle_cmdline_files(image, (char *)(unsigned long)hdr->cmd_line_ptr, "initrd=", -1UL, &ramdisk_addr, &ramdisk_size); @@ -478,14 +444,17 @@ struct boot_params *make_boot_params(struct efi_config *c) boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32; boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32; - return boot_params; + hdr->code32_start = (u32)(unsigned long)startup_32; + + efi_stub_entry(handle, sys_table, boot_params); + /* not reached */ fail2: - efi_free(sys_table, options_size, hdr->cmd_line_ptr); + efi_free(options_size, hdr->cmd_line_ptr); fail: - efi_free(sys_table, 0x4000, (unsigned long)boot_params); + efi_free(0x4000, (unsigned long)boot_params); - return NULL; + return status; } static void add_e820ext(struct boot_params *params, @@ -620,13 +589,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, sizeof(struct 
e820_entry) * nr_desc; if (*e820ext) { - efi_call_early(free_pool, *e820ext); + efi_bs_call(free_pool, *e820ext); *e820ext = NULL; *e820ext_size = 0; } - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)e820ext); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)e820ext); if (status == EFI_SUCCESS) *e820ext_size = size; @@ -650,7 +619,7 @@ static efi_status_t allocate_e820(struct boot_params *params, boot_map.key_ptr = NULL; boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table, &boot_map); + status = efi_get_memory_map(&boot_map); if (status != EFI_SUCCESS) return status; @@ -672,8 +641,7 @@ struct exit_boot_struct { struct efi_info *efi; }; -static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, - struct efi_boot_memmap *map, +static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) { const char *signature; @@ -683,14 +651,14 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, : EFI32_LOADER_SIGNATURE; memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); - p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_systab = (unsigned long)efi_system_table(); p->efi->efi_memdesc_size = *map->desc_size; p->efi->efi_memdesc_version = *map->desc_ver; p->efi->efi_memmap = (unsigned long)*map->map; p->efi->efi_memmap_size = *map->map_size; #ifdef CONFIG_X86_64 - p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_systab_hi = (unsigned long)efi_system_table() >> 32; p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; #endif @@ -722,8 +690,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return status; /* Might as well exit boot services now */ - status = efi_exit_boot_services(sys_table, handle, &map, &priv, - exit_boot_func); + status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func); if (status != EFI_SUCCESS) return status; @@ -741,33 +708,22 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) * On success we return a pointer to a boot_params structure, and NULL * on failure. */ -struct boot_params * -efi_main(struct efi_config *c, struct boot_params *boot_params) +struct boot_params *efi_main(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) { struct desc_ptr *gdt = NULL; struct setup_header *hdr = &boot_params->hdr; efi_status_t status; struct desc_struct *desc; - void *handle; - efi_system_table_t *_table; unsigned long cmdline_paddr; - efi_early = c; - - _table = (efi_system_table_t *)(unsigned long)efi_early->table; - handle = (void *)(unsigned long)efi_early->image_handle; - - sys_table = _table; + sys_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; - if (efi_is_64bit()) - setup_boot_services64(efi_early); - else - setup_boot_services32(efi_early); - /* * make_boot_params() may have been called before efi_main(), in which * case this is the second time we parse the cmdline. This is ok, @@ -782,14 +738,14 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) * otherwise we ask the BIOS. 
*/ if (boot_params->secure_boot == efi_secureboot_mode_unset) - boot_params->secure_boot = efi_get_secureboot(sys_table); + boot_params->secure_boot = efi_get_secureboot(); /* Ask the firmware to clear memory on unclean shutdown */ - efi_enable_reset_attack_mitigation(sys_table); + efi_enable_reset_attack_mitigation(); - efi_random_get_seed(sys_table); + efi_random_get_seed(); - efi_retrieve_tpm2_eventlog(sys_table); + efi_retrieve_tpm2_eventlog(); setup_graphics(boot_params); @@ -797,18 +753,17 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) setup_quirks(boot_params); - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - sizeof(*gdt), (void **)&gdt); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt), + (void **)&gdt); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n"); + efi_printk("Failed to allocate memory for 'gdt' structure\n"); goto fail; } gdt->size = 0x800; - status = efi_low_alloc(sys_table, gdt->size, 8, - (unsigned long *)&gdt->address); + status = efi_low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n"); + efi_printk("Failed to allocate memory for 'gdt'\n"); goto fail; } @@ -818,13 +773,13 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) */ if (hdr->pref_address != hdr->code32_start) { unsigned long bzimage_addr = hdr->code32_start; - status = efi_relocate_kernel(sys_table, &bzimage_addr, + status = efi_relocate_kernel(&bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, hdr->kernel_alignment, LOAD_PHYSICAL_ADDR); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); + efi_printk("efi_relocate_kernel() failed!\n"); goto fail; } @@ -834,7 +789,7 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "exit_boot() failed!\n"); + efi_printk("exit_boot() failed!\n"); goto fail; } @@ -927,7 +882,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) return boot_params; fail: - efi_printk(sys_table, "efi_main() failed!\n"); + efi_printk("efi_main() failed!\n"); - return NULL; + for (;;) + asm("hlt"); } diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 8297387c4676..99f35343d443 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -12,22 +12,20 @@ #define DESC_TYPE_CODE_DATA (1 << 0) -typedef struct { - u32 get_mode; - u32 set_mode; - u32 blt; -} efi_uga_draw_protocol_32_t; +typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; -typedef struct { - u64 get_mode; - u64 set_mode; - u64 blt; -} efi_uga_draw_protocol_64_t; - -typedef struct { - void *get_mode; - void *set_mode; - void *blt; -} efi_uga_draw_protocol_t; +union efi_uga_draw_protocol { + struct { + efi_status_t (__efiapi *get_mode)(efi_uga_draw_protocol_t *, + u32*, u32*, u32*, u32*); + void *set_mode; + void *blt; + }; + struct { + u32 get_mode; + u32 set_mode; + u32 blt; + } mixed_mode; +}; #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S deleted file mode 100644 index ed6c351d34ed..000000000000 --- a/arch/x86/boot/compressed/efi_stub_32.S +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * EFI call stub for IA32. 
- * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. Note that this implementation is different from the one in - * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical - * mode at this point. - */ - -#include <linux/linkage.h> -#include <asm/page_types.h> - -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ - -.text -SYM_FUNC_START(efi_call_phys) - /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prelog and epilog. - */ - - /* - * 1. Because we haven't been relocated by this point we need to - * use relative addressing. - */ - call 1f -1: popl %edx - subl $1b, %edx - - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %ecx - movl %ecx, saved_return_addr(%edx) - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr(%edx) - - /* - * 3. Call the physical function. - */ - call *%ecx - - /* - * 4. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. We need to calculate our address - * again because %ecx and %edx are not preserved across EFI function - * calls. - */ - call 1f -1: popl %edx - subl $1b, %edx - - movl efi_rt_function_ptr(%edx), %ecx - pushl %ecx - - /* - * 10. Push the saved return address onto the stack and return. - */ - movl saved_return_addr(%edx), %ecx - pushl %ecx - ret -SYM_FUNC_END(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S deleted file mode 100644 index 99494dff2113..000000000000 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ /dev/null @@ -1,5 +0,0 @@ -#include <asm/segment.h> -#include <asm/msr.h> -#include <asm/processor-flags.h> - -#include "../../platform/efi/efi_stub_64.S" diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index 593913692d16..8fb7f6799c52 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -10,7 +10,7 @@ * needs to be able to service interrupts. * * On the plus side, we don't have to worry about mangling 64-bit - * addresses into 32-bits because we're executing with an identify + * addresses into 32-bits because we're executing with an identity * mapped pagetable and haven't transitioned to 64-bit virtual addresses * yet. 
*/ @@ -23,16 +23,13 @@ .code64 .text -SYM_FUNC_START(efi64_thunk) +SYM_FUNC_START(__efi64_thunk) push %rbp push %rbx - subq $8, %rsp - leaq efi_exit32(%rip), %rax - movl %eax, 4(%rsp) - leaq efi_gdt64(%rip), %rax - movl %eax, (%rsp) - movl %eax, 2(%rax) /* Fixup the gdt base address */ + leaq 1f(%rip), %rbp + leaq efi_gdt64(%rip), %rbx + movl %ebx, 2(%rbx) /* Fixup the gdt base address */ movl %ds, %eax push %rax @@ -48,15 +45,10 @@ SYM_FUNC_START(efi64_thunk) movl %esi, 0x0(%rsp) movl %edx, 0x4(%rsp) movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) + movl %r8d, 0xc(%rsp) + movl %r9d, 0x10(%rsp) - sgdt save_gdt(%rip) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) + sgdt 0x14(%rsp) /* * Switch to gdt with 32-bit segments. This is the firmware GDT @@ -71,9 +63,9 @@ SYM_FUNC_START(efi64_thunk) pushq %rax lretq -1: addq $32, %rsp - - lgdt save_gdt(%rip) +1: lgdt 0x14(%rsp) + addq $32, %rsp + movq %rdi, %rax pop %rbx movl %ebx, %ss @@ -85,26 +77,13 @@ SYM_FUNC_START(efi64_thunk) /* * Convert 32-bit status code into 64-bit. */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - addq $8, %rsp + roll $1, %eax + rorq $1, %rax + pop %rbx pop %rbp ret -SYM_FUNC_END(efi64_thunk) - -SYM_FUNC_START_LOCAL(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -SYM_FUNC_END(efi_exit32) +SYM_FUNC_END(__efi64_thunk) .code32 /* @@ -144,9 +123,7 @@ SYM_FUNC_START_LOCAL(efi_enter32) */ cli - movl 56(%esp), %eax - movl %eax, 2(%eax) - lgdtl (%eax) + lgdtl (%ebx) movl %cr4, %eax btsl $(X86_CR4_PAE_BIT), %eax @@ -163,9 +140,8 @@ SYM_FUNC_START_LOCAL(efi_enter32) xorl %eax, %eax lldt %ax - movl 60(%esp), %eax pushl $__KERNEL_CS - pushl %eax + pushl %ebp /* Enable paging */ movl %cr0, %eax @@ -181,13 +157,6 @@ SYM_DATA_START(efi32_boot_gdt) .quad 0 SYM_DATA_END(efi32_boot_gdt) -SYM_DATA_START_LOCAL(save_gdt) - .word 0 - .quad 0 -SYM_DATA_END(save_gdt) - -SYM_DATA_LOCAL(func_rt_ptr, .quad 0) - SYM_DATA_START(efi_gdt64) .word efi_gdt64_end - efi_gdt64 .long 0 /* Filled out by user */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f2dfd6d083ef..73f17d0544dd 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -145,67 +145,16 @@ SYM_FUNC_START(startup_32) SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB -/* - * We don't need the return address, so set up the stack so efi_main() can find - * its arguments. 
- */ -SYM_FUNC_START(efi_pe_entry) - add $0x4, %esp - - call 1f -1: popl %esi - subl $1b, %esi - - popl %ecx - movl %ecx, efi32_config(%esi) /* Handle */ - popl %ecx - movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */ - - /* Relocate efi_config->call() */ - leal efi32_config(%esi), %eax - add %esi, 40(%eax) - pushl %eax - - call make_boot_params - cmpl $0, %eax - je fail - movl %esi, BP_code32_start(%eax) - popl %ecx - pushl %eax - pushl %ecx - jmp 2f /* Skip efi_config initialization */ -SYM_FUNC_END(efi_pe_entry) - SYM_FUNC_START(efi32_stub_entry) +SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp - popl %ecx - popl %edx - - call 1f -1: popl %esi - subl $1b, %esi - - movl %ecx, efi32_config(%esi) /* Handle */ - movl %edx, efi32_config+8(%esi) /* EFI System table pointer */ - - /* Relocate efi_config->call() */ - leal efi32_config(%esi), %eax - add %esi, 40(%eax) - pushl %eax -2: call efi_main - cmpl $0, %eax movl %eax, %esi - jne 2f -fail: - /* EFI init failed, so hang. */ - hlt - jmp fail -2: movl BP_code32_start(%esi), %eax leal startup_32(%eax), %eax jmp *%eax SYM_FUNC_END(efi32_stub_entry) +SYM_FUNC_END_ALIAS(efi_stub_entry) #endif .text @@ -240,11 +189,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) /* push arguments for extract_kernel: */ pushl $z_output_len /* decompressed length, end of relocs */ - movl BP_init_size(%esi), %eax - subl $_end, %eax - movl %ebx, %ebp - subl %eax, %ebp - pushl %ebp /* output address */ + leal _end(%ebx), %eax + subl BP_init_size(%esi), %eax + pushl %eax /* output address */ pushl $z_input_len /* input_len */ leal input_data(%ebx), %eax @@ -262,15 +209,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) jmp *%eax SYM_FUNC_END(.Lrelocated) -#ifdef CONFIG_EFI_STUB - .data -efi32_config: - .fill 5,8,0 - .long efi_call_phys - .long 0 - .byte 0 -#endif - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 58a512e33d8d..1f1f6c8139b3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -208,10 +208,12 @@ SYM_FUNC_START(startup_32) pushl $__KERNEL_CS leal startup_64(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl efi32_config(%ebp), %ebx - cmp $0, %ebx + movl efi32_boot_args(%ebp), %edi + cmp $0, %edi jz 1f - leal handover_entry(%ebp), %eax + leal efi64_stub_entry(%ebp), %eax + movl %esi, %edx + movl efi32_boot_args+4(%ebp), %esi 1: #endif pushl %eax @@ -232,17 +234,19 @@ SYM_FUNC_START(efi32_stub_entry) popl %edx popl %esi - leal (BP_scratch+4)(%esi), %esp call 1f 1: pop %ebp subl $1b, %ebp - movl %ecx, efi32_config(%ebp) - movl %edx, efi32_config+8(%ebp) + movl %ecx, efi32_boot_args(%ebp) + movl %edx, efi32_boot_args+4(%ebp) sgdtl efi32_boot_gdt(%ebp) + movb $0, efi_is64(%ebp) - leal efi32_config(%ebp), %eax - movl %eax, efi_config(%ebp) + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 jmp startup_32 SYM_FUNC_END(efi32_stub_entry) @@ -445,70 +449,17 @@ trampoline_return: SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB - -/* The entry point for the PE/COFF executable is efi_pe_entry. */ -SYM_FUNC_START(efi_pe_entry) - movq %rcx, efi64_config(%rip) /* Handle */ - movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */ - - leaq efi64_config(%rip), %rax - movq %rax, efi_config(%rip) - - call 1f -1: popq %rbp - subq $1b, %rbp - - /* - * Relocate efi_config->call(). 
- */ - addq %rbp, efi64_config+40(%rip) - - movq %rax, %rdi - call make_boot_params - cmpq $0,%rax - je fail - mov %rax, %rsi - leaq startup_32(%rip), %rax - movl %eax, BP_code32_start(%rsi) - jmp 2f /* Skip the relocation */ - -handover_entry: - call 1f -1: popq %rbp - subq $1b, %rbp - - /* - * Relocate efi_config->call(). - */ - movq efi_config(%rip), %rax - addq %rbp, 40(%rax) -2: - movq efi_config(%rip), %rdi + .org 0x390 +SYM_FUNC_START(efi64_stub_entry) +SYM_FUNC_START_ALIAS(efi_stub_entry) + and $~0xf, %rsp /* realign the stack */ call efi_main movq %rax,%rsi - cmpq $0,%rax - jne 2f -fail: - /* EFI init failed, so hang. */ - hlt - jmp fail -2: movl BP_code32_start(%esi), %eax leaq startup_64(%rax), %rax jmp *%rax -SYM_FUNC_END(efi_pe_entry) - - .org 0x390 -SYM_FUNC_START(efi64_stub_entry) - movq %rdi, efi64_config(%rip) /* Handle */ - movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ - - leaq efi64_config(%rip), %rax - movq %rax, efi_config(%rip) - - movq %rdx, %rsi - jmp handover_entry SYM_FUNC_END(efi64_stub_entry) +SYM_FUNC_END_ALIAS(efi_stub_entry) #endif .text @@ -677,24 +628,11 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x0000000000000000 /* TS continued */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) -#ifdef CONFIG_EFI_STUB -SYM_DATA_LOCAL(efi_config, .quad 0) - #ifdef CONFIG_EFI_MIXED -SYM_DATA_START(efi32_config) - .fill 5,8,0 - .quad efi64_thunk - .byte 0 -SYM_DATA_END(efi32_config) +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0) +SYM_DATA(efi_is64, .byte 1) #endif -SYM_DATA_START(efi64_config) - .fill 5,8,0 - .quad efi_call - .byte 1 -SYM_DATA_END(efi64_config) -#endif /* CONFIG_EFI_STUB */ - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 9caa10e82217..da0ccc5de538 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -15,6 +15,7 @@ #include "../include/asm/required-features.h" #include "../include/asm/disabled-features.h" #include "../include/asm/cpufeatures.h" +#include "../include/asm/vmxfeatures.h" #include "../kernel/cpu/capflags.c" int main(void) diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 0149e41d42c2..3da1c37c6dd5 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -51,7 +51,10 @@ SECTIONS . = ALIGN(16); _end = .; - /DISCARD/ : { *(.note*) } + /DISCARD/ : { + *(.eh_frame) + *(.note*) + } /* * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 76942cbd95a1..f2bb91e87877 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1728,7 +1728,7 @@ SYM_CODE_END(nmi) SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax - sysret + sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 93c6dc7812d0..ea7e0155c604 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -16,18 +16,23 @@ SECTIONS * segment. */ - vvar_start = . - 3 * PAGE_SIZE; - vvar_page = vvar_start; + vvar_start = . - 4 * PAGE_SIZE; + vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. 
*/ #define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset; -#define __VVAR_KERNEL_LDS #include <asm/vvar.h> -#undef __VVAR_KERNEL_LDS #undef EMIT_VVAR pvclock_page = vvar_start + PAGE_SIZE; hvclock_page = vvar_start + 2 * PAGE_SIZE; + timens_page = vvar_start + 3 * PAGE_SIZE; + +#undef _ASM_X86_VVAR_H + /* Place all vvars in timens too at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset; +#include <asm/vvar.h> +#undef EMIT_VVAR . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 3a4d8d4d39f8..3842873b3ae3 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -75,12 +75,14 @@ enum { sym_vvar_page, sym_pvclock_page, sym_hvclock_page, + sym_timens_page, }; const int special_pages[] = { sym_vvar_page, sym_pvclock_page, sym_hvclock_page, + sym_timens_page, }; struct vdso_sym { @@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = { [sym_vvar_page] = {"vvar_page", true}, [sym_pvclock_page] = {"pvclock_page", true}, [sym_hvclock_page] = {"hvclock_page", true}, + [sym_timens_page] = {"timens_page", true}, {"VDSO32_NOTE_MASK", true}, {"__kernel_vsyscall", true}, {"__kernel_sigreturn", true}, diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 240626e7f55a..43842fade8fa 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/kernel.h> #include <linux/mm_types.h> +#include <linux/elf.h> #include <asm/processor.h> #include <asm/vdso.h> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index f5937742b290..c1b8496b5606 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -14,16 +14,30 @@ #include <linux/elf.h> #include <linux/cpu.h> #include <linux/ptrace.h> +#include <linux/time_namespace.h> + #include <asm/pvclock.h> #include <asm/vgtod.h> #include <asm/proto.h> #include <asm/vdso.h> #include <asm/vvar.h> +#include <asm/tlb.h> #include <asm/page.h> #include <asm/desc.h> #include <asm/cpufeature.h> #include <clocksource/hyperv_timer.h> +#undef _ASM_X86_VVAR_H +#define EMIT_VVAR(name, offset) \ + const size_t name ## _offset = offset; +#include <asm/vvar.h> + +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page + _vdso_data_offset); +} +#undef EMIT_VVAR + #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; #endif @@ -37,6 +51,7 @@ void __init init_vdso_image(const struct vdso_image *image) image->alt_len)); } +static const struct vm_special_mapping vvar_mapping; struct linux_binprm; static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, @@ -84,10 +99,74 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } +static int vvar_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + const struct vdso_image *image = new_vma->vm_mm->context.vdso_image; + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + + if (new_size != -image->sym_vvar_start) + return -EINVAL; + + return 0; +} + +#ifdef CONFIG_TIME_NS +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). 
+ * For more details check_vma_flags() and __access_remote_vm() + */ + + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} + +/* + * The vvar page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + if (down_write_killable(&mm->mmap_sem)) + return -EINTR; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vvar_mapping)) + zap_page_range(vma, vma->vm_start, size); + } + + up_write(&mm->mmap_sem); + return 0; +} +#else +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { const struct vdso_image *image = vma->vm_mm->context.vdso_image; + unsigned long pfn; long sym_offset; if (!image) @@ -107,8 +186,36 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; if (sym_offset == image->sym_vvar_page) { - return vmf_insert_pfn(vma, vmf->address, - __pa_symbol(&__vvar_page) >> PAGE_SHIFT); + struct page *timens_page = find_timens_vvar_page(vma); + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the sym_vvar_page offset and + * the real VVAR page is mapped with the sym_timens_page + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (timens_page) { + unsigned long addr; + vm_fault_t err; + + /* + * Optimization: inside time namespace pre-fault + * VVAR page too. As on timens page there are only + * offsets for clocks on VVAR, it'll be faulted + * shortly by VDSO code. + */ + addr = vmf->address + (image->sym_timens_page - sym_offset); + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; + + pfn = page_to_pfn(timens_page); + } + + return vmf_insert_pfn(vma, vmf->address, pfn); } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = pvclock_get_pvti_cpu0_va(); @@ -123,6 +230,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK)) return vmf_insert_pfn(vma, vmf->address, virt_to_phys(tsc_pg) >> PAGE_SHIFT); + } else if (sym_offset == image->sym_timens_page) { + struct page *timens_page = find_timens_vvar_page(vma); + + if (!timens_page) + return VM_FAULT_SIGBUS; + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + return vmf_insert_pfn(vma, vmf->address, pfn); } return VM_FAULT_SIGBUS; @@ -136,6 +251,7 @@ static const struct vm_special_mapping vdso_mapping = { static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", .fault = vvar_fault, + .mremap = vvar_mremap, }; /* diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index a7752cd78b89..1f22b6bbda68 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -14,6 +14,10 @@ static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); static unsigned long perf_nmi_window; +/* AMD Event 0xFFF: Merge. 
Used with Large Increment per Cycle events */ +#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL) +#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE) + static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -301,6 +305,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } +/* + * AMD64 events are detected based on their event codes. + */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + +static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) +{ + if (!(x86_pmu.flags & PMU_FL_PAIR)) + return false; + + switch (amd_get_event_code(hwc)) { + case 0x003: return true; /* Retired SSE/AVX FLOPs */ + default: return false; + } +} + static int amd_core_hw_config(struct perf_event *event) { if (event->attr.exclude_host && event->attr.exclude_guest) @@ -316,15 +339,10 @@ static int amd_core_hw_config(struct perf_event *event) else if (event->attr.exclude_guest) event->hw.config |= AMD64_EVENTSEL_HOSTONLY; - return 0; -} + if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) + event->hw.flags |= PERF_X86_EVENT_PAIR; -/* - * AMD64 events are detected based on their event codes. - */ -static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) -{ - return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); + return 0; } static inline int amd_is_nb_event(struct hw_perf_event *hwc) @@ -855,6 +873,29 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, } } +static struct event_constraint pair_constraint; + +static struct event_constraint * +amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (amd_is_pair_event_code(hwc)) + return &pair_constraint; + + return &unconstrained; +} + +static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (is_counter_pair(hwc)) + --cpuc->n_pair; +} + static ssize_t amd_event_sysfs_show(char *page, u64 config) { u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | @@ -898,33 +939,15 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { + u64 even_ctr_mask = 0ULL; + int i; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; - /* Avoid calulating the value each time in the NMI handler */ + /* Avoid calculating the value each time in the NMI handler */ perf_nmi_window = msecs_to_jiffies(100); - switch (boot_cpu_data.x86) { - case 0x15: - pr_cont("Fam15h "); - x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; - break; - case 0x17: - pr_cont("Fam17h "); - /* - * In family 17h, there are no event constraints in the PMC hardware. - * We fallback to using default amd_get_event_constraints. - */ - break; - case 0x18: - pr_cont("Fam18h "); - /* Using default amd_get_event_constraints. */ - break; - default: - pr_err("core perfctr but no constraints; unknown hardware!\n"); - return -ENODEV; - } - /* * If core performance counter extensions exists, we must use * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. 
See also @@ -939,6 +962,32 @@ static int __init amd_core_pmu_init(void) */ x86_pmu.amd_nb_constraints = 0; + if (boot_cpu_data.x86 == 0x15) { + pr_cont("Fam15h "); + x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; + } + if (boot_cpu_data.x86 >= 0x17) { + pr_cont("Fam17h+ "); + /* + * Family 17h and compatibles have constraints for Large + * Increment per Cycle events: they may only be assigned an + * even numbered counter that has a consecutive adjacent odd + * numbered counter following it. + */ + for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + even_ctr_mask |= 1 << i; + + pair_constraint = (struct event_constraint) + __EVENT_CONSTRAINT(0, even_ctr_mask, 0, + x86_pmu.num_counters / 2, 0, + PERF_X86_EVENT_PAIR); + + x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; + x86_pmu.put_event_constraints = amd_put_event_constraints_f17h; + x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE; + x86_pmu.flags |= PMU_FL_PAIR; + } + pr_cont("core perfctr, "); return 0; } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 9a89d98c55bd..3bb738f5a472 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -376,7 +376,7 @@ int x86_add_exclusive(unsigned int what) * LBR and BTS are still mutually exclusive. */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) - return 0; + goto out; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { mutex_lock(&pmc_reserve_mutex); @@ -388,6 +388,7 @@ int x86_add_exclusive(unsigned int what) mutex_unlock(&pmc_reserve_mutex); } +out: atomic_inc(&active_events); return 0; @@ -398,11 +399,15 @@ fail_unlock: void x86_del_exclusive(unsigned int what) { + atomic_dec(&active_events); + + /* + * See the comment in x86_add_exclusive(). + */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); - atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) @@ -613,6 +618,7 @@ void x86_pmu_disable_all(void) int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -622,6 +628,8 @@ void x86_pmu_disable_all(void) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(x86_pmu_config_addr(idx), val); + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(idx + 1), 0); } } @@ -694,7 +702,7 @@ struct sched_state { int counter; /* counter index */ int unassigned; /* number of events to be assigned left */ int nr_gp; /* number of GP counters used */ - unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 used; }; /* Total max is X86_PMC_IDX_MAX, but we are O(n!) 
limited */ @@ -751,8 +759,12 @@ static bool perf_sched_restore_state(struct perf_sched *sched) sched->saved_states--; sched->state = sched->saved[sched->saved_states]; - /* continue with next counter: */ - clear_bit(sched->state.counter++, sched->state.used); + /* this assignment didn't work out */ + /* XXX broken vs EVENT_PAIR */ + sched->state.used &= ~BIT_ULL(sched->state.counter); + + /* try the next one */ + sched->state.counter++; return true; } @@ -777,20 +789,32 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { - if (!__test_and_set_bit(idx, sched->state.used)) - goto done; + u64 mask = BIT_ULL(idx); + + if (sched->state.used & mask) + continue; + + sched->state.used |= mask; + goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { - if (!__test_and_set_bit(idx, sched->state.used)) { - if (sched->state.nr_gp++ >= sched->max_gp) - return false; + u64 mask = BIT_ULL(idx); - goto done; - } + if (c->flags & PERF_X86_EVENT_PAIR) + mask |= mask << 1; + + if (sched->state.used & mask) + continue; + + if (sched->state.nr_gp++ >= sched->max_gp) + return false; + + sched->state.used |= mask; + goto done; } return false; @@ -867,12 +891,10 @@ EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { struct event_constraint *c; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct perf_event *e; int n0, i, wmin, wmax, unsched = 0; struct hw_perf_event *hwc; - - bitmap_zero(used_mask, X86_PMC_IDX_MAX); + u64 used_mask = 0; /* * Compute the number of events already present; see x86_pmu_add(), @@ -915,6 +937,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * fastpath, try to reuse previous register */ for (i = 0; i < n; i++) { + u64 mask; + hwc = &cpuc->event_list[i]->hw; c = cpuc->event_constraint[i]; @@ -926,11 +950,16 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (!test_bit(hwc->idx, c->idxmsk)) break; + mask = BIT_ULL(hwc->idx); + if (is_counter_pair(hwc)) + mask |= mask << 1; + /* not already used */ - if (test_bit(hwc->idx, used_mask)) + if (used_mask & mask) break; - __set_bit(hwc->idx, used_mask); + used_mask |= mask; + if (assign) assign[i] = hwc->idx; } @@ -953,6 +982,15 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) READ_ONCE(cpuc->excl_cntrs->exclusive_present)) gpmax /= 2; + /* + * Reduce the amount of available counters to allow fitting + * the extra Merge events needed by large increment events. 
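+	 *
+	 * An illustrative sizing (assuming the usual six AMD core
+	 * counters): a group with two paired events has n_pair == 2, so
+	 * gpmax drops from 6 to 4, leaving room for the two odd-numbered
+	 * Merge slots next to the even counters the pairs occupy.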
+ */ + if (x86_pmu.flags & PMU_FL_PAIR) { + gpmax = x86_pmu.num_counters - cpuc->n_pair; + WARN_ON(gpmax <= 0); + } + unsched = perf_assign_events(cpuc->event_constraint, n, wmin, wmax, gpmax, assign); } @@ -1033,6 +1071,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, return -EINVAL; cpuc->event_list[n] = leader; n++; + if (is_counter_pair(&leader->hw)) + cpuc->n_pair++; } if (!dogrp) return n; @@ -1047,6 +1087,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, cpuc->event_list[n] = event; n++; + if (is_counter_pair(&event->hw)) + cpuc->n_pair++; } return n; } @@ -1233,6 +1275,13 @@ int x86_perf_event_set_period(struct perf_event *event) wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* + * Clear the Merge event counter's upper 16 bits since + * we currently declare a 48-bit counter width + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_event_addr(idx + 1), 0); + + /* * Due to erratum on certan cpu we need * a second write to be sure the register * is updated properly @@ -1642,9 +1691,12 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = { ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { - struct perf_pmu_events_attr *pmu_attr = \ + struct perf_pmu_events_attr *pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - u64 config = x86_pmu.event_map(pmu_attr->id); + u64 config = 0; + + if (pmu_attr->id < x86_pmu.max_events) + config = x86_pmu.event_map(pmu_attr->id); /* string trumps id */ if (pmu_attr->event_str) @@ -1713,6 +1765,9 @@ is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct perf_pmu_events_attr *pmu_attr; + if (idx >= x86_pmu.max_events) + return 0; + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); /* str trumps id */ return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 38de4a7f6752..6a3b599ee0fe 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,9 +63,17 @@ struct bts_buffer { static struct pmu bts_pmu; +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + static size_t buf_size(struct page *page) { - return 1 << (PAGE_SHIFT + page_private(page)); + return buf_nr_pages(page) * PAGE_SIZE; } static void * @@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, /* count all the high order buffers */ for (pg = 0, nbuf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); - if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) - return NULL; - pg += 1 << page_private(page); + pg += buf_nr_pages(page); nbuf++; } @@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, unsigned int __nr_pages; page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + __nr_pages = buf_nr_pages(page); buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? 
BTS_RECORD_SIZE - pad : 0); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ce83950036c5..4b94ae4ae369 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -7,6 +7,7 @@ #include <asm/perf_event.h> #include <asm/tlbflush.h> #include <asm/insn.h> +#include <asm/io.h> #include "../perf_event.h" diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 5053a403e4ae..09913121e726 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -741,6 +741,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE, model_skl), {}, }; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index dbaa1b088a30..c37cb12d0ef6 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,7 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_E3_IMC 0x1918 #define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c #define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 #define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 @@ -658,6 +659,10 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, @@ -826,6 +831,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(SKL_E3_IMC, &skl_uncore_pci_driver), /* Xeon E3 V5 Gen Core processor */ IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b10a5ec79e48..ad20220af303 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -369,11 +369,6 @@ #define SNR_M2M_PCI_PMON_BOX_CTL 0x438 #define SNR_M2M_PCI_PMON_UMASK_EXT 0xff -/* SNR PCIE3 */ -#define SNR_PCIE3_PCI_PMON_CTL0 0x508 -#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 -#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4 - /* SNR IMC */ #define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 #define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 @@ -4328,27 +4323,12 @@ static struct intel_uncore_type snr_uncore_m2m = { .format_group = &snr_m2m_uncore_format_group, }; -static struct intel_uncore_type snr_uncore_pcie3 = { - .name = "pcie3", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, - .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, - .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, - .ops = &ivbep_uncore_pci_ops, - .format_group = &ivbep_uncore_format_group, -}; - enum { 
SNR_PCI_UNCORE_M2M, - SNR_PCI_UNCORE_PCIE3, }; static struct intel_uncore_type *snr_pci_uncores[] = { [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, - [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, NULL, }; @@ -4357,10 +4337,6 @@ static const struct pci_device_id snr_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), }, - { /* PCIe3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), - }, { /* end: all zeroes */ } }; @@ -4536,6 +4512,7 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), + { /* end: all zeroes */ }, }; static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = { diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 930611db8f9a..f1cd1ca1a77b 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -77,6 +77,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) #define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ #define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ #define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */ +#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -272,6 +273,7 @@ struct cpu_hw_events { struct amd_nb *amd_nb; /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ u64 perf_ctr_virt_mask; + int n_pair; /* Large increment events */ void *kfree_on_online[X86_PERF_KFREE_MAX]; }; @@ -694,6 +696,7 @@ struct x86_pmu { * AMD bits */ unsigned int amd_nb_constraints : 1; + u64 perf_ctr_pair_en; /* * Extra registers for events @@ -743,6 +746,7 @@ do { \ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define PMU_FL_TFA 0x20 /* deal with TSX force abort */ +#define PMU_FL_PAIR 0x40 /* merge counters for large incr. 
events */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -838,6 +842,11 @@ int x86_pmu_hw_config(struct perf_event *event); void x86_pmu_disable_all(void); +static inline bool is_counter_pair(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_PAIR; +} + static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { @@ -845,6 +854,14 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, if (hwc->extra_reg.reg) wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); + + /* + * Add enabled Merge event on next counter + * if large increment event being enabled on this counter + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); + wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } @@ -861,6 +878,9 @@ static inline void x86_pmu_disable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; wrmsrl(hwc->config_base, hwc->config); + + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); } void x86_pmu_enable_event(struct perf_event *event); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 30416d7f19d4..a3aefe9b9401 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -114,8 +114,6 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, err |= fpu__restore_sig(buf, 1); - force_iret(); - return err; } diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index bc9693c9107e..ca0976456a6b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -13,7 +13,6 @@ #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mpspec.h> -#include <asm/realmode.h> #include <asm/x86_init.h> #ifdef CONFIG_ACPI_APEI @@ -62,7 +61,7 @@ static inline void acpi_disable_pci(void) extern int (*acpi_suspend_lowlevel)(void); /* Physical address to resume after wakeup */ -#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start)) +unsigned long acpi_get_wakeup_address(void); /* * Check if the CPU can handle C2 and deeper diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 804734058c77..02c0078d3787 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -6,6 +6,7 @@ #include <linux/percpu-defs.h> #include <asm/processor.h> #include <asm/intel_ds.h> +#include <asm/pgtable_areas.h> #ifdef CONFIG_X86_64 @@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); -/* Single page reserved for the readonly IDT mapping: */ -#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE -#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) - -#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) - -#define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) - extern struct cpu_entry_area *get_cpu_entry_area(int cpu); static inline struct entry_stack *cpu_entry_stack(int cpu) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index e9b62498fe75..f3327cb56edf 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -220,6 +220,7 @@ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" 
L1TF workaround PTE inversion */
 #define X86_FEATURE_IBRS_ENHANCED	( 7*32+30) /* Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL	( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -357,6 +358,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM		(18*32+ 4) /* Fast Short Rep Mov */
 #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index d028e9acdf1c..86169a24b0d8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -8,6 +8,7 @@
 #include <asm/tlb.h>
 #include <asm/nospec-branch.h>
 #include <asm/mmu_context.h>
+#include <linux/build_bug.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
@@ -19,13 +20,16 @@
  * This is the main reason why we're doing stable VA mappings for RT
  * services.
  *
- * This flag is used in conjunction with a chicken bit called
- * "efi=old_map" which can be used as a fallback to the old runtime
- * services mapping method in case there's some b0rkage with a
- * particular EFI implementation (haha, it is hard to hold up the
- * sarcasm here...).
+ * SGI UV1 machines are known to be incompatible with this scheme, so we
+ * provide an opt-out for these machines via a DMI quirk that sets the
+ * attribute below.
  */
-#define EFI_OLD_MEMMAP		EFI_ARCH_1
+#define EFI_UV1_MEMMAP		EFI_ARCH_1
+
+static inline bool efi_have_uv1_memmap(void)
+{
+	return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP);
+}
 
 #define EFI32_LOADER_SIGNATURE	"EL32"
 #define EFI64_LOADER_SIGNATURE	"EL64"
@@ -34,10 +38,46 @@
 #define ARCH_EFI_IRQ_FLAGS_MASK	X86_EFLAGS_IF
 
-#ifdef CONFIG_X86_32
+/*
+ * The EFI services are called through variadic functions in many cases. These
+ * functions are implemented in assembler and support only a fixed number of
+ * arguments. The macros below allow us to check at build time that we don't
+ * try to call them with too many arguments.
+ *
+ * __efi_nargs() will return the number of arguments if it is 7 or less, and
+ * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it
+ * impossible to calculate the exact number of arguments beyond some
+ * pre-defined limit. The maximum number of arguments currently supported by
+ * any of the thunks is 7, so this is good enough for now and can be extended
+ * in the obvious way if we ever need more.
+ */
 
-extern asmlinkage unsigned long efi_call_phys(void *, ...);
+#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__)
+#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__,	\
+	__efi_arg_sentinel(7), __efi_arg_sentinel(6),		\
+	__efi_arg_sentinel(5), __efi_arg_sentinel(4),		\
+	__efi_arg_sentinel(3), __efi_arg_sentinel(2),		\
+	__efi_arg_sentinel(1), __efi_arg_sentinel(0))
+#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...)	\
+	__take_second_arg(n,					\
+		({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; }))
+#define __efi_arg_sentinel(n) , n
+
+/*
+ * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis
+ * represents more than n arguments.
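+ *
+ * An illustrative expansion of the counting trick: __efi_nargs(a, b)
+ * passes "0, a, b, __efi_arg_sentinel(7), ..., __efi_arg_sentinel(0)"
+ * to __efi_nargs__(), so the parameter n receives __efi_arg_sentinel(2).
+ * That expands to ", 2", and the extra comma shifts the literal 2 into
+ * the val slot of __take_second_arg(), yielding 2. With eight or more
+ * arguments a real argument (carrying no hidden comma) lands in n, and
+ * the BUILD_BUG_ON_MSG() branch is selected instead.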
+ */ + +#define __efi_nargs_check(f, n, ...) \ + __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n) +#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n) +#define __efi_nargs_check__(f, p, n) ({ \ + BUILD_BUG_ON_MSG( \ + (p) > (n), \ + #f " called with too many arguments (" #p ">" #n ")"); \ +}) + +#ifdef CONFIG_X86_32 #define arch_efi_call_virt_setup() \ ({ \ kernel_fpu_begin(); \ @@ -51,13 +91,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); }) -/* - * Wrap all the virtual calls in a way that forces the parameters on the stack. - */ -#define arch_efi_call_virt(p, f, args...) \ -({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ -}) +#define arch_efi_call_virt(p, f, args...) p->f(args) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) @@ -65,9 +99,12 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); #define EFI_LOADER_SIGNATURE "EL64" -extern asmlinkage u64 efi_call(void *fp, ...); +extern asmlinkage u64 __efi_call(void *fp, ...); -#define efi_call_phys(f, args...) efi_call((f), args) +#define efi_call(...) ({ \ + __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ + __efi_call(__VA_ARGS__); \ +}) /* * struct efi_scratch - Scratch space used while switching to/from efi_mm @@ -85,7 +122,7 @@ struct efi_scratch { kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(&efi_mm); \ }) @@ -94,7 +131,7 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(efi_scratch.prev_mm); \ \ firmware_restrict_branch_speculation_end(); \ @@ -121,8 +158,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, extern struct efi_scratch efi_scratch; extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); -extern pgd_t * __init efi_call_phys_prolog(void); -extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); @@ -140,6 +175,8 @@ extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); +extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); +extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); struct efi_setup_data { u64 fw_vendor; @@ -152,93 +189,144 @@ struct efi_setup_data { extern u64 efi_setup; #ifdef CONFIG_EFI +extern efi_status_t __efi64_thunk(u32, ...); -static inline bool efi_is_native(void) +#define efi64_thunk(...) 
({ \ + __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \ + __efi64_thunk(__VA_ARGS__); \ +}) + +static inline bool efi_is_mixed(void) { - return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return false; + return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT); } static inline bool efi_runtime_supported(void) { - if (efi_is_native()) - return true; - - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) + if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - return false; + return IS_ENABLED(CONFIG_EFI_MIXED); } extern void parse_efi_setup(u64 phys_addr, u32 data_len); extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); -#ifdef CONFIG_EFI_MIXED extern void efi_thunk_runtime_setup(void); -extern efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map); -#else -static inline void efi_thunk_runtime_setup(void) {} -static inline efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - return EFI_SUCCESS; -} -#endif /* CONFIG_EFI_MIXED */ - +efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map); /* arch specific definitions used by the stub code */ -struct efi_config { - u64 image_handle; - u64 table; - u64 runtime_services; - u64 boot_services; - u64 text_output; - efi_status_t (*call)(unsigned long, ...); - bool is64; -} __packed; - -__pure const struct efi_config *__efi_early(void); +__attribute_const__ bool efi_is_64bit(void); -static inline bool efi_is_64bit(void) +static inline bool efi_is_native(void) { if (!IS_ENABLED(CONFIG_X86_64)) - return false; - + return true; if (!IS_ENABLED(CONFIG_EFI_MIXED)) return true; + return efi_is_64bit(); +} + +#define efi_mixed_mode_cast(attr) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(u32, __typeof__(attr)), \ + (unsigned long)(attr), (attr)) - return __efi_early()->is64; +#define efi_table_attr(inst, attr) \ + (efi_is_native() \ + ? inst->attr \ + : (__typeof__(inst->attr)) \ + efi_mixed_mode_cast(inst->mixed_mode.attr)) + +/* + * The following macros allow translating arguments if necessary from native to + * mixed mode. The use case for this is to initialize the upper 32 bits of + * output parameters, and where the 32-bit method requires a 64-bit argument, + * which must be split up into two arguments to be thunked properly. + * + * As examples, the AllocatePool boot service returns the address of the + * allocation, but it will not set the high 32 bits of the address. To ensure + * that the full 64-bit address is initialized, we zero-init the address before + * calling the thunk. + * + * The FreePages boot service takes a 64-bit physical address even in 32-bit + * mode. For the thunk to work correctly, a native 64-bit call of + * free_pages(addr, size) + * must be translated to + * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size) + * so that the two 32-bit halves of addr get pushed onto the stack separately. + */ + +static inline void *efi64_zero_upper(void *p) +{ + ((u32 *)p)[1] = 0; + return p; } -#define efi_table_attr(table, attr, instance) \ - (efi_is_64bit() ? 
\ - ((table##_64_t *)(unsigned long)instance)->attr : \ - ((table##_32_t *)(unsigned long)instance)->attr) +#define __efi64_argmap_free_pages(addr, size) \ + ((addr), 0, (size)) + +#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \ + ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver)) + +#define __efi64_argmap_allocate_pool(type, size, buffer) \ + ((type), (size), efi64_zero_upper(buffer)) -#define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call(efi_table_attr(protocol, f, instance), \ - instance, ##__VA_ARGS__) +#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ + ((handle), (protocol), efi64_zero_upper(interface)) -#define efi_call_early(f, ...) \ - __efi_early()->call(efi_table_attr(efi_boot_services, f, \ - __efi_early()->boot_services), __VA_ARGS__) +#define __efi64_argmap_locate_protocol(protocol, reg, interface) \ + ((protocol), (reg), efi64_zero_upper(interface)) -#define __efi_call_early(f, ...) \ - __efi_early()->call((unsigned long)f, __VA_ARGS__); +/* PCI I/O */ +#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ + ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ + efi64_zero_upper(dev), efi64_zero_upper(func)) + +/* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro + * __efi64_argmap_<method name>, following the examples above. + */ -#define efi_call_runtime(f, ...) \ - __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ - __efi_early()->runtime_services), __VA_ARGS__) +#define __efi64_thunk_map(inst, func, ...) \ + efi64_thunk(inst->mixed_mode.func, \ + __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \ + (__VA_ARGS__))) + +#define __efi64_argmap(mapped, args) \ + __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args) +#define __efi64_argmap__0(mapped, args) __efi_eval mapped +#define __efi64_argmap__1(mapped, args) __efi_eval args + +#define __efi_eat(...) +#define __efi_eval(...) __VA_ARGS__ + +/* The three macros below handle dispatching via the thunk if needed */ + +#define efi_call_proto(inst, func, ...) \ + (efi_is_native() \ + ? inst->func(inst, ##__VA_ARGS__) \ + : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__)) + +#define efi_bs_call(func, ...) \ + (efi_is_native() \ + ? efi_system_table()->boottime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + boottime), func, __VA_ARGS__)) + +#define efi_rt_call(func, ...) \ + (efi_is_native() \ + ? 
efi_system_table()->runtime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + runtime), func, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index c2a7458f912c..85be2f506272 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -47,8 +47,6 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -int ftrace_int3_handler(struct pt_regs *regs); - #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index c606c0b70738..4981c293f926 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -111,6 +111,7 @@ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 9e7adcdbe031..e6da1ce26256 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -31,30 +31,13 @@ #if IS_ENABLED(CONFIG_INTEL_PMC_IPC) -int intel_pmc_ipc_simple_command(int cmd, int sub); -int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr); int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); int intel_pmc_s0ix_counter_read(u64 *data); -int intel_pmc_gcr_read(u32 offset, u32 *data); int intel_pmc_gcr_read64(u32 offset, u64 *data); -int intel_pmc_gcr_write(u32 offset, u32 data); -int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val); #else -static inline int intel_pmc_ipc_simple_command(int cmd, int sub) -{ - return -EINVAL; -} - -static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr) -{ - return -EINVAL; -} - static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen) { @@ -66,26 +49,11 @@ static inline int intel_pmc_s0ix_counter_read(u64 *data) return -EINVAL; } -static inline int intel_pmc_gcr_read(u32 offset, u32 *data) -{ - return -EINVAL; -} - static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) { return -EINVAL; } -static inline int intel_pmc_gcr_write(u32 offset, u32 data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) -{ - return -EINVAL; -} - #endif /*CONFIG_INTEL_PMC_IPC*/ #endif diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 4a8c6e817398..2a1442ba6e78 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -22,24 +22,12 @@ /* Read single register */ int intel_scu_ipc_ioread8(u16 addr, u8 *data); -/* Read two sequential registers */ -int intel_scu_ipc_ioread16(u16 addr, u16 *data); - -/* Read four sequential registers */ -int intel_scu_ipc_ioread32(u16 addr, u32 *data); - /* Read a vector */ int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); /* Write single register */ int intel_scu_ipc_iowrite8(u16 addr, u8 data); -/* Write two sequential registers */ -int intel_scu_ipc_iowrite16(u16 addr, u16 data); - -/* Write four sequential registers */ -int intel_scu_ipc_iowrite32(u16 addr, u32 data); - /* Write a vector */ int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); 
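 
 /*
  * A minimal usage sketch of the byte-wide accessors that survive this
  * cleanup (illustrative only; "addr" is a hypothetical SCU register
  * offset, and both calls return 0 on success):
  *
  *	u8 val;
  *
  *	if (!intel_scu_ipc_ioread8(addr, &val))
  *		intel_scu_ipc_iowrite8(addr, val | BIT(0));
  */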
@@ -50,14 +38,6 @@ int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); int intel_scu_ipc_simple_command(int cmd, int sub); int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, u32 *out, int outlen); -int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen, - u32 *out, int outlen, u32 dptr, u32 sptr); - -/* I2C control api */ -int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); - -/* Update FW version */ -int intel_scu_ipc_fw_update(u8 *buffer, u32 length); extern struct blocking_notifier_head intel_scu_notifier; diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h index 214394860632..2f77e31a1283 100644 --- a/arch/x86/include/asm/intel_telemetry.h +++ b/arch/x86/include/asm/intel_telemetry.h @@ -40,13 +40,10 @@ struct telemetry_evtmap { struct telemetry_unit_config { struct telemetry_evtmap *telem_evts; void __iomem *regmap; - u32 ssram_base_addr; u8 ssram_evts_used; u8 curr_period; u8 max_period; u8 min_period; - u32 ssram_size; - }; struct telemetry_plt_config { diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 9997521fc5cd..e1aa17a468a8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -399,4 +399,40 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); +/** + * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units + * @__dst: destination, in MMIO space (must be 512-bit aligned) + * @src: source + * @count: number of 512 bits quantities to submit + * + * Submit data from kernel space to MMIO space, in units of 512 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + * + * Warning: Do not use this helper unless your driver has checked that the CPU + * instruction is supported on the platform. + */ +static inline void iosubmit_cmds512(void __iomem *__dst, const void *src, + size_t count) +{ + /* + * Note that this isn't an "on-stack copy", just definition of "dst" + * as a pointer to 64-bytes of stuff that is going to be overwritten. + * In the MOVDIR64B case that may be needed as you can use the + * MOVDIR64B instruction to copy arbitrary memory around. This trick + * lets the compiler know how much gets clobbered. 
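+	 *
+	 * Illustrative use (the caller and its names are hypothetical, and
+	 * checking for the instruction is the caller's job, as noted above):
+	 *
+	 *	if (boot_cpu_has(X86_FEATURE_MOVDIR64B))
+	 *		iosubmit_cmds512(wq_portal, descs, ndescs);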
+ */ + volatile struct { char _[64]; } *dst = __dst; + const u8 *from = src; + const u8 *end = from + count * 64; + + while (from < end) { + /* MOVDIR64B [rdx], rax */ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : "=m" (dst) + : "d" (from), "a" (dst)); + from += 64; + } +} + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 75f1e35e7c15..247ab14c6309 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,6 +33,7 @@ enum show_regs_mode { }; extern void die(const char *, struct pt_regs *,long); +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 5dc909d9ad81..95b1f053bd96 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -11,12 +11,11 @@ #include <asm-generic/kprobes.h> -#define BREAKPOINT_INSTRUCTION 0xcc - #ifdef CONFIG_KPROBES #include <linux/types.h> #include <linux/ptrace.h> #include <linux/percpu.h> +#include <asm/text-patching.h> #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -25,10 +24,7 @@ struct pt_regs; struct kprobe; typedef u8 kprobe_opcode_t; -#define RELATIVEJUMP_OPCODE 0xe9 -#define RELATIVEJUMP_SIZE 5 -#define RELATIVECALL_OPCODE 0xe8 -#define RELATIVE_ADDR_SIZE 4 + #define MAX_STACK_SIZE 64 #define CUR_STACK_SIZE(ADDR) \ (current_top_of_stack() - (unsigned long)(ADDR)) @@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[]; extern __visible kprobe_opcode_t optprobe_template_val[]; extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; -#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE) #define MAX_OPTINSN_SIZE \ (((unsigned long)optprobe_template_end - \ (unsigned long)optprobe_template_entry) + \ - MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) + MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE) extern const int kretprobe_blacklist_size; @@ -73,7 +69,7 @@ struct arch_specific_insn { struct arch_optimized_insn { /* copy of the original instructions */ - kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; + kprobe_opcode_t copied_insn[DISP32_SIZE]; /* detour code buffer */ kprobe_opcode_t *insn; /* the size of instructions copied to detour code buffer */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dc2d4b206ab7..4359b955e0b7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -144,7 +144,7 @@ struct mce_log_buffer { enum mce_notifier_prios { MCE_PRIO_FIRST = INT_MAX, - MCE_PRIO_SRAO = INT_MAX - 1, + MCE_PRIO_UC = INT_MAX - 1, MCE_PRIO_EXTLOG = INT_MAX - 2, MCE_PRIO_NFIT = INT_MAX - 3, MCE_PRIO_EDAC = INT_MAX - 4, @@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ + SMCA_LS_V2, /* Load Store */ SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000000..9c2447b3555d --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 
*/ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include <linux/types.h> +#include <asm/pgtable_types.h> + +extern bool pat_enabled(void); +extern void pat_disable(const char *reason); +extern void pat_init(void); +extern void init_cache_modes(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +extern void memtype_free_io(resource_size_t start, resource_size_t end); + +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 209492849566..6685e1218959 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -53,6 +53,6 @@ static inline void __init load_ucode_amd_bsp(unsigned int family) {} static inline void load_ucode_amd_ap(unsigned int family) {} static inline int __init save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -void reload_ucode_amd(void) {} +static inline void reload_ucode_amd(void) {} #endif #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5f33924e200f..b243234e90cb 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -69,14 +69,6 @@ struct ldt_struct { int slot; }; -/* This is a multiple of PAGE_SIZE. */ -#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) - -static inline void *ldt_slot_va(int slot) -{ - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -} - /* * Used for LDT copy/destruction. */ @@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif -static inline void load_mm_ldt(struct mm_struct *mm) -{ #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - - /* READ_ONCE synchronizes with smp_store_release */ - ldt = READ_ONCE(mm->context.ldt); - - /* - * Any change to mm->context.ldt is followed by an IPI to all - * CPUs with the mm active. The LDT will not be freed until - * after the IPI is handled by all such CPUs. This means that, - * if the ldt_struct changes before we return, the values we see - * will be safe, and the new values will be loaded before we run - * any user code. - * - * NB: don't try to convert this to use RCU without extreme care. - * We would still need IRQs off, because we don't want to change - * the local LDT after an IPI loaded a newer value than the one - * that we can see. - */ - - if (unlikely(ldt)) { - if (static_cpu_has(X86_FEATURE_PTI)) { - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { - /* - * Whoops -- either the new LDT isn't mapped - * (if slot == -1) or is mapped into a bogus - * slot (if slot > 1). - */ - clear_LDT(); - return; - } - - /* - * If page table isolation is enabled, ldt->entries - * will not be mapped in the userspace pagetables. - * Tell the CPU to access the LDT through the alias - * at ldt_slot_va(ldt->slot). 
- */ - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); - } else { - set_ldt(ldt->entries, ldt->nr_entries); - } - } else { - clear_LDT(); - } +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else +static inline void load_mm_ldt(struct mm_struct *mm) +{ clear_LDT(); -#endif } - static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT if either the old or new mm had an LDT. - * - * An mm will never go from having an LDT to not having an LDT. Two - * mms never share an LDT, so we don't gain anything by checking to - * see whether the LDT changed. There's also no guarantee that - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, - * then prev->context.ldt will also be non-NULL. - * - * If we really cared, we could optimize the case where prev == next - * and we're exiting lazy mode. Most of the time, if this happens, - * we don't actually need to reload LDTR, but modify_ldt() is mostly - * used by legacy code and emulators where we don't need this level of - * performance. - * - * This uses | instead of || because it generates better code. - */ - if (unlikely((unsigned long)prev->context.ldt | - (unsigned long)next->context.ldt)) - load_mm_ldt(next); -#endif - DEBUG_LOCKS_WARN_ON(preemptible()); } +#endif -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. Used on mm copies, like at fork() diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 084e98da04a7..ebe1685e92dd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -558,7 +558,14 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c #define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. 
*/ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 @@ -566,11 +573,6 @@ #define MSR_IA32_XSS 0x00000da0 -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index dbff1456d215..829df26fd7a3 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,7 +24,7 @@ #define _ASM_X86_MTRR_H #include <uapi/asm/mtrr.h> -#include <asm/pat.h> +#include <asm/memtype.h> /* @@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) } static inline void mtrr_bp_init(void) { - pat_disable("MTRRs disabled, skipping PAT initialization too."); + pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); } #define mtrr_ap_init() do {} while (0) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 75ded1d13d98..9d5d949e662e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -41,7 +41,6 @@ struct nmiaction { struct list_head list; nmi_handler_t handler; u64 max_duration; - struct irq_work irq_work; unsigned long flags; const char *name; }; diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5c24a7b35166..07e95dcb40ad 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -37,7 +37,6 @@ */ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ /* * Google experimented with loop-unrolling and this turned out to be diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h deleted file mode 100644 index 92015c65fa2a..000000000000 --- a/arch/x86/include/asm/pat.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PAT_H -#define _ASM_X86_PAT_H - -#include <linux/types.h> -#include <asm/pgtable_types.h> - -bool pat_enabled(void); -void pat_disable(const char *reason); -extern void pat_init(void); -extern void init_cache_modes(void); - -extern int reserve_memtype(u64 start, u64 end, - enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int free_memtype(u64 start, u64 end); - -extern int kernel_map_sync_memtype(u64 base, unsigned long size, - enum page_cache_mode pcm); - -int io_reserve_memtype(resource_size_t start, resource_size_t end, - enum page_cache_mode *pcm); - -void io_free_memtype(resource_size_t start, resource_size_t end); - -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); - -#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 90d0731fdcb6..c1fdd43fe187 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -9,7 +9,7 @@ #include <linux/scatterlist.h> #include <linux/numa.h> #include <asm/io.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/x86_init.h> struct pci_sysdata { diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ee26e9215f18..29964b0e1075 100644 --- a/arch/x86/include/asm/perf_event.h +++ 
b/arch/x86/include/asm/perf_event.h @@ -322,17 +322,10 @@ struct perf_guest_switch_msr { u64 host, guest; }; -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else -static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - *nr = 0; - return NULL; -} - static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) { memset(cap, 0, sizeof(*cap)); @@ -342,8 +335,23 @@ static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); +#else +static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +{ + *nr = 0; + return NULL; +} +#endif + #ifdef CONFIG_CPU_SUP_INTEL extern void intel_pt_handle_vmx(int on); +#else +static inline void intel_pt_handle_vmx(int on) +{ + +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000000..b6355416a15a --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include <asm/cpu_entry_area.h> + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. 
;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLY__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0416d42e5bdd..5356a46b0373 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PGTABLE_32_DEFS_H -#define _ASM_X86_PGTABLE_32_DEFS_H +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -20,55 +20,4 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. ;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLY__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -/* - * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE. - * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c - * to avoid include recursion hell. 
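One functional change rides along with the move into pgtable_32_areas.h above: CPU_ENTRY_AREA_PAGES is now computed exactly with DIV_ROUND_UP() instead of the hand-maintained bound removed just below. The old invariant, written out as a sketch (DIV_ROUND_UP(n, d) is (((n) + (d) - 1) / (d)) per linux/kernel.h; BUILD_BUG_ON() needs function scope):

        /* Sketch: the removed constant 43 was only safe while this held,
         * which cpu_entry_area.c used to check explicitly. */
        static void __init example_cea_size_check(void)
        {
                BUILD_BUG_ON(DIV_ROUND_UP(sizeof(struct cpu_entry_area),
                                          PAGE_SIZE) > 43);
        }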
- */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43) - -/* The +1 is for the readonly IDT page: */ -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) - -#define LDT_BASE_ADDR \ - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) - -#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) - -#define PKMAP_BASE \ - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) -#endif - -#define MODULES_VADDR VMALLOC_START -#define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) - -#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000000..d34cce1b995c --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include <asm/pgtable_32_areas.h> +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b5e49e6bac63..ea7400726d7a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -110,11 +110,6 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_ACCESSED | _PAGE_DIRTY) - /* * Set of bits not changed in pte_modify. 
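The _PAGE_TABLE_NOENC and _KERNPG_TABLE_NOENC masks removed above are not dropped: they reappear below in the new single-character column notation. A hedged self-check that both spellings are bit-identical (again assuming function scope for BUILD_BUG_ON()):

        /* Sketch: the columnar forms added below equal the removed ones. */
        static void __init example_pgprot_selfcheck(void)
        {
                BUILD_BUG_ON(_PAGE_TABLE_NOENC !=
                             (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                              _PAGE_ACCESSED | _PAGE_DIRTY));
                BUILD_BUG_ON(_KERNPG_TABLE_NOENC !=
                             (_PAGE_PRESENT | _PAGE_RW |
                              _PAGE_ACCESSED | _PAGE_DIRTY));
        }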
The pte's * protection key is treated like _PAGE_RW, for @@ -136,80 +131,93 @@ */ #ifndef __ASSEMBLY__ enum page_cache_mode { - _PAGE_CACHE_MODE_WB = 0, - _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, - _PAGE_CACHE_MODE_UC = 3, - _PAGE_CACHE_MODE_WT = 4, - _PAGE_CACHE_MODE_WP = 5, - _PAGE_CACHE_MODE_NUM = 8 + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 }; #endif -#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) -#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) -#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) -#ifndef __ASSEMBLY__ +#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) +#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) -#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP _PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define 
_PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY | _PAGE_ENC) -#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) -#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) -#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) +#ifndef __ASSEMBLY__ -#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) -#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) -#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) -#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) +#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) -#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) #endif /* __ASSEMBLY__ */ @@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; extern uint8_t __pte2cachemode_tbl[8]; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0340aad3f2fc..6fb4870ed759 100644 --- a/arch/x86/include/asm/processor.h +++ 
b/arch/x86/include/asm/processor.h @@ -25,6 +25,7 @@ struct vm86; #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> +#include <asm/vmxfeatures.h> #include <linux/personality.h> #include <linux/cache.h> @@ -85,6 +86,9 @@ struct cpuinfo_x86 { /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; #endif +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_capability[NVMXINTS]; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; /* CPUID returned core id bits: */ @@ -1015,11 +1019,4 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; -enum taa_mitigations { - TAA_MITIGATION_OFF, - TAA_MITIGATION_UCODE_NEEDED, - TAA_MITIGATION_VERW, - TAA_MITIGATION_TSX_DISABLED, -}; - #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5057a8ed100b..6d6475fdd327 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #endif } +/* + * Determine whether the register set came from any context that is running in + * 64-bit mode. + */ +static inline bool any_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + return !user_mode(regs) || user_64bit_mode(regs); +#else + return false; +#endif +} + #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp @@ -339,22 +352,6 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, #define ARCH_HAS_USER_SINGLE_STEP_REPORT -/* - * When hitting ptrace_stop(), we cannot return using SYSRET because - * that does not restore the full CPU state, only a minimal set. The - * ptracer can change arbitrary register values, which is usually okay - * because the usual ptrace stops run off the signal delivery path which - * forces IRET; however, ptrace_event() stops happen in arbitrary places - * in the kernel and don't force IRET path. - * - * So force IRET path after a ptrace stop. 
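any_64bit_mode() above complements user_64bit_mode(): on a 64-bit kernel every kernel-mode frame is 64-bit, so only user frames need the CS check. A hypothetical consumer (not part of this series), e.g. an instruction decoder choosing its address width for the faulting context:

        /* Hypothetical helper: decode width for the instruction at regs->ip. */
        static int example_insn_addr_bytes(struct pt_regs *regs)
        {
                return any_64bit_mode(regs) ? 8 : 4;
        }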
- */ -#define arch_ptrace_stop_needed(code, info) \ -({ \ - force_iret(); \ - false; \ -}) - struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 09ecc32f6524..b35030eeec36 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -14,7 +14,7 @@ #include <linux/types.h> #include <asm/io.h> -/* This must match data at realmode.S */ +/* This must match data at realmode/rm/header.S */ struct real_mode_header { u32 text_start; u32 ro_end; @@ -36,7 +36,7 @@ struct real_mode_header { #endif }; -/* This must match data at trampoline_32/64.S */ +/* This must match data at realmode/rm/trampoline_{32,64}.S */ struct trampoline_header { #ifdef CONFIG_X86_32 u32 start; diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 2ee8e469dcf5..64c3dce374e5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -81,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); extern int kernel_set_to_readonly; -void set_kernel_text_rw(void); -void set_kernel_text_ro(void); #ifdef CONFIG_X86_64 static inline int set_mce_nospec(unsigned long pfn) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 23c626a742e8..67315fa3956a 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -25,14 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, */ #define POKE_MAX_OPCODE_SIZE 5 -struct text_poke_loc { - void *addr; - int len; - s32 rel32; - u8 opcode; - const u8 text[POKE_MAX_OPCODE_SIZE]; -}; - extern void text_poke_early(void *addr, const void *opcode, size_t len); /* @@ -50,21 +42,13 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); * an inconsistent instruction while you patch. 
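text_poke_queue() and text_poke_finish(), declared just below, let a caller batch a page's worth of patch sites and pay for a single INT3 install/fixup/sync cycle. A hedged usage sketch (sites[], nr_sites and dest are hypothetical; CALL_INSN_OPCODE/CALL_INSN_SIZE and text_gen_insn() come from the same header; text_mutex must be held, and because the flush logic added in alternative.c keeps the vector sorted, addresses should be queued in ascending order to avoid early flushes):

        /* Sketch: retarget a sorted set of 5-byte CALL sites in one batch. */
        static void example_retarget_calls(void **sites, int nr_sites, void *dest)
        {
                int i;

                mutex_lock(&text_mutex);
                for (i = 0; i < nr_sites; i++)
                        text_poke_queue(sites[i],
                                        text_gen_insn(CALL_INSN_OPCODE, sites[i], dest),
                                        CALL_INSN_SIZE, NULL);
                text_poke_finish();     /* one INT3 round trip for the whole set */
                mutex_unlock(&text_mutex);
        }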
*/ extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); -extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); -extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr, - const void *opcode, size_t len, const void *emulate); -extern int after_bootmem; -extern __ro_after_init struct mm_struct *poking_mm; -extern __ro_after_init unsigned long poking_addr; -#ifndef CONFIG_UML_X86 -static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) -{ - regs->ip = ip; -} +extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate); +extern void text_poke_finish(void); #define INT3_INSN_SIZE 1 #define INT3_INSN_OPCODE 0xCC @@ -78,6 +62,67 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) #define JMP8_INSN_SIZE 2 #define JMP8_INSN_OPCODE 0xEB +#define DISP32_SIZE 4 + +static inline int text_opcode_size(u8 opcode) +{ + int size = 0; + +#define __CASE(insn) \ + case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break + + switch(opcode) { + __CASE(INT3); + __CASE(CALL); + __CASE(JMP32); + __CASE(JMP8); + } + +#undef __CASE + + return size; +} + +union text_poke_insn { + u8 text[POKE_MAX_OPCODE_SIZE]; + struct { + u8 opcode; + s32 disp; + } __attribute__((packed)); +}; + +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + int size = text_opcode_size(opcode); + + insn.opcode = opcode; + + if (size > 1) { + insn.disp = (long)dest - (long)(addr + size); + if (size == 2) { + /* + * Ensure that for JMP8 the displacement + * actually fits the signed byte. + */ + BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + } + } + + return &insn.text; +} + +extern int after_bootmem; +extern __ro_after_init struct mm_struct *poking_mm; +extern __ro_after_init unsigned long poking_addr; + +#ifndef CONFIG_UML_X86 +static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { /* @@ -85,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) * stack where the break point happened, and the saving of * pt_regs. We can extend the original stack because of * this gap. See the idtentry macro's create_gap option. + * + * Similarly entry_32.S will have a gap on the stack for (any) hardware + * exception and pt_regs; see FIXUP_FRAME. */ regs->sp -= sizeof(unsigned long); *(unsigned long *)regs->sp = val; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d779366ce3f8..cf4327986e98 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -239,15 +239,6 @@ static inline int arch_within_stack_frames(const void * const stack, current_thread_info()->status & TS_COMPAT) #endif -/* - * Force syscall return via IRET by making it look as if there was - * some work pending. IRET is our most capable (but slowest) syscall - * return path, which is able to restore modified SS, CS and certain - * EFLAGS values that other (fast) syscall return instructions - * are not able to restore properly.
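For context on int3_emulate_push() above: the CALL emulation in the same header (unchanged, so outside this hunk) composes it with int3_emulate_jmp(), and is what poke_int3_handler() invokes further down in alternative.c. Roughly:

        /* As in text-patching.h: emulate CALL as push-return-address + jmp,
         * relying on the stack gap described in the comment above. */
        static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
        {
                int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
                int3_emulate_jmp(regs, func);
        }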
- */ -#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) - extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 230474e2ddb5..bbcdc7b8f963 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -21,6 +21,7 @@ struct vdso_image { long sym_vvar_page; long sym_pvclock_page; long sym_hvclock_page; + long sym_timens_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index e9ee139cf29e..6ee1f7dba34b 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -21,6 +21,7 @@ #include <clocksource/hyperv_timer.h> #define __vdso_data (VVAR(_vdso_data)) +#define __timens_vdso_data (TIMENS(_vdso_data)) #define VDSO_HAS_TIME 1 @@ -56,6 +57,13 @@ extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return __timens_vdso_data; +} +#endif + #ifndef BUILD_VDSO32 static __always_inline @@ -96,8 +104,6 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #else -#define VDSO_HAS_32BIT_FALLBACK 1 - static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000000..29837740b520 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#include <asm/pgtable_areas.h> + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 1835767aa335..9fbba31be825 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -15,67 +15,70 @@ #include <linux/bitops.h> #include <linux/types.h> #include <uapi/asm/vmx.h> +#include <asm/vmxfeatures.h> + +#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) /* * Definitions of Primary Processor-Based VM-Execution Controls. 
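The conversions below are value-preserving: each VMX_FEATURE_* in the new vmxfeatures.h encodes word*32 + bit, and VMCS_CONTROL_BIT() masks the word away, leaving the original control bit. Spot checks against two of the literals being removed (BUILD_BUG_ON() assumed to sit in function scope):

        /* VMX_FEATURE_HLT_EXITING is (1*32 + 7) -> BIT(7) == 0x00000080 */
        BUILD_BUG_ON(CPU_BASED_HLT_EXITING != 0x00000080);
        /* VMX_FEATURE_EPT is (2*32 + 1) -> BIT(1) == 0x00000002 */
        BUILD_BUG_ON(SECONDARY_EXEC_ENABLE_EPT != 0x00000002);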
*/ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 +#define CPU_BASED_VIRTUAL_INTR_PENDING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING) +#define CPU_BASED_USE_TSC_OFFSETING VMCS_CONTROL_BIT(TSC_OFFSETTING) +#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) +#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) +#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) +#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING) +#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING) +#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING) +#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING) +#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) +#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) +#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) +#define CPU_BASED_VIRTUAL_NMI_PENDING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING) +#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) +#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) +#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) +#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG) +#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS) +#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING) +#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING) +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS) #define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 /* * Definitions of Secondary Processor-Based VM-Execution Controls. 
*/ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 -#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 -#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -#define SECONDARY_EXEC_ENABLE_PML 0x00020000 -#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000 -#define SECONDARY_EXEC_XSAVES 0x00100000 -#define SECONDARY_EXEC_PT_USE_GPA 0x01000000 -#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 -#define SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) +#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) +#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) +#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) +#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) +#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) +#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST) +#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT) +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY) +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING) +#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING) +#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID) +#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC) +#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS) +#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) +#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) +#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) +#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES) +#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) +#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) +#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -#define PIN_BASED_POSTED_INTR 0x00000080 +#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) +#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) +#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) +#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER) +#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR) #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -114,7 +117,9 @@ #define VMX_MISC_MSR_LIST_MULTIPLIER 512 /* VMFUNC functions */ -#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define 
VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) + +#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h new file mode 100644 index 000000000000..0d04d8bf15a5 --- /dev/null +++ b/arch/x86/include/asm/vmxfeatures.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VMXFEATURES_H +#define _ASM_X86_VMXFEATURES_H + +/* + * Defines VMX CPU feature bits + */ +#define NVMXINTS 3 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */ +#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */ +#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */ +#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */ +#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */ +#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */ + +/* EPT/VPID features, scattered to bits 16-23 */ +#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */ +#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */ +#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */ +#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */ + +/* Aggregated APIC features 24-27 */ +#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */ +#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ + +/* VM-Functions, shifted to bits 28-31 */ +#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */ + +/* Primary Processor-Based VM-Execution Controls, word 1 */ +#define VMX_FEATURE_VIRTUAL_INTR_PENDING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ +#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */ +#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */ +#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */ +#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */ +#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */ +#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */ +#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */ +#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */ +#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */ +#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. 
TPR shadow */ +#define VMX_FEATURE_VIRTUAL_NMI_PENDING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */ +#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/ +#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */ +#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */ +#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */ +#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */ +#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */ +#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */ + +/* Secondary Processor-Based VM-Execution Controls, word 2 */ +#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */ +#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */ +#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */ +#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */ +#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */ +#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */ +#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */ +#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */ +#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */ +#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */ +#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */ +#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/ +#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */ +#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */ +#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */ +#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */ +#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */ +#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */ +#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */ +#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. 
user */ +#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */ +#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ +#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ + +#endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 32f5d9a0b90e..183e98e49ab9 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -19,10 +19,10 @@ #ifndef _ASM_X86_VVAR_H #define _ASM_X86_VVAR_H -#if defined(__VVAR_KERNEL_LDS) - -/* The kernel linker script defines its own magic to put vvars in the - * right place. +#ifdef EMIT_VVAR +/* + * EMIT_VVAR() is used by the kernel linker script to put vvars in the + * right place. Also, it's used by kernel code to import offsets values. */ #define DECLARE_VVAR(offset, type, name) \ EMIT_VVAR(name, offset) @@ -33,9 +33,12 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ extern type vvar_ ## name[CS_BASES] \ - __attribute__((visibility("hidden"))); + __attribute__((visibility("hidden"))); \ + extern type timens_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); \ #define VVAR(name) (vvar_ ## name) +#define TIMENS(name) (timens_ ## name) #define DEFINE_VVAR(type, name) \ type name[CS_BASES] \ diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca13851f0570..26b7256f590f 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -27,6 +27,17 @@ static char temp_stack[4096]; #endif /** + * acpi_get_wakeup_address - provide physical address for S3 wakeup + * + * Returns the physical address where the kernel should be resumed after the + * system awakes from S3, e.g. for programming into the firmware waking vector. + */ +unsigned long acpi_get_wakeup_address(void) +{ + return ((unsigned long)(real_mode_header->wakeup_start)); +} + +/** * x86_acpi_enter_sleep_state - enter sleep state * @state: Sleep state to enter. 
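acpi_get_wakeup_address() above feeds the ACPI suspend path when it arms the firmware waking vector. A hedged sketch of the consumer side, using the ACPICA interface (the real call site lives in drivers/acpi and may differ):

        /* Sketch: program the S3 waking vector with the real-mode trampoline. */
        static int example_arm_waking_vector(void)
        {
                acpi_status status;

                status = acpi_set_firmware_waking_vector(acpi_get_wakeup_address(), 0);
                return ACPI_FAILURE(status) ? -ENODEV : 0;
        }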
* diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index fbb60ca4255c..d06c2079b6c1 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -3,7 +3,7 @@ * Variables and functions used by the code in sleep.c */ -#include <asm/realmode.h> +#include <linux/linkage.h> extern unsigned long saved_video_mode; extern long saved_magic; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9ec463fe96f2..34360ca301a2 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -936,44 +936,81 @@ static void do_sync_core(void *info) sync_core(); } -static struct bp_patching_desc { +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +struct text_poke_loc { + s32 rel_addr; /* addr := _stext + rel_addr */ + s32 rel32; + u8 opcode; + const u8 text[POKE_MAX_OPCODE_SIZE]; +}; + +struct bp_patching_desc { struct text_poke_loc *vec; int nr_entries; -} bp_patching; + atomic_t refs; +}; + +static struct bp_patching_desc *bp_desc; + +static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) +{ + struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */ + + if (!desc || !atomic_inc_not_zero(&desc->refs)) + return NULL; + + return desc; +} + +static inline void put_desc(struct bp_patching_desc *desc) +{ + smp_mb__before_atomic(); + atomic_dec(&desc->refs); +} -static int patch_cmp(const void *key, const void *elt) +static inline void *text_poke_addr(struct text_poke_loc *tp) +{ + return _stext + tp->rel_addr; +} + +static int notrace patch_cmp(const void *key, const void *elt) { struct text_poke_loc *tp = (struct text_poke_loc *) elt; - if (key < tp->addr) + if (key < text_poke_addr(tp)) return -1; - if (key > tp->addr) + if (key > text_poke_addr(tp)) return 1; return 0; } NOKPROBE_SYMBOL(patch_cmp); -int poke_int3_handler(struct pt_regs *regs) +int notrace poke_int3_handler(struct pt_regs *regs) { + struct bp_patching_desc *desc; struct text_poke_loc *tp; + int len, ret = 0; void *ip; + if (user_mode(regs)) + return 0; + /* * Having observed our INT3 instruction, we now must observe - * bp_patching.nr_entries. + * bp_desc: * - * nr_entries != 0 INT3 + * bp_desc = desc INT3 * WMB RMB - * write INT3 if (nr_entries) - * - * Idem for other elements in bp_patching. + * write INT3 if (desc) */ smp_rmb(); - if (likely(!bp_patching.nr_entries)) - return 0; - - if (user_mode(regs)) + desc = try_get_desc(&bp_desc); + if (!desc) return 0; /* @@ -984,19 +1021,20 @@ int poke_int3_handler(struct pt_regs *regs) /* * Skip the binary search if there is a single member in the vector. */ - if (unlikely(bp_patching.nr_entries > 1)) { - tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries, + if (unlikely(desc->nr_entries > 1)) { + tp = bsearch(ip, desc->vec, desc->nr_entries, sizeof(struct text_poke_loc), patch_cmp); if (!tp) - return 0; + goto out_put; } else { - tp = bp_patching.vec; - if (tp->addr != ip) - return 0; + tp = desc->vec; + if (text_poke_addr(tp) != ip) + goto out_put; } - ip += tp->len; + len = text_opcode_size(tp->opcode); + ip += len; switch (tp->opcode) { case INT3_INSN_OPCODE: @@ -1004,7 +1042,7 @@ int poke_int3_handler(struct pt_regs *regs) * Someone poked an explicit INT3, they'll want to handle it, * do not consume. 
*/ - return 0; + goto out_put; case CALL_INSN_OPCODE: int3_emulate_call(regs, (long)ip + tp->rel32); @@ -1019,10 +1057,18 @@ int poke_int3_handler(struct pt_regs *regs) BUG(); } - return 1; + ret = 1; + +out_put: + put_desc(desc); + return ret; } NOKPROBE_SYMBOL(poke_int3_handler); +#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) +static struct text_poke_loc tp_vec[TP_VEC_MAX]; +static int tp_vec_nr; + /** * text_poke_bp_batch() -- update instructions on live kernel on SMP * @tp: vector of instructions to patch @@ -1044,16 +1090,20 @@ NOKPROBE_SYMBOL(poke_int3_handler); * replacing opcode * - sync cores */ -void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) +static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { + struct bp_patching_desc desc = { + .vec = tp, + .nr_entries = nr_entries, + .refs = ATOMIC_INIT(1), + }; unsigned char int3 = INT3_INSN_OPCODE; unsigned int i; int do_sync; lockdep_assert_held(&text_mutex); - bp_patching.vec = tp; - bp_patching.nr_entries = nr_entries; + smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */ /* * Corresponding read barrier in int3 notifier for making sure the @@ -1065,18 +1115,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * First step: add a int3 trap to the address that will be patched. */ for (i = 0; i < nr_entries; i++) - text_poke(tp[i].addr, &int3, sizeof(int3)); + text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE); - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); /* * Second step: update all but the first byte of the patched range. */ for (do_sync = 0, i = 0; i < nr_entries; i++) { - if (tp[i].len - sizeof(int3) > 0) { - text_poke((char *)tp[i].addr + sizeof(int3), - (const char *)tp[i].text + sizeof(int3), - tp[i].len - sizeof(int3)); + int len = text_opcode_size(tp[i].opcode); + + if (len - INT3_INSN_SIZE > 0) { + text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE, + (const char *)tp[i].text + INT3_INSN_SIZE, + len - INT3_INSN_SIZE); do_sync++; } } @@ -1087,7 +1139,7 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * not necessary and we'd be safe even without it. But * better safe than sorry (plus there's not only Intel). */ - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); } /* @@ -1098,19 +1150,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) if (tp[i].text[0] == INT3_INSN_OPCODE) continue; - text_poke(tp[i].addr, tp[i].text, sizeof(int3)); + text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE); do_sync++; } if (do_sync) - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); /* - * sync_core() implies an smp_mb() and orders this store against - * the writing of the new instruction. + * Remove and synchronize_rcu(), except we have a very primitive + * refcount based completion. 
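Pulled out of the surrounding hunks, the shape of that primitive: readers (possibly in NMI context) take a reference with try_get_desc() and drop it with put_desc(); the publisher owns the initial reference and, once the pointer is unpublished, waits for the count to drain. Generically:

        /* Sketch of the generic pattern (publisher side only). */
        struct example_obj { atomic_t refs; };
        static struct example_obj *example_gp;

        static void example_publish_and_wait(struct example_obj *obj)
        {
                atomic_set(&obj->refs, 1);
                smp_store_release(&example_gp, obj);    /* pairs with READ_ONCE() */
                /* ... readers take/drop extra references via the pointer ... */
                WRITE_ONCE(example_gp, NULL);           /* unpublish */
                if (!atomic_dec_and_test(&obj->refs))   /* drop the initial ref */
                        atomic_cond_read_acquire(&obj->refs, !VAL);
        }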
*/ - bp_patching.vec = NULL; - bp_patching.nr_entries = 0; + WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */ + if (!atomic_dec_and_test(&desc.refs)) + atomic_cond_read_acquire(&desc.refs, !VAL); } void text_poke_loc_init(struct text_poke_loc *tp, void *addr, @@ -1118,11 +1171,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, { struct insn insn; - if (!opcode) - opcode = (void *)tp->text; - else - memcpy((void *)tp->text, opcode, len); - + memcpy((void *)tp->text, opcode, len); if (!emulate) emulate = opcode; @@ -1132,8 +1181,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, BUG_ON(!insn_complete(&insn)); BUG_ON(len != insn.length); - tp->addr = addr; - tp->len = len; + tp->rel_addr = addr - (void *)_stext; tp->opcode = insn.opcode.bytes[0]; switch (tp->opcode) { @@ -1167,6 +1215,55 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, } } +/* + * We hard rely on the tp_vec being ordered; ensure this is so by flushing + * early if needed. + */ +static bool tp_order_fail(void *addr) +{ + struct text_poke_loc *tp; + + if (!tp_vec_nr) + return false; + + if (!addr) /* force */ + return true; + + tp = &tp_vec[tp_vec_nr - 1]; + if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr) + return true; + + return false; +} + +static void text_poke_flush(void *addr) +{ + if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) { + text_poke_bp_batch(tp_vec, tp_vec_nr); + tp_vec_nr = 0; + } +} + +void text_poke_finish(void) +{ + text_poke_flush(NULL); +} + +void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate) +{ + struct text_poke_loc *tp; + + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, opcode, len); + return; + } + + text_poke_flush(addr); + + tp = &tp_vec[tp_vec_nr++]; + text_poke_loc_init(tp, addr, opcode, len, emulate); +} + /** * text_poke_bp() -- update instructions on live kernel on SMP * @addr: address to patch @@ -1178,10 +1275,15 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, * dynamically allocated memory. This function should be used when it is * not possible to allocate memory. */ -void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) +void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) { struct text_poke_loc tp; + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, opcode, len); + return; + } + text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 251c795b4eb3..69aed0ebbdfc 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -22,6 +22,7 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 +#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 /* Protect the PCI config register pairs used for SMN and DF indirect access. 
*/ static DEFINE_MUTEX(smn_mutex); @@ -52,6 +53,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -66,6 +68,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 5da106f84e84..fe698f96617c 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -95,7 +95,7 @@ static inline void apbt_set_mapping(void) printk(KERN_WARNING "No timer base from SFI, use default\n"); apbt_address = APBT_DEFAULT_BASE; } - apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); + apbt_virt_address = ioremap(apbt_address, APBT_MMAP_SIZE); if (!apbt_virt_address) { pr_debug("Failed mapping APBT phy address at %lu\n",\ (unsigned long)apbt_address); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d5b51a740524..ad53b2abc859 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1493,65 +1493,34 @@ static void check_efi_reboot(void) } /* Setup user proc fs files */ -static int proc_hubbed_show(struct seq_file *file, void *data) +static int __maybe_unused proc_hubbed_show(struct seq_file *file, void *data) { seq_printf(file, "0x%x\n", uv_hubbed_system); return 0; } -static int proc_hubless_show(struct seq_file *file, void *data) +static int __maybe_unused proc_hubless_show(struct seq_file *file, void *data) { seq_printf(file, "0x%x\n", uv_hubless_system); return 0; } -static int proc_oemid_show(struct seq_file *file, void *data) +static int __maybe_unused proc_oemid_show(struct seq_file *file, void *data) { seq_printf(file, "%s/%s\n", oem_id, oem_table_id); return 0; } -static int proc_hubbed_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_hubbed_show, (void *)NULL); -} - -static int proc_hubless_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_hubless_show, (void *)NULL); -} - -static int proc_oemid_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_oemid_show, (void *)NULL); -} - -/* (struct is "non-const" as open function is set at runtime) */ -static struct file_operations proc_version_fops = { - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_oemid_fops = { - .open = proc_oemid_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static __init void uv_setup_proc_files(int hubless) { struct proc_dir_entry *pde; - char *name = hubless ? 
"hubless" : "hubbed"; pde = proc_mkdir(UV_PROC_NODE, NULL); - proc_create("oemid", 0, pde, &proc_oemid_fops); - proc_create(name, 0, pde, &proc_version_fops); + proc_create_single("oemid", 0, pde, proc_oemid_show); if (hubless) - proc_version_fops.open = proc_hubless_open; + proc_create_single("hubless", 0, pde, proc_hubless_show); else - proc_version_fops.open = proc_hubbed_open; + proc_create_single("hubbed", 0, pde, proc_hubbed_show); } /* Initialize UV hubless systems */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 890f60083eca..7dc4ad68eb41 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -29,6 +29,7 @@ obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o +obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o ifdef CONFIG_CPU_SUP_INTEL obj-y += intel.o intel_pconfig.o tsx.o obj-$(CONFIG_PM) += intel_epb.o @@ -53,11 +54,12 @@ obj-$(CONFIG_ACRN_GUEST) += acrn.o ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ - cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ + cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^ cpufeature = $(src)/../../include/asm/cpufeatures.h +vmxfeature = $(src)/../../include/asm/vmxfeatures.h -$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE +$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) endif targets += capflags.c diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 90f75e515876..ac83a0fef628 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -319,13 +319,6 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) c->cpu_core_id %= cus_per_node; } - -static void amd_get_topology_early(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; -} - /* * Fixup core topology information for * (1) AMD multi-node processors @@ -615,9 +608,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) return; clear_all: - clear_cpu_cap(c, X86_FEATURE_SME); + setup_clear_cpu_cap(X86_FEATURE_SME); clear_sev: - clear_cpu_cap(c, X86_FEATURE_SEV); + setup_clear_cpu_cap(X86_FEATURE_SEV); } } @@ -717,7 +710,8 @@ static void early_init_amd(struct cpuinfo_x86 *c) } } - amd_get_topology_early(c); + if (cpu_has(c, X86_FEATURE_TOPOEXT)) + smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; } static void init_amd_k8(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8bf64899f56a..ed54b3b21c39 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -286,6 +286,13 @@ early_param("mds", mds_cmdline); #undef pr_fmt #define pr_fmt(fmt) "TAA: " fmt +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + /* Default mitigation for TAA-affected CPUs */ static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; static bool taa_nosmt __ro_after_init; diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 14433ff5b828..426792565d86 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -18,13 +18,6 @@ #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define 
X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 - static void init_c3(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -71,8 +64,6 @@ static void init_c3(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - - cpu_detect_cache_sizes(c); } enum { @@ -119,31 +110,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) } } -static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c) -{ - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) - set_cpu_cap(c, X86_FEATURE_EPT); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } -} - static void init_centaur(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_32 @@ -250,8 +216,7 @@ static void init_centaur(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); #endif - if (cpu_has(c, X86_FEATURE_VMX)) - centaur_detect_vmx_virtcap(c); + init_ia32_feat_ctl(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2e4d90294fe6..86b8241c8209 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -14,6 +14,7 @@ #include <linux/sched/mm.h> #include <linux/sched/clock.h> #include <linux/sched/task.h> +#include <linux/sched/smt.h> #include <linux/init.h> #include <linux/kprobes.h> #include <linux/kgdb.h> @@ -49,7 +50,7 @@ #include <asm/cpu.h> #include <asm/mce.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/microcode.h> #include <asm/microcode_intel.h> #include <asm/intel-family.h> @@ -1023,6 +1024,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define MSBDS_ONLY BIT(5) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) +#define NO_SPECTRE_V2 BIT(8) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -1084,6 +1086,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + + /* Zhaoxin Family 7 */ + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), {} }; @@ -1116,7 +1122,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + + if (!cpu_matches(NO_SPECTRE_V2)) + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) @@ -1449,6 
+1457,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof(c->x86_capability)); +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + memset(&c->vmx_capability, 0, sizeof(c->vmx_capability)); +#endif generic_identify(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 38ab6e115eac..37fdefd14f28 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -80,4 +80,8 @@ extern void x86_spec_ctrl_setup_ap(void); extern u64 x86_read_arch_cap_msr(void); +#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#endif + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c new file mode 100644 index 000000000000..0268185bef94 --- /dev/null +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/tboot.h> + +#include <asm/cpufeature.h> +#include <asm/msr-index.h> +#include <asm/processor.h> +#include <asm/vmx.h> + +#undef pr_fmt +#define pr_fmt(fmt) "x86/cpu: " fmt + +#ifdef CONFIG_X86_VMX_FEATURE_NAMES +enum vmx_feature_leafs { + MISC_FEATURES = 0, + PRIMARY_CTLS, + SECONDARY_CTLS, + NR_VMX_FEATURE_WORDS, +}; + +#define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f) + +static void init_vmx_capabilities(struct cpuinfo_x86 *c) +{ + u32 supported, funcs, ept, vpid, ign; + + BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS); + + /* + * The high bits contain the allowed-1 settings, i.e. features that can + * be turned on. The low bits contain the allowed-0 settings, i.e. + * features that can be turned off. Ignore the allowed-0 settings, + * if a feature can be turned on then it's supported. + * + * Use raw rdmsr() for primary processor controls and pin controls MSRs + * as they exist on any CPU that supports VMX, i.e. we want the WARN if + * the RDMSR faults. + */ + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported); + c->vmx_capability[PRIMARY_CTLS] = supported; + + rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported); + c->vmx_capability[SECONDARY_CTLS] = supported; + + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported); + rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs); + + /* + * Except for EPT+VPID, which enumerates support for both in a single + * MSR, low for EPT, high for VPID. + */ + rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid); + + /* Pin, EPT, VPID and VM-Func are merged into a single word. */ + WARN_ON_ONCE(supported >> 16); + WARN_ON_ONCE(funcs >> 4); + c->vmx_capability[MISC_FEATURES] = (supported & 0xffff) | + ((vpid & 0x1) << 16) | + ((funcs & 0xf) << 28); + + /* EPT bits are full on scattered and must be manually handled. */ + if (ept & VMX_EPT_EXECUTE_ONLY_BIT) + c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_EXECUTE_ONLY); + if (ept & VMX_EPT_AD_BIT) + c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_AD); + if (ept & VMX_EPT_1GB_PAGE_BIT) + c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_1GB); + + /* Synthetic APIC features that are aggregates of multiple features. 
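+ *
+ * As a point of reference for the VMX_F() tests below: each
+ * VMX_FEATURE_x value encodes word*32 + bit (mirroring the
+ * cpufeatures convention), and VMX_F() masks off the word index so
+ * the result lands inside a single 32-bit capability word. A sketch
+ * of the expansion only -- the actual values live in vmxfeatures.h:
+ *
+ *	VMX_FEATURE_x = w * 32 + b
+ *	VMX_F(x)      = BIT((w * 32 + b) & 0x1f) = BIT(b)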
*/ + if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) && + (c->vmx_capability[SECONDARY_CTLS] & VMX_F(VIRT_APIC_ACCESSES))) + c->vmx_capability[MISC_FEATURES] |= VMX_F(FLEXPRIORITY); + + if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) && + (c->vmx_capability[SECONDARY_CTLS] & VMX_F(APIC_REGISTER_VIRT)) && + (c->vmx_capability[SECONDARY_CTLS] & VMX_F(VIRT_INTR_DELIVERY)) && + (c->vmx_capability[MISC_FEATURES] & VMX_F(POSTED_INTR))) + c->vmx_capability[MISC_FEATURES] |= VMX_F(APICV); + + /* Set the synthetic cpufeatures to preserve /proc/cpuinfo's ABI. */ + if (c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(FLEXPRIORITY)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(VIRTUAL_NMIS)) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (c->vmx_capability[SECONDARY_CTLS] & VMX_F(EPT)) + set_cpu_cap(c, X86_FEATURE_EPT); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(EPT_AD)) + set_cpu_cap(c, X86_FEATURE_EPT_AD); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID)) + set_cpu_cap(c, X86_FEATURE_VPID); +} +#endif /* CONFIG_X86_VMX_FEATURE_NAMES */ + +void init_ia32_feat_ctl(struct cpuinfo_x86 *c) +{ + bool tboot = tboot_enabled(); + u64 msr; + + if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) { + clear_cpu_cap(c, X86_FEATURE_VMX); + return; + } + + if (msr & FEAT_CTL_LOCKED) + goto update_caps; + + /* + * Ignore whatever value BIOS left in the MSR to avoid enabling random + * features or faulting on the WRMSR. + */ + msr = FEAT_CTL_LOCKED; + + /* + * Enable VMX if and only if the kernel may do VMXON at some point, + * i.e. KVM is enabled, to avoid unnecessarily adding an attack vector + * for the kernel, e.g. using VMX to hide malicious code. + */ + if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) { + msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; + + if (tboot) + msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX; + } + + wrmsrl(MSR_IA32_FEAT_CTL, msr); + +update_caps: + set_cpu_cap(c, X86_FEATURE_MSR_IA32_FEAT_CTL); + + if (!cpu_has(c, X86_FEATURE_VMX)) + return; + + if ( (tboot && !(msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX)) || + (!tboot && !(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX))) { + if (IS_ENABLED(CONFIG_KVM_INTEL)) + pr_err_once("VMX (%s TXT) disabled by BIOS\n", + tboot ? 
"inside" : "outside"); + clear_cpu_cap(c, X86_FEATURE_VMX); + } else { +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + init_vmx_capabilities(c); +#endif + } +} diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4a900804a023..57473e2c0869 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -494,52 +494,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void detect_vmx_virtcap(struct cpuinfo_x86 *c) -{ - /* Intel VMX MSR indicated features */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 -#define x86_VMX_FEATURE_EPT_CAP_AD 0x00200000 - - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - u32 msr_vpid_cap, msr_ept_cap; - - clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - clear_cpu_cap(c, X86_FEATURE_VNMI); - clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - clear_cpu_cap(c, X86_FEATURE_EPT); - clear_cpu_cap(c, X86_FEATURE_VPID); - clear_cpu_cap(c, X86_FEATURE_EPT_AD); - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) { - set_cpu_cap(c, X86_FEATURE_EPT); - rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, - msr_ept_cap, msr_vpid_cap); - if (msr_ept_cap & x86_VMX_FEATURE_EPT_CAP_AD) - set_cpu_cap(c, X86_FEATURE_EPT_AD); - } - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } -} - #define MSR_IA32_TME_ACTIVATE 0x982 /* Helpers to access TME_ACTIVATE MSR */ @@ -755,8 +709,7 @@ static void init_intel(struct cpuinfo_x86 *c) /* Work around errata */ srat_detect_node(c); - if (cpu_has(c, X86_FEATURE_VMX)) - detect_vmx_virtcap(c); + init_ia32_feat_ctl(c); if (cpu_has(c, X86_FEATURE_TME)) detect_tme(c); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 5167bd2bb6b1..b3a50d962851 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -78,6 +78,7 @@ struct smca_bank_name { static struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, + [SMCA_LS_V2] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, [SMCA_DE] = { "decode_unit", "Decode Unit" }, @@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* ZN Core (HWID=0xB0) MCA types */ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF }, + { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, @@ -266,10 +268,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu) smca_set_misc_banks_map(bank, cpu); /* Return early if this bank was already initialized. 
*/ - if (smca_banks[bank].hwid) + if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 5f42f25bac8f..2c4f949611e4 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -53,8 +53,6 @@ #include "internal.h" -static DEFINE_MUTEX(mce_log_mutex); - /* sysfs synchronization */ static DEFINE_MUTEX(mce_sysfs_mutex); @@ -156,19 +154,10 @@ void mce_log(struct mce *m) if (!mce_gen_pool_add(m)) irq_work_queue(&mce_irq_work); } - -void mce_inject_log(struct mce *m) -{ - mutex_lock(&mce_log_mutex); - mce_log(m); - mutex_unlock(&mce_log_mutex); -} -EXPORT_SYMBOL_GPL(mce_inject_log); - -static struct notifier_block mce_srao_nb; +EXPORT_SYMBOL_GPL(mce_log); /* - * We run the default notifier if we have only the SRAO, the first and the + * We run the default notifier if we have only the UC, the first and the * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS * notifiers registered on the chain. */ @@ -594,26 +583,29 @@ static struct notifier_block first_nb = { .priority = MCE_PRIO_FIRST, }; -static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, - void *data) +static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, + void *data) { struct mce *mce = (struct mce *)data; unsigned long pfn; - if (!mce) + if (!mce || !mce_usable_address(mce)) return NOTIFY_DONE; - if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { - pfn = mce->addr >> PAGE_SHIFT; - if (!memory_failure(pfn, 0)) - set_mce_nospec(pfn); - } + if (mce->severity != MCE_AO_SEVERITY && + mce->severity != MCE_DEFERRED_SEVERITY) + return NOTIFY_DONE; + + pfn = mce->addr >> PAGE_SHIFT; + if (!memory_failure(pfn, 0)) + set_mce_nospec(pfn); return NOTIFY_OK; } -static struct notifier_block mce_srao_nb = { - .notifier_call = srao_decode_notifier, - .priority = MCE_PRIO_SRAO, + +static struct notifier_block mce_uc_nb = { + .notifier_call = uc_decode_notifier, + .priority = MCE_PRIO_UC, }; static int mce_default_notifier(struct notifier_block *nb, unsigned long val, @@ -763,26 +755,22 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) log_it: error_seen = true; - mce_read_aux(&m, i); + if (flags & MCP_DONTLOG) + goto clear_it; + mce_read_aux(&m, i); m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - /* * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) - mce_log(&m); - else if (mce_usable_address(&m)) { - /* - * Although we skipped logging this, we still want - * to take action. Add to the pool so the registered - * notifiers will see it. - */ - if (!mce_gen_pool_add(&m)) - mce_schedule_work(); - } + if (mca_cfg.dont_log_ce && !mce_usable_address(&m)) + goto clear_it; + + mce_log(&m); + +clear_it: /* * Clear state for this bank. 
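 *
 * The hwid_mcatype values compared here come from the table above; as
 * a sketch of the packing, assuming the usual HWID_MCATYPE layout of
 * the hardware ID in the high half and the MCA type in the low half:
 *
 *	HWID_MCATYPE(0xB0, 0x10) == (0xB0 << 16) | 0x10 == 0xB00010
 *
 * which is presumably why an all-zero hwid_mcatype is treated as a
 * not-yet-initialized entry.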
*/ @@ -807,7 +795,7 @@ EXPORT_SYMBOL_GPL(machine_check_poll); static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { - char *tmp; + char *tmp = *msg; int i; for (i = 0; i < this_cpu_read(mce_num_banks); i++) { @@ -819,8 +807,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); + m->bank = i; if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { - m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; @@ -1232,8 +1220,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(toclear, MAX_NR_BANKS); struct mca_config *cfg = &mca_cfg; int cpu = smp_processor_id(); - char *msg = "Unknown"; struct mce m, *final; + char *msg = NULL; int worst = 0; /* @@ -1365,7 +1353,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) ist_end_non_atomic(); } else { if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0)) - mce_panic("Failed kernel mode recovery", &m, NULL); + mce_panic("Failed kernel mode recovery", &m, msg); } out_ist: @@ -2041,7 +2029,7 @@ int __init mcheck_init(void) { mcheck_intel_therm_init(); mce_register_decode_chain(&first_nb); - mce_register_decode_chain(&mce_srao_nb); + mce_register_decode_chain(&mce_uc_nb); mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1f30117b24ba..3413b41b8d55 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -494,7 +494,7 @@ static void do_inject(void) i_mce.status |= MCI_STATUS_SYNDV; if (inj_type == SW_INJ) { - mce_inject_log(&i_mce); + mce_log(&i_mce); return; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index e270d0770134..5627b1091b85 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -115,15 +115,16 @@ static bool lmce_supported(void) /* * BIOS should indicate support for LMCE by setting bit 20 in - * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will - * generate a #GP fault. + * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP + * fault. The MSR must also be locked for LMCE_ENABLED to take effect. + * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally + * locks the MSR in the event that it wasn't already locked by BIOS. */ - rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp); - if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) == - (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) - return true; + rdmsrl(MSR_IA32_FEAT_CTL, tmp); + if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED))) + return false; - return false; + return tmp & FEAT_CTL_LMCE_ENABLED; } bool mce_intel_cmci_poll(void) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 842b273bce31..b785c0d0b590 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -84,8 +84,6 @@ static inline int apei_clear_mce(u64 record_id) } #endif -void mce_inject_log(struct mce *m); - /* * We consider records to be equivalent if bank+status+addr+misc all match. 
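 *
 * Roughly, for two records m1 and m2 that means (a sketch matching the
 * fields named above; the real comparison helper may differ in detail):
 *
 *	m1->bank == m2->bank && m1->status == m2->status &&
 *	m1->addr == m2->addr && m1->misc == m2->misc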
* This is only used when the system is going down because of a fatal error diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index b38010b541d6..58b4ee3cda77 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -235,7 +235,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp) *temp = (msr_val >> 16) & 0x7F; } -static void throttle_active_work(struct work_struct *work) +static void __maybe_unused throttle_active_work(struct work_struct *work) { struct _thermal_state *state = container_of(to_delayed_work(work), struct _thermal_state, therm_work); @@ -467,6 +467,7 @@ static int thermal_throttle_online(unsigned int cpu) { struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + u32 l; state->package_throttle.level = PACKAGE_LEVEL; state->core_throttle.level = CORE_LEVEL; @@ -474,6 +475,10 @@ static int thermal_throttle_online(unsigned int cpu) INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); + /* Unmask the thermal vector after the above workqueues are initialized. */ + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + return thermal_throttle_add_dev(dev, cpu); } @@ -722,10 +727,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - /* Unmask the thermal vector: */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - pr_info_once("CPU0: Thermal monitoring enabled (%s)\n", tm2 ? "TM2" : "TM1"); diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index aed45b8895d5..1db560ed2ca3 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -6,8 +6,7 @@ set -e -IN=$1 -OUT=$2 +OUT=$1 dump_array() { @@ -15,6 +14,7 @@ dump_array() SIZE=$2 PFX=$3 POSTFIX=$4 + IN=$5 PFX_SZ=$(echo $PFX | wc -c) TABS="$(printf '\t\t\t\t\t')" @@ -57,11 +57,18 @@ trap 'rm "$OUT"' EXIT echo "#endif" echo "" - dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" + dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" $2 echo "" - dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" + dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" $2 + echo "" + echo "#ifdef CONFIG_X86_VMX_FEATURE_NAMES" + echo "#ifndef _ASM_X86_VMXFEATURES_H" + echo "#include <asm/vmxfeatures.h>" + echo "#endif" + dump_array "x86_vmx_flags" "NVMXINTS*32" "VMX_FEATURE_" "" $3 + echo "#endif /* CONFIG_X86_VMX_FEATURE_NAMES */" ) > $OUT trap - EXIT diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index aa5c064a6a22..51b9190c628b 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -15,7 +15,7 @@ #include <asm/tlbflush.h> #include <asm/mtrr.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include "mtrr.h" diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 4d36dcc1cf87..da532f656a7b 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -101,9 +101,6 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) int length; size_t linelen; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - memset(line, 0, LINE_SIZE); len = min_t(size_t, len, LINE_SIZE - 1); @@ -226,8 +223,6 @@ 
mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_ADD_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 0); @@ -236,24 +231,18 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_SET_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_add(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_ENTRY: #ifdef CONFIG_COMPAT case MTRRIOC32_DEL_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_del(sentry.base, sentry.size, file, 0); break; case MTRRIOC_KILL_ENTRY: #ifdef CONFIG_COMPAT case MTRRIOC32_KILL_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_del(-1, sentry.base, sentry.size); break; case MTRRIOC_GET_ENTRY: @@ -279,8 +268,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_ADD_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 1); @@ -289,8 +276,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_SET_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_add_page(sentry.base, sentry.size, sentry.type, false); break; @@ -298,16 +283,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_DEL_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_del(sentry.base, sentry.size, file, 1); break; case MTRRIOC_KILL_PAGE_ENTRY: #ifdef CONFIG_COMPAT case MTRRIOC32_KILL_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_del_page(-1, sentry.base, sentry.size); break; case MTRRIOC_GET_PAGE_ENTRY: @@ -373,28 +354,6 @@ static int mtrr_close(struct inode *ino, struct file *file) return single_release(ino, file); } -static int mtrr_seq_show(struct seq_file *seq, void *offset); - -static int mtrr_open(struct inode *inode, struct file *file) -{ - if (!mtrr_if) - return -EIO; - if (!mtrr_if->get) - return -ENXIO; - return single_open(file, mtrr_seq_show, NULL); -} - -static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, -}; - static int mtrr_seq_show(struct seq_file *seq, void *offset) { char factor; @@ -426,6 +385,28 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) return 0; } +static int mtrr_open(struct inode *inode, struct file *file) +{ + if (!mtrr_if) + return -EIO; + if (!mtrr_if->get) + return -ENXIO; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return single_open(file, mtrr_seq_show, NULL); +} + +static const struct file_operations mtrr_fops = { + .owner = THIS_MODULE, + .open = mtrr_open, + .read = seq_read, + .llseek = seq_lseek, + .write = mtrr_write, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, + .release = mtrr_close, +}; + static int __init mtrr_if_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 507039c20128..6a80f36b5d59 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -52,7 +52,7 @@ #include <asm/e820/api.h> #include <asm/mtrr.h> 
#include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include "mtrr.h" diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index cb2e49810d68..4eec8889b0ff 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -7,6 +7,10 @@ #include "cpu.h" +#ifdef CONFIG_X86_VMX_FEATURE_NAMES +extern const char * const x86_vmx_flags[NVMXINTS*32]; +#endif + /* * Get CPU information for use by the procfs. */ @@ -102,6 +106,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + if (cpu_has(c, X86_FEATURE_VMX) && c->vmx_capability[0]) { + seq_puts(m, "\nvmx flags\t:"); + for (i = 0; i < 32*NVMXINTS; i++) { + if (test_bit(i, (unsigned long *)c->vmx_capability) && + x86_vmx_flags[i] != NULL) + seq_printf(m, " %s", x86_vmx_flags[i]); + } + } +#endif + seq_puts(m, "\nbugs\t\t:"); for (i = 0; i < 32*NBUGINTS; i++) { unsigned int bug_bit = 32*NCAPINTS + i; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 03eb90d00af0..89049b343c7a 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -618,7 +618,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); list_del(&d->list); - if (is_mbm_enabled()) + if (r->mon_capable && is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { /* diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index e49b77283924..181c992f448c 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -57,6 +57,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) } DECLARE_STATIC_KEY_FALSE(rdt_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); /** * struct mon_evt - Entry in the event list of a resource diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 397206f23d14..773124b0e18a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -514,7 +514,7 @@ void mbm_handle_overflow(struct work_struct *work) mutex_lock(&rdtgroup_mutex); - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); @@ -543,7 +543,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms) unsigned long delay = msecs_to_jiffies(delay_ms); int cpu; - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) return; cpu = cpumask_any(&dom->cpu_mask); dom->mbm_work_cpu = cpu; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2e3b06d6bbc6..1504bcabc63c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -532,11 +532,15 @@ static void move_myself(struct callback_head *head) kfree(rdtgrp); } + if (unlikely(current->flags & PF_EXITING)) + goto out; + preempt_disable(); /* update PQR_ASSOC MSR to make resource group go into effect */ resctrl_sched_in(); preempt_enable(); +out: kfree(callback); } @@ -725,6 +729,92 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of, return ret; } +#ifdef CONFIG_PROC_CPU_RESCTRL + +/* + * A task can only 
be part of one resctrl control group and of one monitor + * group which is associated to that control group. + * + * 1) res: + * mon: + * + * resctrl is not available. + * + * 2) res:/ + * mon: + * + * Task is part of the root resctrl control group, and it is not associated + * to any monitor group. + * + * 3) res:/ + * mon:mon0 + * + * Task is part of the root resctrl control group and monitor group mon0. + * + * 4) res:group0 + * mon: + * + * Task is part of resctrl control group group0, and it is not associated + * to any monitor group. + * + * 5) res:group0 + * mon:mon1 + * + * Task is part of resctrl control group group0 and monitor group mon1. + */ +int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk) +{ + struct rdtgroup *rdtg; + int ret = 0; + + mutex_lock(&rdtgroup_mutex); + + /* Return empty if resctrl has not been mounted. */ + if (!static_branch_unlikely(&rdt_enable_key)) { + seq_puts(s, "res:\nmon:\n"); + goto unlock; + } + + list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { + struct rdtgroup *crg; + + /* + * Task information is only relevant for shareable + * and exclusive groups. + */ + if (rdtg->mode != RDT_MODE_SHAREABLE && + rdtg->mode != RDT_MODE_EXCLUSIVE) + continue; + + if (rdtg->closid != tsk->closid) + continue; + + seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", + rdtg->kn->name); + seq_puts(s, "mon:"); + list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, + mon.crdtgrp_list) { + if (tsk->rmid != crg->mon.rmid) + continue; + seq_printf(s, "%s", crg->kn->name); + break; + } + seq_putc(s, '\n'); + goto unlock; + } + /* + * The above search should succeed. Otherwise return + * with an error. + */ + ret = -ENOENT; +unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} +#endif + static int rdt_last_cmd_status_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { @@ -1741,9 +1831,6 @@ static int set_cache_qos_cfg(int level, bool enable) struct rdt_domain *d; int cpu; - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - if (level == RDT_RESOURCE_L3) update = l3_qos_cfg_update; else if (level == RDT_RESOURCE_L2) @@ -1751,6 +1838,9 @@ static int set_cache_qos_cfg(int level, bool enable) else return -EINVAL; + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index adf9b71386ef..62b137c3c97a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -4,7 +4,7 @@ */ #include <linux/cpu.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/apic.h> #include <asm/processor.h> diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index ee48c3fc8a65..d3a0791bc052 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -7,7 +7,7 @@ #include <linux/cpu.h> #include <asm/apic.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/processor.h> #include "cpu.h" diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 3e20d322bc98..e2ad30e474f8 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -14,6 +14,9 @@ #include "cpu.h" +#undef pr_fmt +#define pr_fmt(fmt) "tsx: " fmt + enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; void tsx_disable(void) @@ -99,7 +102,7 @@ void 
__init tsx_init(void) tsx_ctrl_state = x86_get_tsx_auto_mode(); } else { tsx_ctrl_state = TSX_CTRL_DISABLE; - pr_err("tsx: invalid option, defaulting to off\n"); + pr_err("invalid option, defaulting to off\n"); } } else { /* tsx= not provided */ @@ -115,11 +118,12 @@ void __init tsx_init(void) tsx_disable(); /* - * tsx_disable() will change the state of the - * RTM CPUID bit. Clear it here since it is now - * expected to be not set. + * tsx_disable() will change the state of the RTM and HLE CPUID + * bits. Clear them here since they are now expected to be not + * set. */ setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { /* @@ -131,10 +135,10 @@ void __init tsx_init(void) tsx_enable(); /* - * tsx_enable() will change the state of the - * RTM CPUID bit. Force it here since it is now - * expected to be set. + * tsx_enable() will change the state of the RTM and HLE CPUID + * bits. Force them here since they are now expected to be set. */ setup_force_cpu_cap(X86_FEATURE_RTM); + setup_force_cpu_cap(X86_FEATURE_HLE); } } diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 8e6f2f4b4afe..df1358ba622b 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -16,13 +16,6 @@ #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 - static void init_zhaoxin_cap(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -58,8 +51,6 @@ static void init_zhaoxin_cap(struct cpuinfo_x86 *c) if (c->x86 >= 0x6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - cpu_detect_cache_sizes(c); } static void early_init_zhaoxin(struct cpuinfo_x86 *c) @@ -89,31 +80,6 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) } -static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c) -{ - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) - set_cpu_cap(c, X86_FEATURE_EPT); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } -} - static void init_zhaoxin(struct cpuinfo_x86 *c) { early_init_zhaoxin(c); @@ -141,8 +107,7 @@ static void init_zhaoxin(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); #endif - if (cpu_has(c, X86_FEATURE_VMX)) - zhaoxin_detect_vmx_virtcap(c); + init_ia32_feat_ctl(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 00fc55ac7ffa..fd87b59452a3 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -370,7 +370,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) /* Add 
crashk_low_res region */ if (crashk_low_res.end) { ei.addr = crashk_low_res.start; - ei.size = crashk_low_res.end - crashk_low_res.start + 1; + ei.size = resource_size(&crashk_low_res); ei.type = E820_TYPE_RAM; add_e820_entry(params, &ei); } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e07424e19274..ae64ec7f752f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) } NOKPROBE_SYMBOL(oops_end); -int __die(const char *str, struct pt_regs *regs, long err) +static void __die_header(const char *str, struct pt_regs *regs, long err) { const char *pr = ""; @@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err) IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); +} +NOKPROBE_SYMBOL(__die_header); +static int __die_body(const char *str, struct pt_regs *regs, long err) +{ show_regs(regs); print_modules(); @@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err) return 0; } +NOKPROBE_SYMBOL(__die_body); + +int __die(const char *str, struct pt_regs *regs, long err) +{ + __die_header(str, regs, err); + return __die_body(str, regs, err); +} NOKPROBE_SYMBOL(__die); /* @@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr) +{ + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + __die_header(str, regs, err); + if (gp_addr) + kasan_non_canonical_hook(gp_addr); + if (__die_body(str, regs, err)) + sig = 0; + oops_end(flags, regs, sig); +} + void show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4cba91ec8049..2f9ec14be3b1 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -710,8 +710,12 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x3e20, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_INTEL, 0x3ec4, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x8a12, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 0071b794ed19..400a05e1c1c5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); return 0; } + fpregs_deactivate(fpu); fpregs_unlock(); } @@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } if (!ret) fpregs_mark_activate(); + else + fpregs_deactivate(fpu); fpregs_unlock(); err_out: diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 319be936c348..a1806598aaa4 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -107,23 +107,20 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); -static int xfeature_is_supervisor(int xfeature_nr) +static bool xfeature_is_supervisor(int xfeature_nr) { /* - * We 
currently do not support supervisor states, but if - * we did, we could find out like this. - * - * SDM says: If state component 'i' is a user state component, - * ECX[0] return 0; if state component i is a supervisor - * state component, ECX[0] returns 1. + * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1) + * returns ECX[0] set to (1) for a supervisor state, and cleared (0) + * for a user state. */ u32 eax, ebx, ecx, edx; cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); - return !!(ecx & 1); + return ecx & 1; } -static int xfeature_is_user(int xfeature_nr) +static bool xfeature_is_user(int xfeature_nr) { return !xfeature_is_supervisor(xfeature_nr); } @@ -259,7 +256,7 @@ static void __init setup_xstate_features(void) xmm_space); xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP]; - xstate_sizes[XFEATURE_SSE] = FIELD_SIZEOF(struct fxregs_state, + xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { @@ -419,7 +416,8 @@ static void __init setup_init_fpu_buf(void) print_xstate_features(); if (boot_cpu_has(X86_FEATURE_XSAVES)) - init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; + init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | + xfeatures_mask; /* * Init all the features state with header.xfeatures being 0x0 diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 060a361d9d11..37a0aeaf89e7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/memory.h> +#include <linux/vmalloc.h> #include <trace/syscall.h> @@ -34,6 +35,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE +static int ftrace_poke_late = 0; + int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { @@ -43,84 +46,37 @@ int ftrace_arch_code_modify_prepare(void) * ftrace has it set to "read/write". */ mutex_lock(&text_mutex); - set_kernel_text_rw(); - set_all_modules_text_rw(); + ftrace_poke_late = 1; return 0; } int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) { - set_all_modules_text_ro(); - set_kernel_text_ro(); + /* + * ftrace_make_{call,nop}() may be called during + * module load, and we need to finish the text_poke_queue() + * that they do, here. + */ + text_poke_finish(); + ftrace_poke_late = 0; mutex_unlock(&text_mutex); return 0; } -union ftrace_code_union { - char code[MCOUNT_INSN_SIZE]; - struct { - unsigned char op; - int offset; - } __attribute__((packed)); -}; - -static int ftrace_calc_offset(long ip, long addr) -{ - return (int)(addr - ip); -} - -static unsigned char * -ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr) +static const char *ftrace_nop_replace(void) { - static union ftrace_code_union calc; - - calc.op = op; - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - - return calc.code; -} - -static unsigned char * -ftrace_call_replace(unsigned long ip, unsigned long addr) -{ - return ftrace_text_replace(0xe8, ip, addr); -} - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr < end; + return ideal_nops[NOP_ATOMIC5]; } -static unsigned long text_ip_addr(unsigned long ip) +static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) { - /* - * On x86_64, kernel text mappings are mapped read-only, so we use - * the kernel identity mapping instead of the kernel text mapping - * to modify the kernel text. 
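- *
- * Concretely, the remapping amounts to the one-liner used in
- * text_ip_addr() below:
- *
- *	ip = (unsigned long)__va(__pa_symbol(ip));
- *
- * __pa_symbol() resolves the text symbol to its physical address and
- * __va() hands back the writable direct-map alias of that page.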
- * - * For 32bit kernels, these mappings are same and we can use - * kernel identity mapping to modify code. - */ - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa_symbol(ip)); - - return ip; + return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); } -static const unsigned char *ftrace_nop_replace(void) +static int ftrace_verify_code(unsigned long ip, const char *old_code) { - return ideal_nops[NOP_ATOMIC5]; -} - -static int -ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - - ftrace_expected = old_code; + char cur_code[MCOUNT_INSN_SIZE]; /* * Note: @@ -129,31 +85,46 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, * Carefully read and modify the code with probe_kernel_*(), and make * sure what we read is what we expected it to be before modifying it. */ - /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + WARN_ON(1); return -EFAULT; + } /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) { + WARN_ON(1); return -EINVAL; + } - ip = text_ip_addr(ip); - - /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) - return -EPERM; + return 0; +} - sync_core(); +/* + * Marked __ref because it calls text_poke_early() which is .init.text. That is + * ok because that call will happen early, during boot, when .init sections are + * still present. + */ +static int __ref +ftrace_modify_code_direct(unsigned long ip, const char *old_code, + const char *new_code) +{ + int ret = ftrace_verify_code(ip, old_code); + if (ret) + return ret; + /* replace the text with the new text */ + if (ftrace_poke_late) + text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); + else + text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); return 0; } -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned const char *new, *old; unsigned long ip = rec->ip; + const char *new, *old; old = ftrace_call_replace(ip, addr); new = ftrace_nop_replace(); @@ -167,19 +138,20 @@ int ftrace_make_nop(struct module *mod, * just modify the code directly. */ if (addr == MCOUNT_ADDR) - return ftrace_modify_code_direct(rec->ip, old, new); + return ftrace_modify_code_direct(ip, old, new); - ftrace_expected = NULL; - - /* Normal cases use add_brk_on_nop */ + /* + * x86 overrides ftrace_replace_code -- this function will never be used + * in this case. + */ WARN_ONCE(1, "invalid use of ftrace_make_nop"); return -EINVAL; } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned const char *new, *old; unsigned long ip = rec->ip; + const char *new, *old; old = ftrace_nop_replace(); new = ftrace_call_replace(ip, addr); @@ -189,43 +161,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* - * The modifying_ftrace_code is used to tell the breakpoint - * handler to call ftrace_int3_handler(). If it fails to - * call this handler for a breakpoint added by ftrace, then - * the kernel may crash. - * - * As atomic_writes on x86 do not need a barrier, we do not - * need to add smp_mb()s for this to work. 
It is also considered - * that we can not read the modifying_ftrace_code before - * executing the breakpoint. That would be quite remarkable if - * it could do that. Here's the flow that is required: - * - * CPU-0 CPU-1 - * - * atomic_inc(mfc); - * write int3s - * <trap-int3> // implicit (r)mb - * if (atomic_read(mfc)) - * call ftrace_int3_handler() - * - * Then when we are finished: - * - * atomic_dec(mfc); - * - * If we hit a breakpoint that was not set by ftrace, it does not - * matter if ftrace_int3_handler() is called or not. It will - * simply be ignored. But it is crucial that a ftrace nop/caller - * breakpoint is handled. No other user should ever place a - * breakpoint on an ftrace nop/caller location. It must only - * be done by this code. - */ -atomic_t modifying_ftrace_code __read_mostly; - -static int -ftrace_modify_code(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code); - -/* * Should never be called: * As it is only called by __ftrace_replace_code() which is called by * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() @@ -237,452 +172,84 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { WARN_ON(1); - ftrace_expected = NULL; return -EINVAL; } -static unsigned long ftrace_update_func; -static unsigned long ftrace_update_func_call; - -static int update_ftrace_func(unsigned long ip, void *new) -{ - unsigned char old[MCOUNT_INSN_SIZE]; - int ret; - - memcpy(old, (void *)ip, MCOUNT_INSN_SIZE); - - ftrace_update_func = ip; - /* Make sure the breakpoints see the ftrace_update_func update */ - smp_wmb(); - - /* See comment above by declaration of modifying_ftrace_code */ - atomic_inc(&modifying_ftrace_code); - - ret = ftrace_modify_code(ip, old, new); - - atomic_dec(&modifying_ftrace_code); - - return ret; -} - int ftrace_update_ftrace_func(ftrace_func_t func) { - unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char *new; - int ret; - - ftrace_update_func_call = (unsigned long)func; - - new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - - /* Also update the regs callback function */ - if (!ret) { - ip = (unsigned long)(&ftrace_regs_call); - new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - } - - return ret; -} - -static nokprobe_inline int is_ftrace_caller(unsigned long ip) -{ - if (ip == ftrace_update_func) - return 1; - - return 0; -} - -/* - * A breakpoint was added to the code address we are about to - * modify, and this is the handle that will just skip over it. - * We are either changing a nop into a trace call, or a trace - * call to a nop. While the change is taking place, we treat - * it just like it was a nop. 
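- *
- * In sketch form (simplified from the handler below, which also has
- * to cope with the ftrace_update_func_call case):
- *
- *	if (ftrace_location(ip))
- *		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
- *	else if (is_ftrace_caller(ip))
- *		int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);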
- */ -int ftrace_int3_handler(struct pt_regs *regs) -{ unsigned long ip; + const char *new; - if (WARN_ON_ONCE(!regs)) - return 0; - - ip = regs->ip - INT3_INSN_SIZE; - - if (ftrace_location(ip)) { - int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); - return 1; - } else if (is_ftrace_caller(ip)) { - if (!ftrace_update_func_call) { - int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); - return 1; - } - int3_emulate_call(regs, ftrace_update_func_call); - return 1; - } - - return 0; -} -NOKPROBE_SYMBOL(ftrace_int3_handler); - -static int ftrace_write(unsigned long ip, const char *val, int size) -{ - ip = text_ip_addr(ip); - - if (probe_kernel_write((void *)ip, val, size)) - return -EPERM; - - return 0; -} - -static int add_break(unsigned long ip, const char *old) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - unsigned char brk = BREAKPOINT_INSTRUCTION; - - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - ftrace_expected = old; - - /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - - return ftrace_write(ip, &brk, 1); -} - -static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned const char *old; - unsigned long ip = rec->ip; - - old = ftrace_call_replace(ip, addr); - - return add_break(rec->ip, old); -} - - -static int add_brk_on_nop(struct dyn_ftrace *rec) -{ - unsigned const char *old; - - old = ftrace_nop_replace(); - - return add_break(rec->ip, old); -} - -static int add_breakpoints(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ftrace_addr = ftrace_get_addr_curr(rec); - - ret = ftrace_test_record(rec, enable); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return add_brk_on_nop(rec); - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return add_brk_on_call(rec, ftrace_addr); - } - return 0; -} - -/* - * On error, we need to remove breakpoints. This needs to - * be done caefully. If the address does not currently have a - * breakpoint, we know we are done. Otherwise, we look at the - * remaining 4 bytes of the instruction. If it matches a nop - * we replace the breakpoint with the nop. Otherwise we replace - * it with the call instruction. - */ -static int remove_breakpoint(struct dyn_ftrace *rec) -{ - unsigned char ins[MCOUNT_INSN_SIZE]; - unsigned char brk = BREAKPOINT_INSTRUCTION; - const unsigned char *nop; - unsigned long ftrace_addr; - unsigned long ip = rec->ip; - - /* If we fail the read, just give up */ - if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* If this does not have a breakpoint, we are done */ - if (ins[0] != brk) - return 0; - - nop = ftrace_nop_replace(); - - /* - * If the last 4 bytes of the instruction do not match - * a nop, then we assume that this is a call to ftrace_addr. - */ - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { - /* - * For extra paranoidism, we check if the breakpoint is on - * a call that would actually jump to the ftrace_addr. - * If not, don't touch the breakpoint, we make just create - * a disaster. 
- */ - ftrace_addr = ftrace_get_addr_new(rec); - nop = ftrace_call_replace(ip, ftrace_addr); - - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) - goto update; - - /* Check both ftrace_addr and ftrace_old_addr */ - ftrace_addr = ftrace_get_addr_curr(rec); - nop = ftrace_call_replace(ip, ftrace_addr); - - ftrace_expected = nop; - - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) - return -EINVAL; - } - - update: - return ftrace_write(ip, nop, 1); -} - -static int add_update_code(unsigned long ip, unsigned const char *new) -{ - /* skip breakpoint */ - ip++; - new++; - return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1); -} - -static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_call_replace(ip, addr); - return add_update_code(ip, new); -} - -static int add_update_nop(struct dyn_ftrace *rec) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_nop_replace(); - return add_update_code(ip, new); -} - -static int add_update(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ret = ftrace_test_record(rec, enable); - - ftrace_addr = ftrace_get_addr_new(rec); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return add_update_call(rec, ftrace_addr); - - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return add_update_nop(rec); - } - - return 0; -} - -static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_call_replace(ip, addr); - - return ftrace_write(ip, new, 1); -} - -static int finish_update_nop(struct dyn_ftrace *rec) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_nop_replace(); - - return ftrace_write(ip, new, 1); -} - -static int finish_update(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ret = ftrace_update_record(rec, enable); - - ftrace_addr = ftrace_get_addr_new(rec); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return finish_update_call(rec, ftrace_addr); + ip = (unsigned long)(&ftrace_call); + new = ftrace_call_replace(ip, (unsigned long)func); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return finish_update_nop(rec); - } + ip = (unsigned long)(&ftrace_regs_call); + new = ftrace_call_replace(ip, (unsigned long)func); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); return 0; } -static void do_sync_core(void *data) -{ - sync_core(); -} - -static void run_sync(void) -{ - int enable_irqs; - - /* No need to sync if there's only one CPU */ - if (num_online_cpus() == 1) - return; - - enable_irqs = irqs_disabled(); - - /* We may be called with interrupts disabled (on bootup). 
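- *
- * The sync itself is just a serializing-instruction broadcast: every
- * CPU executes sync_core() via IPI so the freshly written breakpoint
- * bytes are observed in program order, in outline:
- *
- *	on_each_cpu(do_sync_core, NULL, 1);	-- do_sync_core() -> sync_core()
- *
- * and on_each_cpu() must not be entered with interrupts off (it waits
- * for the IPIs to complete), which is why irqs are briefly re-enabled
- * here.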
*/ - if (enable_irqs) - local_irq_enable(); - on_each_cpu(do_sync_core, NULL, 1); - if (enable_irqs) - local_irq_disable(); -} - void ftrace_replace_code(int enable) { struct ftrace_rec_iter *iter; struct dyn_ftrace *rec; - const char *report = "adding breakpoints"; - int count = 0; + const char *new, *old; int ret; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); - ret = add_breakpoints(rec, enable); - if (ret) - goto remove_breakpoints; - count++; - } - - run_sync(); + switch (ftrace_test_record(rec, enable)) { + case FTRACE_UPDATE_IGNORE: + default: + continue; - report = "updating code"; - count = 0; + case FTRACE_UPDATE_MAKE_CALL: + old = ftrace_nop_replace(); + break; - for_ftrace_rec_iter(iter) { - rec = ftrace_rec_iter_record(iter); + case FTRACE_UPDATE_MODIFY_CALL: + case FTRACE_UPDATE_MAKE_NOP: + old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec)); + break; + } - ret = add_update(rec, enable); - if (ret) - goto remove_breakpoints; - count++; + ret = ftrace_verify_code(rec->ip, old); + if (ret) { + ftrace_bug(ret, rec); + return; + } } - run_sync(); - - report = "removing breakpoints"; - count = 0; - for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); - ret = finish_update(rec, enable); - if (ret) - goto remove_breakpoints; - count++; - } + switch (ftrace_test_record(rec, enable)) { + case FTRACE_UPDATE_IGNORE: + default: + continue; - run_sync(); + case FTRACE_UPDATE_MAKE_CALL: + case FTRACE_UPDATE_MODIFY_CALL: + new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec)); + break; - return; + case FTRACE_UPDATE_MAKE_NOP: + new = ftrace_nop_replace(); + break; + } - remove_breakpoints: - pr_warn("Failed on %s (%d):\n", report, count); - ftrace_bug(ret, rec); - for_ftrace_rec_iter(iter) { - rec = ftrace_rec_iter_record(iter); - /* - * Breakpoints are handled only when this function is in - * progress. The system could not work with them. - */ - if (remove_breakpoint(rec)) - BUG(); + text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL); + ftrace_update_record(rec, enable); } - run_sync(); -} - -static int -ftrace_modify_code(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) -{ - int ret; - - ret = add_break(ip, old_code); - if (ret) - goto out; - - run_sync(); - - ret = add_update_code(ip, new_code); - if (ret) - goto fail_update; - - run_sync(); - - ret = ftrace_write(ip, new_code, 1); - /* - * The breakpoint is handled only when this function is in progress. - * The system could not work if we could not remove it. 
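- *
- * This add_break/add_update/finish_update choreography is exactly what
- * the text_poke_bp() machinery now hides; a single-site update in the
- * new code (see ftrace_update_ftrace_func() above) reduces to:
- *
- *	new = ftrace_call_replace(ip, (unsigned long)func);
- *	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);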
- */ - BUG_ON(ret); - out: - run_sync(); - return ret; - - fail_update: - /* Also here the system could not work with the breakpoint */ - if (ftrace_write(ip, old_code, 1)) - BUG(); - goto out; + text_poke_finish(); } void arch_ftrace_update_code(int command) { - /* See comment above by declaration of modifying_ftrace_code */ - atomic_inc(&modifying_ftrace_code); - ftrace_modify_all_code(command); - - atomic_dec(&modifying_ftrace_code); } int __init ftrace_dyn_arch_init(void) @@ -747,6 +314,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long start_offset; unsigned long end_offset; unsigned long op_offset; + unsigned long call_offset; unsigned long offset; unsigned long npages; unsigned long size; @@ -763,10 +331,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) start_offset = (unsigned long)ftrace_regs_caller; end_offset = (unsigned long)ftrace_regs_caller_end; op_offset = (unsigned long)ftrace_regs_caller_op_ptr; + call_offset = (unsigned long)ftrace_regs_call; } else { start_offset = (unsigned long)ftrace_caller; end_offset = (unsigned long)ftrace_epilogue; op_offset = (unsigned long)ftrace_caller_op_ptr; + call_offset = (unsigned long)ftrace_call; } size = end_offset - start_offset; @@ -823,16 +393,21 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* put in the new offset to the ftrace_ops */ memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + /* put in the call to the function */ + mutex_lock(&text_mutex); + call_offset -= start_offset; + memcpy(trampoline + call_offset, + text_gen_insn(CALL_INSN_OPCODE, + trampoline + call_offset, + ftrace_ops_get_func(ops)), CALL_INSN_SIZE); + mutex_unlock(&text_mutex); + /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; set_vm_flush_reset_perms(trampoline); - /* - * Module allocation needs to be completed by making the page - * executable. The page is still writable, which is a security hazard, - * but anyhow ftrace breaks W^X completely. - */ + set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: @@ -859,62 +434,54 @@ static unsigned long calc_trampoline_call_offset(bool save_regs) void arch_ftrace_update_trampoline(struct ftrace_ops *ops) { ftrace_func_t func; - unsigned char *new; unsigned long offset; unsigned long ip; unsigned int size; - int ret, npages; + const char *new; - if (ops->trampoline) { - /* - * The ftrace_ops caller may set up its own trampoline. - * In such a case, this code must not modify it. - */ - if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) - return; - npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT; - set_memory_rw(ops->trampoline, npages); - } else { + if (!ops->trampoline) { ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; ops->trampoline_size = size; - npages = PAGE_ALIGN(size) >> PAGE_SHIFT; + return; } + /* + * The ftrace_ops caller may set up its own trampoline. + * In such a case, this code must not modify it. 
+ */ + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + return; + offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); ip = ops->trampoline + offset; - func = ftrace_ops_get_func(ops); - ftrace_update_func_call = (unsigned long)func; - + mutex_lock(&text_mutex); /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - set_memory_ro(ops->trampoline, npages); - - /* The update should never fail */ - WARN_ON(ret); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); + mutex_unlock(&text_mutex); } /* Return the address of the function the trampoline calls */ static void *addr_from_call(void *ptr) { - union ftrace_code_union calc; + union text_poke_insn call; int ret; - ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE); + ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; /* Make sure this is a call */ - if (WARN_ON_ONCE(calc.op != 0xe8)) { - pr_warn("Expected e8, got %x\n", calc.op); + if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) { + pr_warn("Expected E8, got %x\n", call.opcode); return NULL; } - return ptr + MCOUNT_INSN_SIZE + calc.offset; + return ptr + CALL_INSN_SIZE + call.disp; } void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, @@ -981,19 +548,18 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_graph_call(void); -static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) +static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { - return ftrace_text_replace(0xe9, ip, addr); + return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr); } static int ftrace_mod_jmp(unsigned long ip, void *func) { - unsigned char *new; + const char *new; - ftrace_update_func_call = 0UL; new = ftrace_jmp_replace(ip, (unsigned long)func); - - return update_ftrace_func(ip, new); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); + return 0; } int ftrace_enable_ftrace_graph_caller(void) @@ -1019,10 +585,9 @@ int ftrace_disable_ftrace_graph_caller(void) void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer) { + unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; int faulted; - unsigned long return_hooker = (unsigned long) - &return_to_handler; /* * When resuming from suspend-to-ram, this function can be indirectly @@ -1043,20 +608,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, return; /* - * If the return location is actually pointing directly to - * the start of a direct trampoline (if we trace the trampoline - * it will still be offset by MCOUNT_INSN_SIZE), then the - * return address is actually off by one word, and we - * need to adjust for that. - */ - if (ftrace_direct_func_count) { - if (ftrace_find_direct_func(self_addr + MCOUNT_INSN_SIZE)) { - self_addr = *parent; - parent++; - } - } - - /* * Protect against fault, even if it shouldn't * happen. This tool is too much intrusive to * ignore such a protection. 
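The ftrace.c hunks above drop the old three-phase int3-breakpoint dance (add_breakpoints()/add_update()/finish_update() plus repeated run_sync() IPIs) in favor of the common text-poking machinery: each record is first verified against the bytes it is expected to contain, then the new instruction is queued with text_poke_queue() and the whole batch is flushed once via text_poke_finish(). A minimal user-space sketch of that verify-then-batch shape follows; it is an illustration only, every name in it is made up, and real instruction patching additionally needs the kernel's int3 emulation and serialization machinery:

    #include <stdio.h>
    #include <string.h>

    #define INSN_SIZE 5   /* one CALL/NOP5 slot, as in MCOUNT_INSN_SIZE */
    #define NR_SITES  3

    static unsigned char text[NR_SITES][INSN_SIZE];   /* stand-in for .text */
    static unsigned char queue[NR_SITES][INSN_SIZE];  /* pending pokes */
    static int nr_queued;

    /* Pass 1 helper: fail if a site does not contain the expected bytes. */
    static int verify_code(int site, const unsigned char *expect)
    {
            return memcmp(text[site], expect, INSN_SIZE) ? -1 : 0;
    }

    /* Pass 2 helpers: queue new bytes, then apply them with one "flush". */
    static void poke_queue(int site, const unsigned char *insn)
    {
            memcpy(queue[site], insn, INSN_SIZE);
            nr_queued++;
    }

    static void poke_finish(void)
    {
            for (int i = 0; i < nr_queued; i++)
                    memcpy(text[i], queue[i], INSN_SIZE);
            printf("applied %d pokes with a single flush\n", nr_queued);
            nr_queued = 0;
    }

    int main(void)
    {
            const unsigned char nop[INSN_SIZE]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
            const unsigned char call[INSN_SIZE] = { 0xe8, 0x78, 0x56, 0x34, 0x12 };

            for (int i = 0; i < NR_SITES; i++)
                    memcpy(text[i], nop, INSN_SIZE);

            /* Pass 1: verify every record before touching anything. */
            for (int i = 0; i < NR_SITES; i++)
                    if (verify_code(i, nop))
                            return 1;       /* the kernel would ftrace_bug() here */

            /* Pass 2: queue the new instructions, flush once at the end. */
            for (int i = 0; i < NR_SITES; i++)
                    poke_queue(i, call);
            poke_finish();
            return 0;
    }

The payoff of this shape is visible in the diff: one synchronization for the whole batch replaces the per-record breakpoint/sync/update/sync/finish sequence.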
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c6f791bc481e..7a50f0b62a70 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -84,7 +84,7 @@ static inline void hpet_writel(unsigned int d, unsigned int a) static inline void hpet_set_mapping(void) { - hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_virt_address = ioremap(hpet_address, HPET_MMAP_SIZE); } static inline void hpet_clear_mapping(void) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index c1a8b9e71408..9c4498ea0b3c 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -16,15 +16,7 @@ #include <asm/alternative.h> #include <asm/text-patching.h> -union jump_code_union { - char code[JUMP_LABEL_NOP_SIZE]; - struct { - char jump; - int offset; - } __attribute__((packed)); -}; - -static void bug_at(unsigned char *ip, int line) +static void bug_at(const void *ip, int line) { /* * The location is not an op that we were expecting. @@ -35,42 +27,42 @@ static void bug_at(unsigned char *ip, int line) BUG(); } -static void __jump_label_set_jump_code(struct jump_entry *entry, - enum jump_label_type type, - union jump_code_union *code, - int init) +static const void * +__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init) { const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - const void *expect; + const void *expect, *code; + const void *addr, *dest; int line; - code->jump = 0xe9; - code->offset = jump_entry_target(entry) - - (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + addr = (void *)jump_entry_code(entry); + dest = (void *)jump_entry_target(entry); + + code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); if (init) { expect = default_nop; line = __LINE__; } else if (type == JUMP_LABEL_JMP) { expect = ideal_nop; line = __LINE__; } else { - expect = code->code; line = __LINE__; + expect = code; line = __LINE__; } - if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE)) - bug_at((void *)jump_entry_code(entry), line); + if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) + bug_at(addr, line); if (type == JUMP_LABEL_NOP) - memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE); + code = ideal_nop; + + return code; } -static void __ref __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static void inline __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { - union jump_code_union code; - - __jump_label_set_jump_code(entry, type, &code, init); + const void *opcode = __jump_label_set_jump_code(entry, type, init); /* * As long as only a single processor is running and the code is still @@ -84,31 +76,33 @@ static void __ref __jump_label_transform(struct jump_entry *entry, * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { - text_poke_early((void *)jump_entry_code(entry), &code, + text_poke_early((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE); return; } - text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL); + text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); } -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +static void __ref jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { mutex_lock(&text_mutex); - __jump_label_transform(entry, type, 0); + 
__jump_label_transform(entry, type, init); mutex_unlock(&text_mutex); } -#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) -static struct text_poke_loc tp_vec[TP_VEC_MAX]; -static int tp_vec_nr; +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + jump_label_transform(entry, type, 0); +} bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { - struct text_poke_loc *tp; - void *entry_code; + const void *opcode; if (system_state == SYSTEM_BOOTING) { /* @@ -118,53 +112,19 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, return true; } - /* - * No more space in the vector, tell upper layer to apply - * the queue before continuing. - */ - if (tp_vec_nr == TP_VEC_MAX) - return false; - - tp = &tp_vec[tp_vec_nr]; - - entry_code = (void *)jump_entry_code(entry); - - /* - * The INT3 handler will do a bsearch in the queue, so we need entries - * to be sorted. We can survive an unsorted list by rejecting the entry, - * forcing the generic jump_label code to apply the queue. Warning once, - * to raise the attention to the case of an unsorted entry that is - * better not happen, because, in the worst case we will perform in the - * same way as we do without batching - with some more overhead. - */ - if (tp_vec_nr > 0) { - int prev = tp_vec_nr - 1; - struct text_poke_loc *prev_tp = &tp_vec[prev]; - - if (WARN_ON_ONCE(prev_tp->addr > entry_code)) - return false; - } - - __jump_label_set_jump_code(entry, type, - (union jump_code_union *)&tp->text, 0); - - text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL); - - tp_vec_nr++; - + mutex_lock(&text_mutex); + opcode = __jump_label_set_jump_code(entry, type, 0); + text_poke_queue((void *)jump_entry_code(entry), + opcode, JUMP_LABEL_NOP_SIZE, NULL); + mutex_unlock(&text_mutex); return true; } void arch_jump_label_transform_apply(void) { - if (!tp_vec_nr) - return; - mutex_lock(&text_mutex); - text_poke_bp_batch(tp_vec, tp_vec_nr); + text_poke_finish(); mutex_unlock(&text_mutex); - - tp_vec_nr = 0; } static enum { @@ -193,5 +153,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, jlstate = JL_STATE_NO_UPDATE; } if (jlstate == JL_STATE_UPDATE) - __jump_label_transform(entry, type, 1); + jump_label_transform(entry, type, 1); } diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index d2f4e706a428..f293d872602a 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -177,7 +177,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, * acpi_rsdp=<addr> on kernel command line to make second kernel boot * without efi. 
*/ - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; params->secure_boot = boot_params.secure_boot; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4f13af7cbcdb..4d7022a740ab 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -40,6 +40,7 @@ #include <linux/frame.h> #include <linux/kasan.h> #include <linux/moduleloader.h> +#include <linux/vmalloc.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -119,14 +120,14 @@ __synthesize_relative_insn(void *dest, void *from, void *to, u8 op) /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ void synthesize_reljump(void *dest, void *from, void *to) { - __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE); + __synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE); } NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ void synthesize_relcall(void *dest, void *from, void *to) { - __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE); + __synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE); } NOKPROBE_SYMBOL(synthesize_relcall); @@ -301,7 +302,7 @@ static int can_probe(unsigned long paddr) * Another debugging subsystem might insert this breakpoint. * In that case, we can't recover it. */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) return 0; addr += insn.length; } @@ -356,7 +357,7 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return 0; /* Another subsystem puts a breakpoint, failed to recover */ - if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + if (insn->opcode.bytes[0] == INT3_INSN_OPCODE) return 0; /* We should not singlestep on the exception masking instructions */ @@ -400,14 +401,14 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, int len = insn->length; if (can_boost(insn, p->addr) && - MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) { + MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ synthesize_reljump(buf + len, p->ainsn.insn + len, p->addr + insn->length); - len += RELATIVEJUMP_SIZE; + len += JMP32_INSN_SIZE; p->ainsn.boostable = true; } else { p->ainsn.boostable = false; @@ -501,12 +502,14 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_arm_kprobe(struct kprobe *p) { - text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); + text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1); + text_poke_sync(); } void arch_disarm_kprobe(struct kprobe *p) { text_poke(p->addr, &p->opcode, 1); + text_poke_sync(); } void arch_remove_kprobe(struct kprobe *p) @@ -609,7 +612,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, regs->flags |= X86_EFLAGS_TF; regs->flags &= ~X86_EFLAGS_IF; /* single step inline if the instruction is an int3 */ - if (p->opcode == BREAKPOINT_INSTRUCTION) + if (p->opcode == INT3_INSN_OPCODE) regs->ip = (unsigned long)p->addr; else regs->ip = (unsigned long)p->ainsn.insn; @@ -695,7 +698,7 @@ int kprobe_int3_handler(struct pt_regs *regs) reset_current_kprobe(); return 1; } - } else if (*addr != BREAKPOINT_INSTRUCTION) { + } else if (*addr != INT3_INSN_OPCODE) { /* * The breakpoint instruction was removed right * after we hit it. 
Another cpu has removed diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 8900329c28a7..3f45b5c43a71 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -38,7 +38,7 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) long offs; int i; - for (i = 0; i < RELATIVEJUMP_SIZE; i++) { + for (i = 0; i < JMP32_INSN_SIZE; i++) { kp = get_kprobe((void *)addr - i); /* This function only handles jump-optimized kprobe */ if (kp && kprobe_optimized(kp)) { @@ -62,10 +62,10 @@ found: if (addr == (unsigned long)kp->addr) { buf[0] = kp->opcode; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE); } else { offs = addr - (unsigned long)kp->addr - 1; - memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); + memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs); } return (unsigned long)buf; @@ -141,8 +141,6 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_END_IDX \ ((long)optprobe_template_end - (long)optprobe_template_entry) -#define INT3_SIZE sizeof(kprobe_opcode_t) - /* Optimized kprobe call back function: called from optinsn */ static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) @@ -162,7 +160,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) regs->cs |= get_kernel_rpl(); regs->gs = 0; #endif - regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; + regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE; regs->orig_ax = ~0UL; __this_cpu_write(current_kprobe, &op->kp); @@ -179,7 +177,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) struct insn insn; int len = 0, ret; - while (len < RELATIVEJUMP_SIZE) { + while (len < JMP32_INSN_SIZE) { ret = __copy_instruction(dest + len, src + len, real + len, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; @@ -271,7 +269,7 @@ static int can_optimize(unsigned long paddr) return 0; /* Check there is enough space for a relative jump. */ - if (size - offset < RELATIVEJUMP_SIZE) + if (size - offset < JMP32_INSN_SIZE) return 0; /* Decode instructions */ @@ -290,15 +288,15 @@ static int can_optimize(unsigned long paddr) kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); /* Another subsystem puts a breakpoint */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) return 0; /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); /* Check any instructions don't jump into target */ if (insn_is_indirect_jump(&insn) || - insn_jump_into_range(&insn, paddr + INT3_SIZE, - RELATIVE_ADDR_SIZE)) + insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE, + DISP32_SIZE)) return 0; addr += insn.length; } @@ -374,7 +372,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, * Verify if the address gap is in 2GB range, because this uses * a relative jump. 
 */
-	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
 	if (abs(rel) > 0x7fffffff) {
 		ret = -ERANGE;
 		goto err;
@@ -401,7 +399,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 	/* Set returning jmp instruction at the tail of out-of-line buffer */
 	synthesize_reljump(buf + len, slot + len,
 			   (u8 *)op->kp.addr + op->optinsn.size);
-	len += RELATIVEJUMP_SIZE;
+	len += JMP32_INSN_SIZE;
 
 	/* We have to use text_poke() for instruction buffer because it is RO */
 	text_poke(slot, buf, len);
@@ -416,49 +414,50 @@ err:
 }
 
 /*
- * Replace breakpoints (int3) with relative jumps.
+ * Replace breakpoints (INT3) with relative jumps (JMP.d32).
  * Caller must call with locking kprobe_mutex and text_mutex.
+ *
+ * The caller will have installed a regular kprobe and after that issued
+ * synchronize_rcu_tasks(); this ensures that the instruction(s) that live in
+ * the 4 bytes after the INT3 are unused and can now be overwritten.
  */
 void arch_optimize_kprobes(struct list_head *oplist)
 {
 	struct optimized_kprobe *op, *tmp;
-	u8 insn_buff[RELATIVEJUMP_SIZE];
+	u8 insn_buff[JMP32_INSN_SIZE];
 
 	list_for_each_entry_safe(op, tmp, oplist, list) {
 		s32 rel = (s32)((long)op->optinsn.insn -
-			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+			((long)op->kp.addr + JMP32_INSN_SIZE));
 
 		WARN_ON(kprobe_disabled(&op->kp));
 
 		/* Backup instructions which will be replaced by jump address */
-		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
-		       RELATIVE_ADDR_SIZE);
+		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
+		       DISP32_SIZE);
 
-		insn_buff[0] = RELATIVEJUMP_OPCODE;
+		insn_buff[0] = JMP32_INSN_OPCODE;
 		*(s32 *)(&insn_buff[1]) = rel;
 
-		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
+		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
 
 		list_del_init(&op->list);
 	}
 }
 
-/* Replace a relative jump with a breakpoint (int3). */
+/*
+ * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
+ *
+ * After that, we can restore the 4 bytes after the INT3 to undo what
+ * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
+ * unused once the INT3 lands.
+ */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-	u8 insn_buff[RELATIVEJUMP_SIZE];
-	u8 emulate_buff[RELATIVEJUMP_SIZE];
-
-	/* Set int3 to first byte for kprobes */
-	insn_buff[0] = BREAKPOINT_INSTRUCTION;
-	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
-	emulate_buff[0] = RELATIVEJUMP_OPCODE;
-	*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
-			((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
-	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-		     emulate_buff);
+	arch_arm_kprobe(&op->kp);
+	text_poke(op->kp.addr + INT3_INSN_SIZE,
+		  op->optinsn.copied_insn, DISP32_SIZE);
+	text_poke_sync();
 }
 
 /*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 32ef1ee733b7..81045aabb6f4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -245,17 +245,13 @@ NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 dotraplinkage void
 do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
-	enum ctx_state prev_state;
-
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
 		do_page_fault(regs, error_code, address);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host.
*/ - prev_state = exception_enter(); kvm_async_pf_task_wait((u32)address, !user_mode(regs)); - exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b2463fcb20a8..c57e1ca70fd1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -28,6 +28,89 @@ #include <asm/desc.h> #include <asm/mmu_context.h> #include <asm/syscalls.h> +#include <asm/pgtable_areas.h> + +/* This is a multiple of PAGE_SIZE. */ +#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) + +static inline void *ldt_slot_va(int slot) +{ + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); +} + +void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* READ_ONCE synchronizes with smp_store_release */ + ldt = READ_ONCE(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) { + if (static_cpu_has(X86_FEATURE_PTI)) { + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { + /* + * Whoops -- either the new LDT isn't mapped + * (if slot == -1) or is mapped into a bogus + * slot (if slot > 1). + */ + clear_LDT(); + return; + } + + /* + * If page table isolation is enabled, ldt->entries + * will not be mapped in the userspace pagetables. + * Tell the CPU to access the LDT through the alias + * at ldt_slot_va(ldt->slot). + */ + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); + } else { + set_ldt(ldt->entries, ldt->nr_entries); + } + } else { + clear_LDT(); + } +} + +void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ + /* + * Load the LDT if either the old or new mm had an LDT. + * + * An mm will never go from having an LDT to not having an LDT. Two + * mms never share an LDT, so we don't gain anything by checking to + * see whether the LDT changed. There's also no guarantee that + * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, + * then prev->context.ldt will also be non-NULL. + * + * If we really cared, we could optimize the case where prev == next + * and we're exiting lazy mode. Most of the time, if this happens, + * we don't actually need to reload LDTR, but modify_ldt() is mostly + * used by legacy code and emulators where we don't need this level of + * performance. + * + * This uses | instead of || because it generates better code. 
+ */ + if (unlikely((unsigned long)prev->context.ldt | + (unsigned long)next->context.ldt)) + load_mm_ldt(next); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} static void refresh_ldt_segments(void) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e676a9916c49..54c21d6abd5a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void) } fs_initcall(nmi_warning_debugfs); -static void nmi_max_handler(struct irq_work *w) +static void nmi_check_duration(struct nmiaction *action, u64 duration) { - struct nmiaction *a = container_of(w, struct nmiaction, irq_work); + u64 whole_msecs = READ_ONCE(action->max_duration); int remainder_ns, decimal_msecs; - u64 whole_msecs = READ_ONCE(a->max_duration); + + if (duration < nmi_longest_ns || duration < action->max_duration) + return; + + action->max_duration = duration; remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; printk_ratelimited(KERN_INFO "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", - a->handler, whole_msecs, decimal_msecs); + action->handler, whole_msecs, decimal_msecs); } static int nmi_handle(unsigned int type, struct pt_regs *regs) @@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs) delta = sched_clock() - delta; trace_nmi_handler(a->handler, (int)delta, thishandled); - if (delta < nmi_longest_ns || delta < a->max_duration) - continue; - - a->max_duration = delta; - irq_work_queue(&a->irq_work); + nmi_check_duration(a, delta); } rcu_read_unlock(); @@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) if (!action->handler) return -EINVAL; - init_irq_work(&action->irq_work, nmi_max_handler); - raw_spin_lock_irqsave(&desc->lock, flags); /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 61e93a318983..839b5244e3b7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -615,12 +615,8 @@ void speculation_ctrl_update_current(void) void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { - struct thread_struct *prev, *next; unsigned long tifp, tifn; - prev = &prev_p->thread; - next = &next_p->thread; - tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 323499f48858..5052ced43373 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -124,7 +124,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->ip = new_ip; regs->sp = new_sp; regs->flags = X86_EFLAGS_IF; - force_iret(); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 506d66830d4d..ffd497804dbc 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -394,7 +394,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - force_iret(); } void diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 1daf8f2aa21f..896d74cb5081 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -110,7 +110,7 @@ static void ich_force_enable_hpet(struct pci_dev *dev) } /* use bits 31:14, 16 kB aligned */ - rcba_base = ioremap_nocache(rcba, 0x4000); + rcba_base = ioremap(rcba, 0x4000); if (rcba_base == NULL) { dev_printk(KERN_DEBUG, &dev->dev, "ioremap failed; " "cannot 
force enable HPET\n"); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cedfe2077a69..2441b64d061f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -2,130 +2,54 @@ /* * Copyright (C) 1995 Linus Torvalds * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * - * Memory region support - * David Parsons <orc@pell.chi.il.us>, July-August 1999 - * - * Added E820 sanitization routine (removes overlapping memory regions); - * Brian Moyle <bmoyle@mvista.com>, February 2001 - * - * Moved CPU detection code to cpu/${cpu}.c - * Patrick Mochel <mochel@osdl.org>, March 2002 - * - * Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach <xela@slit.de>, December 2002. - * - */ - -/* - * This file handles the architecture-dependent parts of initialization + * This file contains the setup_arch() code, which handles the architecture-dependent + * parts of early kernel initialization. */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/mmzone.h> -#include <linux/screen_info.h> -#include <linux/ioport.h> -#include <linux/acpi.h> -#include <linux/sfi.h> -#include <linux/apm_bios.h> -#include <linux/initrd.h> -#include <linux/memblock.h> -#include <linux/seq_file.h> #include <linux/console.h> -#include <linux/root_dev.h> -#include <linux/highmem.h> -#include <linux/export.h> +#include <linux/crash_dump.h> +#include <linux/dmi.h> #include <linux/efi.h> -#include <linux/init.h> -#include <linux/edd.h> +#include <linux/init_ohci1394_dma.h> +#include <linux/initrd.h> #include <linux/iscsi_ibft.h> -#include <linux/nodemask.h> -#include <linux/kexec.h> -#include <linux/dmi.h> -#include <linux/pfn.h> +#include <linux/memblock.h> #include <linux/pci.h> -#include <asm/pci-direct.h> -#include <linux/init_ohci1394_dma.h> -#include <linux/kvm_para.h> -#include <linux/dma-contiguous.h> -#include <xen/xen.h> -#include <uapi/linux/mount.h> - -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/user.h> -#include <linux/delay.h> - -#include <linux/kallsyms.h> -#include <linux/cpufreq.h> -#include <linux/dma-mapping.h> -#include <linux/ctype.h> -#include <linux/uaccess.h> - -#include <linux/percpu.h> -#include <linux/crash_dump.h> +#include <linux/root_dev.h> +#include <linux/sfi.h> #include <linux/tboot.h> -#include <linux/jiffies.h> -#include <linux/mem_encrypt.h> -#include <linux/sizes.h> - #include <linux/usb/xhci-dbgp.h> -#include <video/edid.h> -#include <asm/mtrr.h> +#include <uapi/linux/mount.h> + +#include <xen/xen.h> + #include <asm/apic.h> -#include <asm/realmode.h> -#include <asm/e820/api.h> -#include <asm/mpspec.h> -#include <asm/setup.h> -#include <asm/efi.h> -#include <asm/timer.h> -#include <asm/i8259.h> -#include <asm/sections.h> -#include <asm/io_apic.h> -#include <asm/ist.h> -#include <asm/setup_arch.h> #include <asm/bios_ebda.h> -#include <asm/cacheflush.h> -#include <asm/processor.h> #include <asm/bugs.h> -#include <asm/kasan.h> - -#include <asm/vsyscall.h> #include <asm/cpu.h> -#include <asm/desc.h> -#include <asm/dma.h> -#include <asm/iommu.h> +#include <asm/efi.h> #include <asm/gart.h> -#include <asm/mmu_context.h> -#include <asm/proto.h> - -#include <asm/paravirt.h> #include <asm/hypervisor.h> -#include <asm/olpc_ofw.h> - -#include <asm/percpu.h> -#include <asm/topology.h> -#include <asm/apicdef.h> -#include <asm/amd_nb.h> +#include <asm/io_apic.h> +#include <asm/kasan.h> +#include 
<asm/kaslr.h>
 #include <asm/mce.h>
-#include <asm/alternative.h>
+#include <asm/mtrr.h>
+#include <asm/realmode.h>
+#include <asm/olpc_ofw.h>
+#include <asm/pci-direct.h>
 #include <asm/prom.h>
-#include <asm/microcode.h>
-#include <asm/kaslr.h>
+#include <asm/proto.h>
 #include <asm/unwind.h>
+#include <asm/vsyscall.h>
+#include <linux/vmalloc.h>
 
 /*
- * max_low_pfn_mapped: highest direct mapped pfn under 4GB
- * max_pfn_mapped: highest direct mapped pfn over 4GB
+ * max_low_pfn_mapped: highest directly mapped pfn < 4 GB
+ * max_pfn_mapped: highest directly mapped pfn > 4 GB
  *
  * The direct mapping only covers E820_TYPE_RAM regions, so the ranges and gaps are
- * represented by pfn_mapped
+ * represented by pfn_mapped[].
  */
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
@@ -135,14 +59,23 @@ RESERVE_BRK(dmi_alloc, 65536);
 #endif
 
-static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
-unsigned long _brk_end = (unsigned long)__brk_base;
+/*
+ * Range of the BSS area. The size of the BSS area is determined
+ * at link time, with RESERVE_BRK*() facility reserving additional
+ * chunks.
+ */
+static __initdata
+unsigned long _brk_start = (unsigned long)__brk_base;
+unsigned long _brk_end = (unsigned long)__brk_base;
 
 struct boot_params boot_params;
 
 /*
- * Machine setup..
+ * These are the four main kernel memory regions; we put them into
+ * the resource tree so that kdump tools and other debugging tools
+ * can recover them:
  */
+
 static struct resource rodata_resource = {
 	.name	= "Kernel rodata",
 	.start	= 0,
@@ -173,16 +106,16 @@ static struct resource bss_resource = {
 
 #ifdef CONFIG_X86_32
-/* cpu data as detected by the assembly code in head_32.S */
+/* CPU data as detected by the assembly code in head_32.S */
 struct cpuinfo_x86 new_cpu_data;
 
-/* common cpu data for all cpus */
+/* Common CPU data for all CPUs */
 struct cpuinfo_x86 boot_cpu_data __read_mostly;
 EXPORT_SYMBOL(boot_cpu_data);
 
 unsigned int def_to_bigsmp;
 
-/* for MCA, but anyone else can use it if they want */
+/* For MCA, but anyone else can use it if they want */
 unsigned int machine_id;
 unsigned int machine_submodel_id;
 unsigned int BIOS_revision;
@@ -468,15 +401,15 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 /*
  * Keep the crash kernel below this limit.
  *
- * On 32 bits earlier kernels would limit the kernel to the low 512 MiB
+ * Earlier 32-bit kernels would limit the kernel to the low 512 MB range
  * due to mapping restrictions.
  *
- * On 64bit, kdump kernel need be restricted to be under 64TB, which is
+ * 64-bit kdump kernels need to be restricted to be under 64 TB, which is
  * the upper limit of system RAM in 4-level paging mode. Since the kdump
- * jumping could be from 5-level to 4-level, the jumping will fail if
- * kernel is put above 64TB, and there's no way to detect the paging mode
- * of the kernel which will be loaded for dumping during the 1st kernel
- * bootup.
+ * jump could be from 5-level paging to 4-level paging, the jump will fail if
+ * the kernel is put above 64 TB, and during the 1st kernel bootup there's
+ * no good way to detect the paging mode of the target kernel which will be
+ * loaded for dumping.
*/ #ifdef CONFIG_X86_32 # define CRASH_ADDR_LOW_MAX SZ_512M @@ -887,7 +820,7 @@ void __init setup_arch(char **cmdline_p) /* * Note: Quark X1000 CPUs advertise PGE incorrectly and require * a cr3 based tlb flush, so the following __flush_tlb_all() - * will not flush anything because the cpu quirk which clears + * will not flush anything because the CPU quirk which clears * X86_FEATURE_PGE has not been invoked yet. Though due to the * load_cr3() above the TLB has been flushed already. The * quirk is invoked before subsequent calls to __flush_tlb_all() diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 8eb7193e158d..8a29573851a3 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -151,8 +151,6 @@ static int restore_sigcontext(struct pt_regs *regs, err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32)); - force_iret(); - return err; } diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 01f0e2263b86..298fc1edd9c9 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -90,11 +90,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length); /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 4c61f0713832..b89f6ac6a0c0 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -354,7 +354,7 @@ static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t c void *kbuf; int ret = -EFAULT; - log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE); + log_base = ioremap(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE); if (!log_base) return ret; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05da6b5b167b..9e6f822922a3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -56,6 +56,8 @@ #include <asm/mpx.h> #include <asm/vm86.h> #include <asm/umip.h> +#include <asm/insn.h> +#include <asm/insn-eval.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -518,11 +520,57 @@ exit_trap: do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); } -dotraplinkage void -do_general_protection(struct pt_regs *regs, long error_code) +enum kernel_gp_hint { + GP_NO_HINT, + GP_NON_CANONICAL, + GP_CANONICAL +}; + +/* + * When an uncaught #GP occurs, try to determine the memory address accessed by + * the instruction and return that address to the caller. Also, try to figure + * out whether any part of the access to that address was non-canonical. 
+ */ +static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, + unsigned long *addr) { - const char *desc = "general protection fault"; + u8 insn_buf[MAX_INSN_SIZE]; + struct insn insn; + + if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + return GP_NO_HINT; + + kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); + insn_get_modrm(&insn); + insn_get_sib(&insn); + + *addr = (unsigned long)insn_get_addr_ref(&insn, regs); + if (*addr == -1UL) + return GP_NO_HINT; + +#ifdef CONFIG_X86_64 + /* + * Check that: + * - the operand is not in the kernel half + * - the last byte of the operand is not in the user canonical half + */ + if (*addr < ~__VIRTUAL_MASK && + *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) + return GP_NON_CANONICAL; +#endif + + return GP_CANONICAL; +} + +#define GPFSTR "general protection fault" + +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) +{ + char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; + enum kernel_gp_hint hint = GP_NO_HINT; struct task_struct *tsk; + unsigned long gp_addr; + int ret; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); cond_local_irq_enable(regs); @@ -539,48 +587,61 @@ do_general_protection(struct pt_regs *regs, long error_code) } tsk = current; - if (!user_mode(regs)) { - if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) - return; + if (user_mode(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; - /* - * To be potentially processing a kprobe fault and to - * trust the result from kprobe_running(), we have to - * be non-preemptible. - */ - if (!preemptible() && kprobe_running() && - kprobe_fault_handler(regs, X86_TRAP_GP)) - return; + show_signal(tsk, SIGSEGV, "", desc, regs, error_code); + force_sig(SIGSEGV); - if (notify_die(DIE_GPF, desc, regs, error_code, - X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) - die(desc, regs, error_code); return; } + if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) + return; + tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; - show_signal(tsk, SIGSEGV, "", desc, regs, error_code); + /* + * To be potentially processing a kprobe fault and to trust the result + * from kprobe_running(), we have to be non-preemptible. + */ + if (!preemptible() && + kprobe_running() && + kprobe_fault_handler(regs, X86_TRAP_GP)) + return; + + ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV); + if (ret == NOTIFY_STOP) + return; + + if (error_code) + snprintf(desc, sizeof(desc), "segment-related " GPFSTR); + else + hint = get_kernel_gp_address(regs, &gp_addr); + + if (hint != GP_NO_HINT) + snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", + (hint == GP_NON_CANONICAL) ? "probably for non-canonical address" + : "maybe for address", + gp_addr); + + /* + * KASAN is interested only in the non-canonical case, clear it + * otherwise. + */ + if (hint != GP_NON_CANONICAL) + gp_addr = 0; + + die_addr(desc, regs, error_code, gp_addr); - force_sig(SIGSEGV); } NOKPROBE_SYMBOL(do_general_protection); dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { -#ifdef CONFIG_DYNAMIC_FTRACE - /* - * ftrace must be first, everything else may cause a recursive crash. 
- * See note by declaration of modifying_ftrace_code in ftrace.c - */ - if (unlikely(atomic_read(&modifying_ftrace_code)) && - ftrace_int3_handler(regs)) - return; -#endif if (poke_int3_handler(regs)) return; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index b8acf639abd1..32a818764e03 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -233,7 +233,6 @@ static cycles_t check_tsc_warp(unsigned int timeout) * The measurement runs for 'timeout' msecs: */ end = start + (cycles_t) tsc_khz * timeout; - now = start; for (i = 0; ; i++) { /* diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 332ae6530fa8..e9cc182aa97e 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -187,6 +187,8 @@ static struct orc_entry *orc_find(unsigned long ip) return orc_ftrace_find(ip); } +#ifdef CONFIG_MODULES + static void orc_sort_swap(void *_a, void *_b, int size) { struct orc_entry *orc_a, *orc_b; @@ -229,7 +231,6 @@ static int orc_sort_cmp(const void *_a, const void *_b) return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; } -#ifdef CONFIG_MODULES void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, void *_orc, size_t orc_size) { @@ -273,9 +274,11 @@ void __init unwind_init(void) return; } - /* Sort the .orc_unwind and .orc_unwind_ip tables: */ - sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp, - orc_sort_swap); + /* + * Note, the orc_unwind and orc_unwind_ip tables were already + * sorted at build time via the 'sorttable' tool. + * It's ready for binary search straight away, no need to sort it. + */ /* Initialize the fast lookup table: */ lookup_num_blocks = orc_lookup_end - orc_lookup; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index a76c12b38e92..91d55454e702 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -381,7 +381,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) mark_screen_rdonly(tsk->mm); memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); - force_iret(); return regs->ax; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3a1a819da137..e3296aa028fe 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -193,12 +193,10 @@ SECTIONS __vvar_beginning_hack = .; /* Place all vvars at the offsets in asm/vvar.h. */ -#define EMIT_VVAR(name, offset) \ +#define EMIT_VVAR(name, offset) \ . = __vvar_beginning_hack + offset; \ *(.vvar_ ## name) -#define __VVAR_KERNEL_LDS #include <asm/vvar.h> -#undef __VVAR_KERNEL_LDS #undef EMIT_VVAR /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ce89430a7f80..23e25f3034c2 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -20,7 +20,7 @@ #include <asm/irq.h> #include <asm/io_apic.h> #include <asm/hpet.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/tsc.h> #include <asm/iommu.h> #include <asm/mach_traps.h> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 840e12583b85..991019d5eee1 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -60,13 +60,11 @@ config KVM If unsure, say N. 
config KVM_INTEL - tristate "KVM for Intel processors support" - depends on KVM - # for perf_guest_get_msrs(): - depends on CPU_SUP_INTEL + tristate "KVM for Intel (and compatible) processors support" + depends on KVM && IA32_FEAT_CTL ---help--- - Provides support for KVM on Intel processors equipped with the VT - extensions. + Provides support for KVM on processors equipped with Intel's VT + extensions, a.k.a. Virtual Machine Extensions (VMX). To compile this as a module, choose M here: the module will be called kvm-intel. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cfafa320a8cf..cf55629ff0ff 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -402,7 +402,8 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) entry->edx |= F(SPEC_CTRL); if (boot_cpu_has(X86_FEATURE_STIBP)) entry->edx |= F(INTEL_STIBP); - if (boot_cpu_has(X86_FEATURE_SSBD)) + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->edx |= F(SPEC_CTRL_SSBD); /* * We emulate ARCH_CAPABILITIES in software even @@ -759,7 +760,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(AMD_IBRS); if (boot_cpu_has(X86_FEATURE_STIBP)) entry->ebx |= F(AMD_STIBP); - if (boot_cpu_has(X86_FEATURE_SSBD)) + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(AMD_SSBD); if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) entry->ebx |= F(AMD_SSB_NO); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f92b40d798c..a32b847a8089 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -40,7 +40,7 @@ #include <linux/kthread.h> #include <asm/page.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/cmpxchg.h> #include <asm/e820/api.h> #include <asm/io.h> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 4aea7d304beb..6879966b7648 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4588,8 +4588,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) gpa_t vmptr; uint32_t revision; struct vcpu_vmx *vmx = to_vmx(vcpu); - const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED - | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED + | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* * The Intel VMX Instruction Reference lists a bunch of bits that are diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e3394c839dea..cdb4bf50ee14 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1839,11 +1839,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_EXT_CTL: if (!msr_info->host_initiated && !(vmx->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) + FEAT_CTL_LMCE_ENABLED)) return 1; msr_info->data = vcpu->arch.mcg_ext_ctl; break; - case MSR_IA32_FEATURE_CONTROL: + case MSR_IA32_FEAT_CTL: msr_info->data = vmx->msr_ia32_feature_control; break; case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: @@ -2074,15 +2074,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_EXT_CTL: if ((!msr_info->host_initiated && !(to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) || + FEAT_CTL_LMCE_ENABLED)) || (data & ~MCG_EXT_CTL_LMCE_EN)) return 1; vcpu->arch.mcg_ext_ctl = data; break; - case MSR_IA32_FEATURE_CONTROL: + case MSR_IA32_FEAT_CTL: if (!vmx_feature_control_msr_valid(vcpu, data) || (to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) + FEAT_CTL_LOCKED && !msr_info->host_initiated)) return 1; vmx->msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) @@ -2204,29 +2204,8 @@ static __init int cpu_has_kvm_support(void) static __init int vmx_disabled_by_bios(void) { - u64 msr; - - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); - if (msr & FEATURE_CONTROL_LOCKED) { - /* launched w/ TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && tboot_enabled()) - return 1; - /* launched w/o TXT and VMX only enabled w/ TXT */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && !tboot_enabled()) { - printk(KERN_WARNING "kvm: disable TXT in the BIOS or " - "activate TXT before enabling KVM\n"); - return 1; - } - /* launched w/o TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && !tboot_enabled()) - return 1; - } - - return 0; + return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !boot_cpu_has(X86_FEATURE_VMX); } static void kvm_cpu_vmxon(u64 addr) @@ -2241,7 +2220,6 @@ static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); - u64 old, test_bits; if (cr4_read_shadow() & X86_CR4_VMXE) return -EBUSY; @@ -2269,17 +2247,6 @@ static int hardware_enable(void) */ crash_enable_local_vmclear(cpu); - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); - - test_bits = FEATURE_CONTROL_LOCKED; - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (tboot_enabled()) - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; - - if ((old & test_bits) != test_bits) { - /* enable and lock */ - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); - } kvm_cpu_vmxon(phys_addr); if (enable_ept) ept_sync_global(); @@ -6801,7 +6768,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; /* * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR @@ -6871,6 +6838,12 @@ static int __init vmx_check_processor_compat(void) struct vmcs_config vmcs_conf; struct vmx_capability vmx_cap; + if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !this_cpu_has(X86_FEATURE_VMX)) { + pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id()); + return -EIO; + } + if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) return -EIO; if (nested) @@ -7099,12 +7072,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - 
FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); + ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX); if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); @@ -7523,10 +7496,10 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) { if (vcpu->arch.mcg_cap & MCG_LMCE_P) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_LMCE; + FEAT_CTL_LMCE_ENABLED; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_LMCE; + ~FEAT_CTL_LMCE_ENABLED; } static int vmx_smi_allowed(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a4f7f737c5d4..7f42cf3dcd70 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -289,7 +289,7 @@ struct vcpu_vmx { /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in - * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included + * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included * in msr_ia32_feature_control_valid_bits. */ u64 msr_ia32_feature_control; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf917139de6b..740d3ee42455 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1142,7 +1142,7 @@ static const u32 msrs_to_save_all[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_IA32_SPEC_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 306c3a0902ba..31600d851fd8 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff) */ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; /* * Resolve the default segment register as described in Section 3.7.4 @@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) * which may be invalid at this point. */ if (regoff == offsetof(struct pt_regs, ip)) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; else return INAT_SEG_REG_CS; @@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) * In long mode, segment override prefixes are ignored, except for * overrides for FS and GS. */ - if (user_64bit_mode(regs)) { + if (any_64bit_mode(regs)) { if (idx != INAT_SEG_REG_FS && idx != INAT_SEG_REG_GS) idx = INAT_SEG_REG_IGNORE; @@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) */ return (unsigned long)(sel << 4); - if (user_64bit_mode(regs)) { + if (any_64bit_mode(regs)) { /* * Only FS or GS will have a base address, the rest of * the segments' bases are forced to 0. */ unsigned long base; - if (seg_reg_idx == INAT_SEG_REG_FS) + if (seg_reg_idx == INAT_SEG_REG_FS) { rdmsrl(MSR_FS_BASE, base); - else if (seg_reg_idx == INAT_SEG_REG_GS) + } else if (seg_reg_idx == INAT_SEG_REG_GS) { /* * swapgs was called at the kernel entry point. Thus, * MSR_KERNEL_GS_BASE will have the user-space GS base. 
*/ - rdmsrl(MSR_KERNEL_GS_BASE, base); - else + if (user_mode(regs)) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + rdmsrl(MSR_GS_BASE, base); + } else { base = 0; + } return base; } @@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) if (sel < 0) return 0; - if (user_64bit_mode(regs) || v8086_mode(regs)) + if (any_64bit_mode(regs) || v8086_mode(regs)) return -1L; if (!sel) @@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, * following instruction. */ if (*regoff == -EDOM) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) tmp = regs->ip + insn->length; else tmp = 0; @@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs) * After computed, the effective address is treated as an unsigned * quantity. */ - if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) + if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) goto out; /* diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 337830d7a59c..7ff00ea64e4f 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -29,10 +29,7 @@ SYM_FUNC_START_ALIAS(memmove) SYM_FUNC_START(__memmove) - /* Handle more 32 bytes in loop */ mov %rdi, %rax - cmp $0x20, %rdx - jb 1f /* Decide forward/backward copy mode */ cmp %rdi, %rsi @@ -42,7 +39,9 @@ SYM_FUNC_START(__memmove) cmp %rdi, %r8 jg 2f + /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS /* @@ -114,6 +113,8 @@ SYM_FUNC_START(__memmove) */ .p2align 4 2: + cmp $0x20, %rdx + jb 1f cmp $680, %rdx jb 6f cmp %dil, %sil diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3b89c201ac26..345848f270e3 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -12,8 +12,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif -obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o +obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \ + pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o + +obj-y += pat/ # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) @@ -23,8 +25,6 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp) CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace -obj-$(CONFIG_X86_PAT) += pat_interval.o - obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 304d31d8cbbc..fa4ea09593ab 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -29,6 +29,7 @@ #include <asm/efi.h> /* efi_recover_from_page_fault()*/ #include <asm/desc.h> /* store_idt(), ... */ #include <asm/cpu_entry_area.h> /* exception stack */ +#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> @@ -1486,27 +1487,6 @@ good_area: } NOKPROBE_SYMBOL(do_user_addr_fault); -/* - * Explicitly marked noinline such that the function tracer sees this as the - * page_fault entry point. 
- */ -static noinline void -__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, - unsigned long address) -{ - prefetchw(&current->mm->mmap_sem); - - if (unlikely(kmmio_fault(regs, address))) - return; - - /* Was the fault on kernel-controlled part of the address space? */ - if (unlikely(fault_in_kernel_space(address))) - do_kern_addr_fault(regs, hw_error_code, address); - else - do_user_addr_fault(regs, hw_error_code, address); -} -NOKPROBE_SYMBOL(__do_page_fault); - static __always_inline void trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, unsigned long address) @@ -1521,13 +1501,19 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, } dotraplinkage void -do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, + unsigned long address) { - enum ctx_state prev_state; + prefetchw(&current->mm->mmap_sem); + trace_page_fault_entries(regs, hw_error_code, address); - prev_state = exception_enter(); - trace_page_fault_entries(regs, error_code, address); - __do_page_fault(regs, error_code, address); - exception_exit(prev_state); + if (unlikely(kmmio_fault(regs, address))) + return; + + /* Was the fault on kernel-controlled part of the address space? */ + if (unlikely(fault_in_kernel_space(address))) + do_kern_addr_fault(regs, hw_error_code, address); + else + do_user_addr_fault(regs, hw_error_code, address); } NOKPROBE_SYMBOL(do_page_fault); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 930edeb41ec3..23df4885bbed 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -52,6 +52,7 @@ #include <asm/page_types.h> #include <asm/cpu_entry_area.h> #include <asm/init.h> +#include <asm/pgtable_areas.h> #include "mm_internal.h" @@ -865,43 +866,13 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif int kernel_set_to_readonly __read_mostly; -void set_kernel_text_rw(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read write\n", - start, start+size); - - set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read only\n", - start, start+size); - - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); -} - static void mark_nxdata_nx(void) { /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dcb9bc961b39..abbdecb75fad 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); - struct zone *zone = page_zone(page); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -1260,42 +1258,6 @@ void __init mem_init(void)
int kernel_set_to_readonly; -void set_kernel_text_rw(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(_etext); - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read write\n", - start, end); - - /* - * Make the kernel identity mapping for text RW. Kernel text - * mapping will always be RO. Refer to the comment in - * static_protections() in pageattr.c - */ - set_memory_rw(start, (end - start) >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(_etext); - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read only\n", - start, end); - - /* - * Set the kernel identity mapping for text RO. - */ - set_memory_ro(start, (end - start) >> PAGE_SHIFT); -} - void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 6748b4c2baff..f60398aeb644 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -4,7 +4,7 @@ */ #include <asm/iomap.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <linux/export.h> #include <linux/highmem.h> @@ -26,7 +26,7 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) if (!is_io_mapping_possible(base, size)) return -EINVAL; - ret = io_reserve_memtype(base, base + size, &pcm); + ret = memtype_reserve_io(base, base + size, &pcm); if (ret) return ret; @@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(iomap_create_wc); void iomap_free(resource_size_t base, unsigned long size) { - io_free_memtype(base, base + size); + memtype_free_io(base, base + size); } EXPORT_SYMBOL_GPL(iomap_free); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index b3a2936377b5..44e4beb4239f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -24,7 +24,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/setup.h> #include "physaddr.h" @@ -196,10 +196,10 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, phys_addr &= PHYSICAL_PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; - retval = reserve_memtype(phys_addr, (u64)phys_addr + size, + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { - printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); + printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval); return NULL; } @@ -255,7 +255,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, pcm)) + if (memtype_kernel_map_sync(phys_addr, size, pcm)) goto err_free_area; if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) @@ -275,7 +275,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, err_free_area: free_vm_area(area); err_free_memtype: - free_memtype(phys_addr, phys_addr + size); + memtype_free(phys_addr, phys_addr + size); return NULL; } @@ -451,7 +451,7 @@ void iounmap(volatile void __iomem *addr) return; } - free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); + memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ o = remove_vm_area((void __force *)addr); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index cf5bc37c90ac..763e71abc0fe 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ 
-288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end) } while (pgd++, addr = next, addr != (unsigned long)end); } -#ifdef CONFIG_KASAN_INLINE -static int kasan_die_handler(struct notifier_block *self, - unsigned long val, - void *data) -{ - if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled\n"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); - } - return NOTIFY_OK; -} - -static struct notifier_block kasan_die_notifier = { - .notifier_call = kasan_die_handler, -}; -#endif - void __init kasan_early_init(void) { int i; @@ -341,10 +324,6 @@ void __init kasan_init(void) int i; void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; -#ifdef CONFIG_KASAN_INLINE - register_die_notifier(&kasan_die_notifier); -#endif - memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); /* diff --git a/arch/x86/mm/pat/Makefile b/arch/x86/mm/pat/Makefile new file mode 100644 index 000000000000..ea464c995161 --- /dev/null +++ b/arch/x86/mm/pat/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := set_memory.o memtype.o + +obj-$(CONFIG_X86_PAT) += memtype_interval.o diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pat/cpa-test.c index facce271e8b9..facce271e8b9 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pat/cpa-test.c diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat/memtype.c index 2d758e19ef22..394be8611748 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat/memtype.c @@ -1,11 +1,34 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Handle caching attributes in page tables (PAT) + * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. * * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> * Suresh B Siddha <suresh.b.siddha@intel.com> * * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. + * + * Basic principles: + * + * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and + * the kernel to set one of a handful of 'caching type' attributes for physical + * memory ranges: uncached, write-combining, write-through, write-protected, + * and the most commonly used and default attribute: write-back caching. + * + * PAT support supersedes and augments MTRR support in a compatible fashion: MTRR is + * a hardware interface to enumerate a limited number of physical memory ranges + * and set their caching attributes explicitly, programmed into the CPU via MSRs. + * Even modern CPUs have MTRRs enabled - but these are typically not touched + * by the kernel or by user-space (such as the X server); we rely on PAT for any + * additional cache attribute logic. + * + * PAT doesn't work via explicit memory ranges, but uses page table entries to add + * cache attribute information to the mapped memory range: there are 3 bits used, + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the + * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). + * + * ( There's a metric ton of finer details, such as compatibility with CPU quirks + * that only support 4 types of PAT entries, and interaction with MTRRs, see + * below for details.
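For illustration of the encoding described above: the three page-table bits combine into a 3-bit value that selects one of the eight MSR_IA32_CR_PAT entries. A minimal user-space sketch of that index calculation (the bit positions follow the x86 4K-PTE layout; the helper name is invented for this example, not a kernel API):

#include <stdio.h>

/* x86 PTE bit positions for 4K pages (huge pages use bit 12 instead of 7): */
#define _PAGE_BIT_PWT	3	/* write-through */
#define _PAGE_BIT_PCD	4	/* cache disable */
#define _PAGE_BIT_PAT	7	/* PAT index high bit */

/* Hypothetical helper: compute the 3-bit PAT index from PTE flags. */
static unsigned int pte_flags_to_pat_index(unsigned long flags)
{
	unsigned int idx;

	idx  =  (flags >> _PAGE_BIT_PWT) & 1;		/* index bit 0 */
	idx |= ((flags >> _PAGE_BIT_PCD) & 1) << 1;	/* index bit 1 */
	idx |= ((flags >> _PAGE_BIT_PAT) & 1) << 2;	/* index bit 2 */

	return idx;	/* selects one of the 8 MSR_IA32_CR_PAT entries */
}

int main(void)
{
	/* PCD=1, PWT=0, PAT=0 -> index 2, which is UC- in the layout
	 * that pat_init() programs further down in this patch. */
	printf("index = %u\n", pte_flags_to_pat_index(1UL << _PAGE_BIT_PCD));
	return 0;
}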
) */ #include <linux/seq_file.h> @@ -29,44 +52,48 @@ #include <asm/mtrr.h> #include <asm/page.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/io.h> -#include "pat_internal.h" -#include "mm_internal.h" +#include "memtype.h" +#include "../mm_internal.h" #undef pr_fmt #define pr_fmt(fmt) "" fmt -static bool __read_mostly boot_cpu_done; +static bool __read_mostly pat_bp_initialized; static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); -static bool __read_mostly pat_initialized; -static bool __read_mostly init_cm_done; +static bool __read_mostly pat_bp_enabled; +static bool __read_mostly pat_cm_initialized; -void pat_disable(const char *reason) +/* + * PAT support is enabled by default, but can be disabled for + * various user-requested or hardware-forced reasons: + */ +void pat_disable(const char *msg_reason) { if (pat_disabled) return; - if (boot_cpu_done) { + if (pat_bp_initialized) { WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); return; } pat_disabled = true; - pr_info("x86/PAT: %s\n", reason); + pr_info("x86/PAT: %s\n", msg_reason); } static int __init nopat(char *str) { - pat_disable("PAT support disabled."); + pat_disable("PAT support disabled via boot option."); return 0; } early_param("nopat", nopat); bool pat_enabled(void) { - return pat_initialized; + return pat_bp_enabled; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -197,6 +224,8 @@ static void __init_cache_modes(u64 pat) char pat_msg[33]; int i; + WARN_ON_ONCE(pat_cm_initialized); + pat_msg[32] = 0; for (i = 7; i >= 0; i--) { cache = pat_get_cache_mode((pat >> (i * 8)) & 7, @@ -205,28 +234,28 @@ static void __init_cache_modes(u64 pat) } pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); - init_cm_done = true; + pat_cm_initialized = true; } #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) -static void pat_bsp_init(u64 pat) +static void pat_bp_init(u64 pat) { u64 tmp_pat; if (!boot_cpu_has(X86_FEATURE_PAT)) { - pat_disable("PAT not supported by CPU."); + pat_disable("PAT not supported by the CPU."); return; } rdmsrl(MSR_IA32_CR_PAT, tmp_pat); if (!tmp_pat) { - pat_disable("PAT MSR is 0, disabled."); + pat_disable("PAT support disabled by the firmware."); return; } wrmsrl(MSR_IA32_CR_PAT, pat); - pat_initialized = true; + pat_bp_enabled = true; __init_cache_modes(pat); } @@ -248,7 +277,7 @@ void init_cache_modes(void) { u64 pat = 0; - if (init_cm_done) + if (pat_cm_initialized) return; if (boot_cpu_has(X86_FEATURE_PAT)) { @@ -291,7 +320,7 @@ void init_cache_modes(void) } /** - * pat_init - Initialize PAT MSR and PAT table + * pat_init - Initialize the PAT MSR and PAT table on the current CPU * * This function initializes PAT MSR and PAT table with an OS-defined value * to enable additional cache attributes, WC, WT and WP. @@ -305,6 +334,10 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; +#ifndef CONFIG_X86_PAT + pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); +#endif + if (pat_disabled) return; @@ -364,9 +397,9 @@ void pat_init(void) PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT); } - if (!boot_cpu_done) { - pat_bsp_init(pat); - boot_cpu_done = true; + if (!pat_bp_initialized) { + pat_bp_init(pat); + pat_bp_initialized = true; } else { pat_ap_init(pat); } @@ -542,10 +575,10 @@ static u64 sanitize_phys(u64 address) * available type in new_type in case of no error. In case of any error * it will return a negative return value. 
*/ -int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, +int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type, enum page_cache_mode *new_type) { - struct memtype *new; + struct memtype *entry_new; enum page_cache_mode actual_type; int is_range_ram; int err = 0; @@ -593,22 +626,22 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, return -EINVAL; } - new = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!new) + entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_new) return -ENOMEM; - new->start = start; - new->end = end; - new->type = actual_type; + entry_new->start = start; + entry_new->end = end; + entry_new->type = actual_type; spin_lock(&memtype_lock); - err = memtype_check_insert(new, new_type); + err = memtype_check_insert(entry_new, new_type); if (err) { - pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", + pr_info("x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n", start, end - 1, - cattr_name(new->type), cattr_name(req_type)); - kfree(new); + cattr_name(entry_new->type), cattr_name(req_type)); + kfree(entry_new); spin_unlock(&memtype_lock); return err; @@ -616,18 +649,17 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, spin_unlock(&memtype_lock); - dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", - start, end - 1, cattr_name(new->type), cattr_name(req_type), + dprintk("memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", + start, end - 1, cattr_name(entry_new->type), cattr_name(req_type), new_type ? cattr_name(*new_type) : "-"); return err; } -int free_memtype(u64 start, u64 end) +int memtype_free(u64 start, u64 end) { - int err = -EINVAL; int is_range_ram; - struct memtype *entry; + struct memtype *entry_old; if (!pat_enabled()) return 0; @@ -640,28 +672,24 @@ int free_memtype(u64 start, u64 end) return 0; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) { - - err = free_ram_pages_type(start, end); - - return err; - } else if (is_range_ram < 0) { + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + if (is_range_ram < 0) return -EINVAL; - } spin_lock(&memtype_lock); - entry = memtype_erase(start, end); + entry_old = memtype_erase(start, end); spin_unlock(&memtype_lock); - if (IS_ERR(entry)) { + if (IS_ERR(entry_old)) { pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", current->comm, current->pid, start, end - 1); return -EINVAL; } - kfree(entry); + kfree(entry_old); - dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); + dprintk("memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1); return 0; } @@ -700,6 +728,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr) rettype = _PAGE_CACHE_MODE_UC_MINUS; spin_unlock(&memtype_lock); + return rettype; } @@ -723,7 +752,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); /** - * io_reserve_memtype - Request a memory type mapping for a region of memory + * memtype_reserve_io - Request a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region * @type: A pointer to memtype, with requested type. 
On success, requested @@ -732,7 +761,7 @@ EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); * On success, returns 0 * On failure, returns non-zero */ -int io_reserve_memtype(resource_size_t start, resource_size_t end, +int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *type) { resource_size_t size = end - start; @@ -742,47 +771,47 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end, WARN_ON_ONCE(iomem_map_sanity_check(start, size)); - ret = reserve_memtype(start, end, req_type, &new_type); + ret = memtype_reserve(start, end, req_type, &new_type); if (ret) goto out_err; if (!is_new_memtype_allowed(start, size, req_type, new_type)) goto out_free; - if (kernel_map_sync_memtype(start, size, new_type) < 0) + if (memtype_kernel_map_sync(start, size, new_type) < 0) goto out_free; *type = new_type; return 0; out_free: - free_memtype(start, end); + memtype_free(start, end); ret = -EBUSY; out_err: return ret; } /** - * io_free_memtype - Release a memory type mapping for a region of memory + * memtype_free_io - Release a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region */ -void io_free_memtype(resource_size_t start, resource_size_t end) +void memtype_free_io(resource_size_t start, resource_size_t end) { - free_memtype(start, end); + memtype_free(start, end); } int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) { enum page_cache_mode type = _PAGE_CACHE_MODE_WC; - return io_reserve_memtype(start, start + size, &type); + return memtype_reserve_io(start, start + size, &type); } EXPORT_SYMBOL(arch_io_reserve_memtype_wc); void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) { - io_free_memtype(start, start + size); + memtype_free_io(start, start + size); } EXPORT_SYMBOL(arch_io_free_memtype_wc); @@ -839,10 +868,10 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, } /* - * Change the memory type for the physial address range in kernel identity + * Change the memory type for the physical address range in kernel identity * mapping space if that range is a part of identity map. */ -int kernel_map_sync_memtype(u64 base, unsigned long size, +int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm) { unsigned long id_sz; @@ -851,15 +880,14 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, return 0; /* - * some areas in the middle of the kernel identity range - * are not mapped, like the PCI space. + * Some areas in the middle of the kernel identity range + * are not mapped, for example the PCI space. */ if (!page_is_ram(base >> PAGE_SHIFT)) return 0; id_sz = (__pa(high_memory-1) <= base + size) ? - __pa(high_memory) - base : - size; + __pa(high_memory) - base : size; if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n", @@ -873,7 +901,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, /* * Internal interface to reserve a range of physical memory with prot. - * Reserved non RAM regions only and after successful reserve_memtype, + * Reserved non RAM regions only and after successful memtype_reserve, * this func also keeps identity mapping (if any) in sync with this new prot. 
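Both __ioremap_caller() earlier in this patch and reserve_pfn_range() just below follow the same sequence with the renamed API: reserve the memtype, sync the kernel identity mapping, and release the reservation on any failure. A condensed sketch of that pattern (kernel context assumed; simplified from the functions in this patch, not a literal excerpt):

static int map_with_memtype(u64 paddr, unsigned long size,
			    enum page_cache_mode want_pcm)
{
	enum page_cache_mode pcm;
	int ret;

	ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm);
	if (ret)
		return ret;

	/* The tracker may have downgraded the request (pcm != want_pcm). */
	if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
		memtype_free(paddr, paddr + size);	/* undo on error */
		return -EINVAL;
	}

	return 0;
}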
*/ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, @@ -910,14 +938,14 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, return 0; } - ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm); + ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm); if (ret) return ret; if (pcm != want_pcm) { if (strict_prot || !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { - free_memtype(paddr, paddr + size); + memtype_free(paddr, paddr + size); pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, cattr_name(want_pcm), @@ -935,8 +963,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, cachemode2protval(pcm)); } - if (kernel_map_sync_memtype(paddr, size, pcm) < 0) { - free_memtype(paddr, paddr + size); + if (memtype_kernel_map_sync(paddr, size, pcm) < 0) { + memtype_free(paddr, paddr + size); return -EINVAL; } return 0; @@ -952,7 +980,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) - free_memtype(paddr, paddr + size); + memtype_free(paddr, paddr + size); } /* @@ -1099,25 +1127,30 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) +/* + * We are allocating a temporary printout-entry to be passed + * between seq_start()/next() and seq_show(): + */ static struct memtype *memtype_get_idx(loff_t pos) { - struct memtype *print_entry; + struct memtype *entry_print; int ret; - print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!print_entry) + entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_print) return NULL; spin_lock(&memtype_lock); - ret = memtype_copy_nth_element(print_entry, pos); + ret = memtype_copy_nth_element(entry_print, pos); spin_unlock(&memtype_lock); - if (!ret) { - return print_entry; - } else { - kfree(print_entry); + /* Free it on error: */ + if (ret) { + kfree(entry_print); return NULL; } + + return entry_print; } static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) @@ -1142,11 +1175,14 @@ static void memtype_seq_stop(struct seq_file *seq, void *v) static int memtype_seq_show(struct seq_file *seq, void *v) { - struct memtype *print_entry = (struct memtype *)v; + struct memtype *entry_print = (struct memtype *)v; + + seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n", + entry_print->start, + entry_print->end, + cattr_name(entry_print->type)); - seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), - print_entry->start, print_entry->end); - kfree(print_entry); + kfree(entry_print); return 0; } @@ -1178,7 +1214,6 @@ static int __init pat_memtype_list_init(void) } return 0; } - late_initcall(pat_memtype_list_init); #endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat/memtype.h index 79a06684349e..cacecdbceb55 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat/memtype.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PAT_INTERNAL_H_ -#define __PAT_INTERNAL_H_ +#ifndef __MEMTYPE_H_ +#define __MEMTYPE_H_ extern int pat_debug_enable; @@ -29,13 +29,13 @@ static inline char *cattr_name(enum page_cache_mode pcm) } #ifdef CONFIG_X86_PAT -extern int memtype_check_insert(struct memtype *new, +extern int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *new_type); extern struct memtype *memtype_erase(u64 start, u64 end); extern struct 
memtype *memtype_lookup(u64 addr); -extern int memtype_copy_nth_element(struct memtype *out, loff_t pos); +extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos); #else -static inline int memtype_check_insert(struct memtype *new, +static inline int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *new_type) { return 0; } static inline struct memtype *memtype_erase(u64 start, u64 end) @@ -46,4 +46,4 @@ static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos) { return 0; } #endif -#endif /* __PAT_INTERNAL_H_ */ +#endif /* __MEMTYPE_H_ */ diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c new file mode 100644 index 000000000000..a07e4882bf36 --- /dev/null +++ b/arch/x86/mm/pat/memtype_interval.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle caching attributes in page tables (PAT) + * + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * Suresh B Siddha <suresh.b.siddha@intel.com> + * + * Interval tree used to store the PAT memory type reservations. + */ + +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/interval_tree_generic.h> +#include <linux/sched.h> +#include <linux/gfp.h> + +#include <asm/pgtable.h> +#include <asm/memtype.h> + +#include "memtype.h" + +/* + * The memtype tree keeps track of memory type for specific + * physical memory areas. Without proper tracking, conflicting memory + * types in different mappings can cause CPU cache corruption. + * + * The tree is an interval tree (augmented rbtree) which is ordered + * by the starting address. The tree can contain multiple entries for + * different regions which overlap. All the aliases have the same + * cache attributes of course, as enforced by the PAT logic. + * + * memtype_lock protects the rbtree.
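To make the aliasing rule concrete, here is a self-contained user-space sketch of the check that memtype_check_conflict() below performs; a plain array stands in for the kernel's augmented rbtree, which is an intentional simplification:

#include <stdio.h>
#include <stdint.h>

enum cache_mode { WB, WC, UC_MINUS };

struct reservation { uint64_t start, end; enum cache_mode type; };

/* All reservations overlapping [start, end) must share one type. */
static int check_conflict(const struct reservation *tbl, int n,
			  uint64_t start, uint64_t end,
			  enum cache_mode req, enum cache_mode *newtype)
{
	enum cache_mode found = req;
	int seen = 0;

	for (int i = 0; i < n; i++) {
		if (tbl[i].end <= start || tbl[i].start >= end)
			continue;		/* no overlap */
		if (!seen) {
			found = tbl[i].type;	/* first alias sets the type */
			seen = 1;
		} else if (tbl[i].type != found) {
			return -1;		/* conflicting alias: -EBUSY */
		}
	}
	*newtype = found;			/* possibly downgraded */
	return 0;
}

int main(void)
{
	struct reservation tbl[] = { { 0x1000, 0x3000, WC } };
	enum cache_mode got;

	if (!check_conflict(tbl, 1, 0x2000, 0x4000, WB, &got))
		printf("granted with type %d (matches existing alias)\n", got);
	return 0;
}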
+ */ + +static inline u64 interval_start(struct memtype *entry) +{ + return entry->start; +} + +static inline u64 interval_end(struct memtype *entry) +{ + return entry->end - 1; +} + +INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, + interval_start, interval_end, + static, interval) + +static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; + +enum { + MEMTYPE_EXACT_MATCH = 0, + MEMTYPE_END_MATCH = 1 +}; + +static struct memtype *memtype_match(u64 start, u64 end, int match_type) +{ + struct memtype *entry_match; + + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + + while (entry_match != NULL && entry_match->start < end) { + if ((match_type == MEMTYPE_EXACT_MATCH) && + (entry_match->start == start) && (entry_match->end == end)) + return entry_match; + + if ((match_type == MEMTYPE_END_MATCH) && + (entry_match->start < start) && (entry_match->end == end)) + return entry_match; + + entry_match = interval_iter_next(entry_match, start, end-1); + } + + return NULL; /* Returns NULL if there is no match */ +} + +static int memtype_check_conflict(u64 start, u64 end, + enum page_cache_mode reqtype, + enum page_cache_mode *newtype) +{ + struct memtype *entry_match; + enum page_cache_mode found_type = reqtype; + + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + if (entry_match == NULL) + goto success; + + if (entry_match->type != found_type && newtype == NULL) + goto failure; + + dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end); + found_type = entry_match->type; + + entry_match = interval_iter_next(entry_match, start, end-1); + while (entry_match) { + if (entry_match->type != found_type) + goto failure; + + entry_match = interval_iter_next(entry_match, start, end-1); + } +success: + if (newtype) + *newtype = found_type; + + return 0; + +failure: + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(entry_match->type)); + + return -EBUSY; +} + +int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type) +{ + int err = 0; + + err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type); + if (err) + return err; + + if (ret_type) + entry_new->type = *ret_type; + + interval_insert(entry_new, &memtype_rbroot); + return 0; +} + +struct memtype *memtype_erase(u64 start, u64 end) +{ + struct memtype *entry_old; + + /* + * Since the memtype_rbroot tree allows overlapping ranges, + * memtype_erase() checks with EXACT_MATCH first, i.e. free + * a whole node for the munmap case. If no such entry is found, + * it then checks with END_MATCH, i.e. shrink the size of a node + * from the end for the mremap case. + */ + entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH); + if (!entry_old) { + entry_old = memtype_match(start, end, MEMTYPE_END_MATCH); + if (!entry_old) + return ERR_PTR(-EINVAL); + } + + if (entry_old->start == start) { + /* munmap: erase this node */ + interval_remove(entry_old, &memtype_rbroot); + } else { + /* mremap: update the end value of this node */ + interval_remove(entry_old, &memtype_rbroot); + entry_old->end = start; + interval_insert(entry_old, &memtype_rbroot); + + return NULL; + } + + return entry_old; +} + +struct memtype *memtype_lookup(u64 addr) +{ + return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1); +} + +/* + * Debugging helper, copy the Nth entry of the tree into a + * copy for printout.
This allows us to print out the tree + * via debugfs, without holding the memtype_lock too long: + */ +#ifdef CONFIG_DEBUG_FS +int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos) +{ + struct memtype *entry_match; + int i = 1; + + entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + + while (entry_match && pos != i) { + entry_match = interval_iter_next(entry_match, 0, ULONG_MAX); + i++; + } + + if (entry_match) { /* pos == i */ + *entry_out = *entry_match; + return 0; + } else { + return 1; + } +} +#endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pat/set_memory.c index 1b99ad05b117..62a8ebe72a52 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pat/set_memory.c @@ -24,10 +24,10 @@ #include <linux/uaccess.h> #include <asm/pgalloc.h> #include <asm/proto.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/set_memory.h> -#include "mm_internal.h" +#include "../mm_internal.h" /* * The current flushing context - we pass it instead of 5 arguments: @@ -331,7 +331,7 @@ static void cpa_flush_all(unsigned long cache) on_each_cpu(__cpa_flush_all, (void *) cache, 1); } -void __cpa_flush_tlb(void *data) +static void __cpa_flush_tlb(void *data) { struct cpa_data *cpa = data; unsigned int i; @@ -1801,7 +1801,7 @@ int set_memory_uc(unsigned long addr, int numpages) /* * for now UC MINUS. see comments in ioremap() */ - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_UC_MINUS, NULL); if (ret) goto out_err; @@ -1813,7 +1813,7 @@ int set_memory_uc(unsigned long addr, int numpages) return 0; out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); out_err: return ret; } @@ -1839,14 +1839,14 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) return ret; ret = _set_memory_wc(addr, numpages); if (ret) - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return ret; } @@ -1873,7 +1873,7 @@ int set_memory_wb(unsigned long addr, int numpages) if (ret) return ret; - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return 0; } EXPORT_SYMBOL(set_memory_wb); @@ -2014,7 +2014,7 @@ static int _set_pages_array(struct page **pages, int numpages, continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - if (reserve_memtype(start, end, new_type, NULL)) + if (memtype_reserve(start, end, new_type, NULL)) goto err_out; } @@ -2040,7 +2040,7 @@ err_out: continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - free_memtype(start, end); + memtype_free(start, end); } return -EINVAL; } @@ -2089,7 +2089,7 @@ int set_pages_array_wb(struct page **pages, int numpages) continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - free_memtype(start, end); + memtype_free(start, end); } return 0; @@ -2215,7 +2215,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; @@ -2224,12 +2224,6 @@ int __init 
kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); @@ -2281,5 +2275,5 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, * be exposed to the rest of the kernel. Include these directly here. */ #ifdef CONFIG_CPA_DEBUG -#include "pageattr-test.c" +#include "cpa-test.c" #endif diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c deleted file mode 100644 index 6855362eaf21..000000000000 --- a/arch/x86/mm/pat_interval.c +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * Suresh B Siddha <suresh.b.siddha@intel.com> - * - * Interval tree used to store the PAT memory type reservations. - */ - -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/kernel.h> -#include <linux/interval_tree_generic.h> -#include <linux/sched.h> -#include <linux/gfp.h> - -#include <asm/pgtable.h> -#include <asm/pat.h> - -#include "pat_internal.h" - -/* - * The memtype tree keeps track of memory type for specific - * physical memory areas. Without proper tracking, conflicting memory - * types in different mappings can cause CPU cache corruption. - * - * The tree is an interval tree (augmented rbtree) with tree ordered - * on starting address. Tree can contain multiple entries for - * different regions which overlap. All the aliases have the same - * cache attributes of course. - * - * memtype_lock protects the rbtree. 
- */ -static inline u64 memtype_interval_start(struct memtype *memtype) -{ - return memtype->start; -} - -static inline u64 memtype_interval_end(struct memtype *memtype) -{ - return memtype->end - 1; -} -INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, - memtype_interval_start, memtype_interval_end, - static, memtype_interval) - -static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; - -enum { - MEMTYPE_EXACT_MATCH = 0, - MEMTYPE_END_MATCH = 1 -}; - -static struct memtype *memtype_match(u64 start, u64 end, int match_type) -{ - struct memtype *match; - - match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); - while (match != NULL && match->start < end) { - if ((match_type == MEMTYPE_EXACT_MATCH) && - (match->start == start) && (match->end == end)) - return match; - - if ((match_type == MEMTYPE_END_MATCH) && - (match->start < start) && (match->end == end)) - return match; - - match = memtype_interval_iter_next(match, start, end-1); - } - - return NULL; /* Returns NULL if there is no match */ -} - -static int memtype_check_conflict(u64 start, u64 end, - enum page_cache_mode reqtype, - enum page_cache_mode *newtype) -{ - struct memtype *match; - enum page_cache_mode found_type = reqtype; - - match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); - if (match == NULL) - goto success; - - if (match->type != found_type && newtype == NULL) - goto failure; - - dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); - found_type = match->type; - - match = memtype_interval_iter_next(match, start, end-1); - while (match) { - if (match->type != found_type) - goto failure; - - match = memtype_interval_iter_next(match, start, end-1); - } -success: - if (newtype) - *newtype = found_type; - - return 0; - -failure: - pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, start, end, - cattr_name(found_type), cattr_name(match->type)); - return -EBUSY; -} - -int memtype_check_insert(struct memtype *new, - enum page_cache_mode *ret_type) -{ - int err = 0; - - err = memtype_check_conflict(new->start, new->end, new->type, ret_type); - if (err) - return err; - - if (ret_type) - new->type = *ret_type; - - memtype_interval_insert(new, &memtype_rbroot); - return 0; -} - -struct memtype *memtype_erase(u64 start, u64 end) -{ - struct memtype *data; - - /* - * Since the memtype_rbroot tree allows overlapping ranges, - * memtype_erase() checks with EXACT_MATCH first, i.e. free - * a whole node for the munmap case. If no such entry is found, - * it then checks with END_MATCH, i.e. shrink the size of a node - * from the end for the mremap case. 
- */ - data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); - if (!data) { - data = memtype_match(start, end, MEMTYPE_END_MATCH); - if (!data) - return ERR_PTR(-EINVAL); - } - - if (data->start == start) { - /* munmap: erase this node */ - memtype_interval_remove(data, &memtype_rbroot); - } else { - /* mremap: update the end value of this node */ - memtype_interval_remove(data, &memtype_rbroot); - data->end = start; - memtype_interval_insert(data, &memtype_rbroot); - return NULL; - } - - return data; -} - -struct memtype *memtype_lookup(u64 addr) -{ - return memtype_interval_iter_first(&memtype_rbroot, addr, - addr + PAGE_SIZE-1); -} - -#if defined(CONFIG_DEBUG_FS) -int memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ - struct memtype *match; - int i = 1; - - match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); - while (match && pos != i) { - match = memtype_interval_iter_next(match, 0, ULONG_MAX); - i++; - } - - if (match) { /* pos == i */ - *out = *match; - return 0; - } else { - return 1; - } -} -#endif diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 9bb7f0ab9fe6..0e6700eaa4f9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -18,6 +18,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/io.h> +#include <linux/vmalloc.h> unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index bdc98150d4db..fc3f3d3e2ef2 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <asm/page.h> +#include <linux/vmalloc.h> #include "physaddr.h" diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 92153d054d6c..bda73cb7a044 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -79,7 +79,7 @@ static void do_read_far_test(void __iomem *p) static void do_test(unsigned long size) { - void __iomem *p = ioremap_nocache(mmio_address, size); + void __iomem *p = ioremap(mmio_address, size); if (!p) { pr_err("could not ioremap, aborting.\n"); return; @@ -104,7 +104,7 @@ static void do_test_bulk_ioremapping(void) int i; for (i = 0; i < 10; ++i) { - p = ioremap_nocache(mmio_address, PAGE_SIZE); + p = ioremap(mmio_address, PAGE_SIZE); if (p) iounmap(p); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index e6a9edc5baaf..66f96f21a7b6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -708,7 +708,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, (void *)info, 1); else on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote, - (void *)info, 1, GFP_ATOMIC, cpumask); + (void *)info, 1, cpumask); } /* diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 9df652d3d927..fa855bbaebaf 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -34,7 +34,7 @@ #include <linux/errno.h> #include <linux/memblock.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/e820/api.h> #include <asm/pci_x86.h> #include <asm/io_apic.h> diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 887d181b769b..0c7b6e66c644 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -105,7 +105,7 @@ static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg) start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); - addr = ioremap_nocache(start, size); + addr = ioremap(start, size); if (addr) addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); 
return addr; diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index fe29f3f5d384..84b09c230cbd 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y -OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y +KASAN_SANITIZE := n +GCOV_PROFILE := n obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 38d44f36d5ed..59f7f6d60cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -54,8 +54,8 @@ #include <asm/x86_init.h> #include <asm/uv/uv.h> -static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_systab_phys __initdata; static efi_config_table_type_t arch_tables[] __initdata = { #ifdef CONFIG_X86_UV @@ -97,32 +97,6 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); -static efi_status_t __init phys_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - unsigned long flags; - pgd_t *save_pgd; - - save_pgd = efi_call_phys_prolog(); - if (!save_pgd) - return EFI_ABORTED; - - /* Disable interrupts around EFI calls: */ - local_irq_save(flags); - status = efi_call_phys(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - local_irq_restore(flags); - - efi_call_phys_epilog(save_pgd); - - return status; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; @@ -330,10 +304,16 @@ static void __init efi_clean_memmap(void) } if (n_removal > 0) { - u64 size = efi.memmap.nr_map - n_removal; + struct efi_memory_map_data data = { + .phys_map = efi.memmap.phys_map, + .desc_version = efi.memmap.desc_version, + .desc_size = efi.memmap.desc_size, + .size = data.desc_size * (efi.memmap.nr_map - n_removal), + .flags = 0, + }; pr_warn("Removing %d invalid memory map entries.\n", n_removal); - efi_memmap_install(efi.memmap.phys_map, size); + efi_memmap_install(&data); } } @@ -353,89 +333,90 @@ void __init efi_print_memmap(void) } } -static int __init efi_systab_init(void *phys) +static int __init efi_systab_init(u64 phys) { + int size = efi_enabled(EFI_64BIT) ? 
sizeof(efi_system_table_64_t) + : sizeof(efi_system_table_32_t); + bool over4g = false; + void *p; + + p = early_memremap_ro(phys, size); + if (p == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + if (efi_enabled(EFI_64BIT)) { - efi_system_table_64_t *systab64; - struct efi_setup_data *data = NULL; - u64 tmp = 0; + const efi_system_table_64_t *systab64 = p; + + efi_systab.hdr = systab64->hdr; + efi_systab.fw_vendor = systab64->fw_vendor; + efi_systab.fw_revision = systab64->fw_revision; + efi_systab.con_in_handle = systab64->con_in_handle; + efi_systab.con_in = systab64->con_in; + efi_systab.con_out_handle = systab64->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab64->con_out; + efi_systab.stderr_handle = systab64->stderr_handle; + efi_systab.stderr = systab64->stderr; + efi_systab.runtime = (void *)(unsigned long)systab64->runtime; + efi_systab.boottime = (void *)(unsigned long)systab64->boottime; + efi_systab.nr_tables = systab64->nr_tables; + efi_systab.tables = systab64->tables; + + over4g = systab64->con_in_handle > U32_MAX || + systab64->con_in > U32_MAX || + systab64->con_out_handle > U32_MAX || + systab64->con_out > U32_MAX || + systab64->stderr_handle > U32_MAX || + systab64->stderr > U32_MAX || + systab64->boottime > U32_MAX; if (efi_setup) { - data = early_memremap(efi_setup, sizeof(*data)); - if (!data) + struct efi_setup_data *data; + + data = early_memremap_ro(efi_setup, sizeof(*data)); + if (!data) { + early_memunmap(p, size); return -ENOMEM; - } - systab64 = early_memremap((unsigned long)phys, - sizeof(*systab64)); - if (systab64 == NULL) { - pr_err("Couldn't map the system table!\n"); - if (data) - early_memunmap(data, sizeof(*data)); - return -ENOMEM; - } + } + + efi_systab.fw_vendor = (unsigned long)data->fw_vendor; + efi_systab.runtime = (void *)(unsigned long)data->runtime; + efi_systab.tables = (unsigned long)data->tables; + + over4g |= data->fw_vendor > U32_MAX || + data->runtime > U32_MAX || + data->tables > U32_MAX; - efi_systab.hdr = systab64->hdr; - efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : - systab64->fw_vendor; - tmp |= data ? data->fw_vendor : systab64->fw_vendor; - efi_systab.fw_revision = systab64->fw_revision; - efi_systab.con_in_handle = systab64->con_in_handle; - tmp |= systab64->con_in_handle; - efi_systab.con_in = systab64->con_in; - tmp |= systab64->con_in; - efi_systab.con_out_handle = systab64->con_out_handle; - tmp |= systab64->con_out_handle; - efi_systab.con_out = systab64->con_out; - tmp |= systab64->con_out; - efi_systab.stderr_handle = systab64->stderr_handle; - tmp |= systab64->stderr_handle; - efi_systab.stderr = systab64->stderr; - tmp |= systab64->stderr; - efi_systab.runtime = data ? - (void *)(unsigned long)data->runtime : - (void *)(unsigned long)systab64->runtime; - tmp |= data ? data->runtime : systab64->runtime; - efi_systab.boottime = (void *)(unsigned long)systab64->boottime; - tmp |= systab64->boottime; - efi_systab.nr_tables = systab64->nr_tables; - efi_systab.tables = data ? (unsigned long)data->tables : - systab64->tables; - tmp |= data ? 
data->tables : systab64->tables; - - early_memunmap(systab64, sizeof(*systab64)); - if (data) early_memunmap(data, sizeof(*data)); -#ifdef CONFIG_X86_32 - if (tmp >> 32) { - pr_err("EFI data located above 4GB, disabling EFI.\n"); - return -EINVAL; + } else { + over4g |= systab64->fw_vendor > U32_MAX || + systab64->runtime > U32_MAX || + systab64->tables > U32_MAX; } -#endif } else { - efi_system_table_32_t *systab32; - - systab32 = early_memremap((unsigned long)phys, - sizeof(*systab32)); - if (systab32 == NULL) { - pr_err("Couldn't map the system table!\n"); - return -ENOMEM; - } - - efi_systab.hdr = systab32->hdr; - efi_systab.fw_vendor = systab32->fw_vendor; - efi_systab.fw_revision = systab32->fw_revision; - efi_systab.con_in_handle = systab32->con_in_handle; - efi_systab.con_in = systab32->con_in; - efi_systab.con_out_handle = systab32->con_out_handle; - efi_systab.con_out = systab32->con_out; - efi_systab.stderr_handle = systab32->stderr_handle; - efi_systab.stderr = systab32->stderr; - efi_systab.runtime = (void *)(unsigned long)systab32->runtime; - efi_systab.boottime = (void *)(unsigned long)systab32->boottime; - efi_systab.nr_tables = systab32->nr_tables; - efi_systab.tables = systab32->tables; - - early_memunmap(systab32, sizeof(*systab32)); + const efi_system_table_32_t *systab32 = p; + + efi_systab.hdr = systab32->hdr; + efi_systab.fw_vendor = systab32->fw_vendor; + efi_systab.fw_revision = systab32->fw_revision; + efi_systab.con_in_handle = systab32->con_in_handle; + efi_systab.con_in = systab32->con_in; + efi_systab.con_out_handle = systab32->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab32->con_out; + efi_systab.stderr_handle = systab32->stderr_handle; + efi_systab.stderr = systab32->stderr; + efi_systab.runtime = (void *)(unsigned long)systab32->runtime; + efi_systab.boottime = (void *)(unsigned long)systab32->boottime; + efi_systab.nr_tables = systab32->nr_tables; + efi_systab.tables = systab32->tables; + } + + early_memunmap(p, size); + + if (IS_ENABLED(CONFIG_X86_32) && over4g) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); + return -EINVAL; } efi.systab = &efi_systab; @@ -455,108 +436,23 @@ static int __init efi_systab_init(void *phys) return 0; } -static int __init efi_runtime_init32(void) -{ - efi_runtime_services_32_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_32_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. - */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_32_t)); - - return 0; -} - -static int __init efi_runtime_init64(void) -{ - efi_runtime_services_64_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_64_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. 
- */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_64_t)); - - return 0; -} - -static int __init efi_runtime_init(void) -{ - int rv; - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - * - * When EFI_PARAVIRT is in force then we could not map runtime - * service memory region because we do not have direct access to it. - * However, runtime services are available through proxy functions - * (e.g. in case of Xen dom0 EFI implementation they call special - * hypercall which executes relevant EFI functions) and that is why - * they are always enabled. - */ - - if (!efi_enabled(EFI_PARAVIRT)) { - if (efi_enabled(EFI_64BIT)) - rv = efi_runtime_init64(); - else - rv = efi_runtime_init32(); - - if (rv) - return rv; - } - - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - return 0; -} - void __init efi_init(void) { efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; -#ifdef CONFIG_X86_32 - if (boot_params.efi_info.efi_systab_hi || - boot_params.efi_info.efi_memmap_hi) { + if (IS_ENABLED(CONFIG_X86_32) && + (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi)) { pr_info("Table located above 4GB, disabling EFI.\n"); return; } - efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; -#else - efi_phys.systab = (efi_system_table_t *) - (boot_params.efi_info.efi_systab | - ((__u64)boot_params.efi_info.efi_systab_hi<<32)); -#endif - if (efi_systab_init(efi_phys.systab)) + efi_systab_phys = boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi << 32); + + if (efi_systab_init(efi_systab_phys)) return; efi.config_table = (unsigned long)efi.systab->tables; @@ -566,14 +462,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -592,19 +490,21 @@ void __init efi_init(void) if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); - else { - if (efi_runtime_disabled() || efi_runtime_init()) { - efi_memmap_unmap(); - return; - } + + if (!efi_runtime_supported() || efi_runtime_disabled()) { + efi_memmap_unmap(); + return; } + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi_clean_memmap(); if (efi_enabled(EFI_DBG)) efi_print_memmap(); } +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_UV) + void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -669,6 +569,8 @@ void __init old_map_region(efi_memory_desc_t *md) (unsigned long long)md->phys_addr); } +#endif + /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { @@ -707,7 +609,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) size = md->num_pages << EFI_PAGE_SHIFT; end = 
md->phys_addr + size; - systab = (u64)(unsigned long)efi_phys.systab; + systab = efi_systab_phys; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; efi.systab = (efi_system_table_t *)(unsigned long)systab; @@ -767,7 +669,7 @@ static inline void *efi_map_next_entry_reverse(void *entry) */ static void *efi_map_next_entry(void *entry) { - if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + if (!efi_have_uv1_memmap() && efi_enabled(EFI_64BIT)) { /* * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE * config table feature requires us to map all entries @@ -828,7 +730,7 @@ static bool should_map_region(efi_memory_desc_t *md) * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. */ - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) { + if (efi_is_mixed()) { if (md->type == EFI_CONVENTIONAL_MEMORY || md->type == EFI_LOADER_DATA || md->type == EFI_LOADER_CODE) @@ -899,11 +801,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI. With efi=old_map, we don't do runtime services in + * non-native EFI. With the UV1 memmap, we don't do runtime services in * kexec kernel because in the initial boot something else might * have been mapped at these virtual addresses. */ - if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_is_mixed() || efi_have_uv1_memmap()) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; @@ -958,11 +860,6 @@ static void __init kexec_enter_virtual_mode(void) efi.runtime_version = efi_systab.hdr.revision; efi_native_runtime_setup(); - - efi.set_virtual_address_map = NULL; - - if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) - runtime_code_page_mkexec(); #endif } @@ -974,9 +871,9 @@ static void __init kexec_enter_virtual_mode(void) * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the - * kernel is booted with efi=old_map on its command line. Same old - * method enabled the runtime services to be called without having to - * thunk back into physical mode for every invocation. + * kernel is booted on SG1 UV1 hardware. Same old method enabled the + * runtime services to be called without having to thunk back into + * physical mode for every invocation. 
* * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime @@ -999,16 +896,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -1022,8 +917,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -1031,34 +925,22 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); - if (efi_is_native()) { - status = phys_efi_set_virtual_address_map( - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } else { - status = efi_thunk_set_virtual_address_map( - efi_phys.set_virtual_address_map, - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } - + status = efi_set_virtual_address_map(efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa); if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } efi_free_boot_services(); @@ -1071,13 +953,11 @@ static void __init __efi_enter_virtual_mode(void) */ efi.runtime_version = efi_systab.hdr.revision; - if (efi_is_native()) + if (!efi_is_mixed()) efi_native_runtime_setup(); else efi_thunk_runtime_setup(); - efi.set_virtual_address_map = NULL; - /* * Apply more restrictive page table mapping attributes now that * SVAM() has been called and the firmware has performed all @@ -1087,6 +967,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) @@ -1102,20 +986,6 @@ void __init efi_enter_virtual_mode(void) efi_dump_pagetable(); } -static int __init arch_parse_efi_cmdline(char *str) -{ - if (!str) { - pr_warn("need at least one option\n"); - return -EINVAL; - } - - if (parse_option_str(str, "old_map")) - set_bit(EFI_OLD_MEMMAP, &efi.flags); - - return 0; -} -early_param("efi", arch_parse_efi_cmdline); - bool efi_is_table_address(unsigned long phys_addr) { unsigned int i; diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 9959657127f4..71dddd1620f9 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -66,9 +66,17 @@ void __init efi_map_region(efi_memory_desc_t *md) void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} -pgd_t * __init 
efi_call_phys_prolog(void) +efi_status_t efi_call_svam(efi_set_virtual_address_map_t *__efiapi *, + u32, u32, u32, void *); + +efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { struct desc_ptr gdt_descr; + efi_status_t status; + unsigned long flags; pgd_t *save_pgd; /* Current pgd is swapper_pg_dir, we'll restore it later: */ @@ -80,14 +88,18 @@ pgd_t * __init efi_call_phys_prolog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - return save_pgd; -} + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call_svam(&efi.systab->runtime->set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ load_fixmap_gdt(0); load_cr3(save_pgd); __flush_tlb_all(); + + return status; } void __init efi_runtime_update_mappings(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 08ce8177c3af..e2accfe636bd 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -57,142 +57,6 @@ static u64 efi_va = EFI_VA_START; struct efi_scratch efi_scratch; -static void __init early_code_mapping_set_exec(int executable) -{ - efi_memory_desc_t *md; - - if (!(__supported_pte_mask & _PAGE_NX)) - return; - - /* Make EFI service code area executable */ - for_each_efi_memory_desc(md) { - if (md->type == EFI_RUNTIME_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_CODE) - efi_set_executable(md, executable); - } -} - -pgd_t * __init efi_call_phys_prolog(void) -{ - unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; - pgd_t *save_pgd, *pgd_k, *pgd_efi; - p4d_t *p4d, *p4d_k, *p4d_efi; - pud_t *pud; - - int pgd; - int n_pgds, i, j; - - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(&efi_mm); - return efi_mm.pgd; - } - - early_code_mapping_set_exec(1); - - n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); - save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); - if (!save_pgd) - return NULL; - - /* - * Build 1:1 identity mapping for efi=old_map usage. Note that - * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while - * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical - * address X, the pud_index(X) != pud_index(__va(X)), we can only copy - * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. - * This means here we can only reuse the PMD tables of the direct mapping. 
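The removed prolog above sizes its save array with DIV_ROUND_UP(max_pfn << PAGE_SHIFT, PGDIR_SIZE), i.e. the number of top-level entries needed to cover all of RAM. A standalone illustration of that arithmetic; the constants are the usual 4-level x86-64 values and max_pfn is made up:

#include <stdio.h>

#define PAGE_SHIFT  12
#define PGDIR_SHIFT 39                      /* 512 GiB per PGD entry */
#define PGDIR_SIZE  (1ULL << PGDIR_SHIFT)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    unsigned long long max_pfn = 0x440000;  /* ~17 GiB of RAM */
    unsigned long long n_pgds =
        DIV_ROUND_UP(max_pfn << PAGE_SHIFT, PGDIR_SIZE);

    printf("n_pgds = %llu\n", n_pgds);      /* 1: fits one 512 GiB entry */
    return 0;
}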
- */ - for (pgd = 0; pgd < n_pgds; pgd++) { - addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); - vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); - pgd_efi = pgd_offset_k(addr_pgd); - save_pgd[pgd] = *pgd_efi; - - p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); - if (!p4d) { - pr_err("Failed to allocate p4d table!\n"); - goto out; - } - - for (i = 0; i < PTRS_PER_P4D; i++) { - addr_p4d = addr_pgd + i * P4D_SIZE; - p4d_efi = p4d + p4d_index(addr_p4d); - - pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); - if (!pud) { - pr_err("Failed to allocate pud table!\n"); - goto out; - } - - for (j = 0; j < PTRS_PER_PUD; j++) { - addr_pud = addr_p4d + j * PUD_SIZE; - - if (addr_pud > (max_pfn << PAGE_SHIFT)) - break; - - vaddr = (unsigned long)__va(addr_pud); - - pgd_k = pgd_offset_k(vaddr); - p4d_k = p4d_offset(pgd_k, vaddr); - pud[j] = *pud_offset(p4d_k, vaddr); - } - } - pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; - } - - __flush_tlb_all(); - return save_pgd; -out: - efi_call_phys_epilog(save_pgd); - return NULL; -} - -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ - /* - * After the lock is released, the original page table is restored. - */ - int pgd_idx, i; - int nr_pgds; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(efi_scratch.prev_mm); - return; - } - - nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { - pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); - - if (!pgd_present(*pgd)) - continue; - - for (i = 0; i < PTRS_PER_P4D; i++) { - p4d = p4d_offset(pgd, - pgd_idx * PGDIR_SIZE + i * P4D_SIZE); - - if (!p4d_present(*p4d)) - continue; - - pud = (pud_t *)p4d_page_vaddr(*p4d); - pud_free(&init_mm, pud); - } - - p4d = (p4d_t *)pgd_page_vaddr(*pgd); - p4d_free(&init_mm, p4d); - } - - kfree(save_pgd); - - __flush_tlb_all(); - early_code_mapping_set_exec(0); -} - EXPORT_SYMBOL_GPL(efi_mm); /* @@ -211,7 +75,7 @@ int __init efi_alloc_page_tables(void) pud_t *pud; gfp_t gfp_mask; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; gfp_mask = GFP_KERNEL | __GFP_ZERO; @@ -252,7 +116,7 @@ void efi_sync_low_kernel_mappings(void) pud_t *pud_k, *pud_efi; pgd_t *efi_pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -346,7 +210,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) unsigned npages; pgd_t *pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; /* @@ -373,10 +237,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * as trim_bios_range() will reserve the first page and isolate it away * from memory allocators anyway. */ - pf = _PAGE_RW; - if (sev_active()) - pf |= _PAGE_ENC; - if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { pr_err("Failed to create 1:1 mapping for the first page!\n"); return 1; @@ -388,21 +248,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. 
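In the efi_setup_page_tables() hunk that follows, the mixed-mode stack top changes from virt_to_phys(page_address(page)) + PAGE_SIZE to page_to_phys(page + 1). Both name the first byte past the allocated page, which is where a descending stack starts. A userspace model of why the pointer form is equivalent; struct page and mem_map here are fake stand-ins for the kernel's:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct page { int dummy; };            /* stand-in for the real type */
static struct page mem_map[8];         /* fake frame array */

/* One struct page per frame: page N sits at physical N * PAGE_SIZE. */
static uint64_t page_to_phys(struct page *p)
{
    return (uint64_t)(p - mem_map) * PAGE_SIZE;
}

int main(void)
{
    struct page *page = &mem_map[3];   /* pretend alloc_page() result */

    /* top == base + PAGE_SIZE, i.e. the first address past the page */
    printf("base=%#llx top=%#llx\n",
           (unsigned long long)page_to_phys(page),
           (unsigned long long)page_to_phys(page + 1));
    return 0;
}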
*/ - if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) + if (!efi_is_mixed()) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ - npages = (_etext - _text) >> PAGE_SHIFT; + npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; - pf = _PAGE_RW | _PAGE_ENC; + pf = _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { pr_err("Failed to map kernel text 1:1\n"); return 1; @@ -417,6 +278,22 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) unsigned long pfn; pgd_t *pgd = efi_mm.pgd; + /* + * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF + * executable images in memory that consist of both R-X and + * RW- sections, so we cannot apply read-only or non-exec + * permissions just yet. However, modern EFI systems provide + * a memory attributes table that describes those sections + * with the appropriate restricted permissions, which are + * applied in efi_runtime_update_mappings() below. All other + * regions can be mapped non-executable at this point, with + * the exception of boot services code regions, but those will + * be unmapped again entirely in efi_free_boot_services(). + */ + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_CODE) + flags |= _PAGE_NX; + if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; @@ -434,7 +311,7 @@ void __init efi_map_region(efi_memory_desc_t *md) unsigned long size = md->num_pages << PAGE_SHIFT; u64 pa = md->phys_addr; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return old_map_region(md); /* @@ -449,7 +326,7 @@ void __init efi_map_region(efi_memory_desc_t *md) * booting in EFI mixed mode, because even though we may be * running a 64-bit kernel, the firmware may only be 32-bit. 
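The __map_region() comment above describes the permission policy: only PE/COFF code regions stay executable for now (tightened later via the memory attributes table), everything else is mapped non-executable, and caching flags follow EFI_MEMORY_WB. A self-contained sketch of that flag selection, with stand-in constants rather than the kernel's:

#include <stdio.h>

/* Stand-in values; the kernel's definitions differ. */
enum { EFI_RUNTIME_SERVICES_CODE = 3, EFI_BOOT_SERVICES_CODE = 5,
       EFI_CONVENTIONAL_MEMORY = 7 };
#define EFI_MEMORY_WB (1u << 3)
#define _PAGE_NX      (1u << 0)
#define _PAGE_PCD     (1u << 1)

struct efi_md { int type; unsigned int attribute; };

static unsigned int region_flags(const struct efi_md *md)
{
    unsigned int flags = 0;

    /* Everything except code regions is mapped non-executable. */
    if (md->type != EFI_BOOT_SERVICES_CODE &&
        md->type != EFI_RUNTIME_SERVICES_CODE)
        flags |= _PAGE_NX;

    /* Non-write-back regions get cache-disabled mappings. */
    if (!(md->attribute & EFI_MEMORY_WB))
        flags |= _PAGE_PCD;

    return flags;
}

int main(void)
{
    struct efi_md data = { EFI_CONVENTIONAL_MEMORY,   EFI_MEMORY_WB };
    struct efi_md code = { EFI_RUNTIME_SERVICES_CODE, EFI_MEMORY_WB };

    printf("data flags %#x, code flags %#x\n",
           region_flags(&data), region_flags(&code));
    return 0;
}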
*/ - if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { + if (efi_is_mixed()) { md->virt_addr = md->phys_addr; return; } @@ -491,26 +368,6 @@ void __init efi_map_region_fixed(efi_memory_desc_t *md) __map_region(md, md->virt_addr); } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type, u64 attribute) -{ - unsigned long last_map_pfn; - - if (type == EFI_MEMORY_MAPPED_IO) - return ioremap(phys_addr, size); - - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { - unsigned long top = last_map_pfn << PAGE_SHIFT; - efi_ioremap(top, size - (top - phys_addr), type, attribute); - } - - if (!(attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); - - return (void __iomem *)__va(phys_addr); -} - void __init parse_efi_setup(u64 phys_addr, u32 data_len) { efi_setup = phys_addr + sizeof(struct setup_data); @@ -559,7 +416,7 @@ void __init efi_runtime_update_mappings(void) { efi_memory_desc_t *md; - if (efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_have_uv1_memmap()) { if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); return; @@ -613,7 +470,7 @@ void __init efi_runtime_update_mappings(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) ptdump_walk_pgd_level(NULL, swapper_pg_dir); else ptdump_walk_pgd_level(NULL, efi_mm.pgd); @@ -634,63 +491,74 @@ void efi_switch_mm(struct mm_struct *mm) switch_mm(efi_scratch.prev_mm, mm, NULL); } -#ifdef CONFIG_EFI_MIXED -extern efi_status_t efi64_thunk(u32, ...); - static DEFINE_SPINLOCK(efi_runtime_lock); -#define runtime_service32(func) \ -({ \ - u32 table = (u32)(unsigned long)efi.systab; \ - u32 *rt, *___f; \ - \ - rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ - ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ - *___f; \ +/* + * DS and ES contain user values. We need to save them. + * The 32-bit EFI code needs a valid DS, ES, and SS. There's no + * need to save the old SS: __KERNEL_DS is always acceptable. + */ +#define __efi_thunk(func, ...) \ +({ \ + efi_runtime_services_32_t *__rt; \ + unsigned short __ds, __es; \ + efi_status_t ____s; \ + \ + __rt = (void *)(unsigned long)efi.systab->mixed_mode.runtime; \ + \ + savesegment(ds, __ds); \ + savesegment(es, __es); \ + \ + loadsegment(ss, __KERNEL_DS); \ + loadsegment(ds, __KERNEL_DS); \ + loadsegment(es, __KERNEL_DS); \ + \ + ____s = efi64_thunk(__rt->func, __VA_ARGS__); \ + \ + loadsegment(ds, __ds); \ + loadsegment(es, __es); \ + \ + ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \ + ____s; \ }) /* * Switch to the EFI page tables early so that we can access the 1:1 * runtime services mappings which are not mapped in any other page - * tables. This function must be called before runtime_service32(). + * tables. * * Also, disable interrupts because the IDT points to 64-bit handlers, * which aren't going to function correctly when we switch to 32-bit. */ -#define efi_thunk(f, ...) \ +#define efi_thunk(func...) 
\ ({ \ efi_status_t __s; \ - u32 __func; \ \ arch_efi_call_virt_setup(); \ \ - __func = runtime_service32(f); \ - __s = efi64_thunk(__func, __VA_ARGS__); \ + __s = __efi_thunk(func); \ \ arch_efi_call_virt_teardown(); \ \ __s; \ }) -efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) +static efi_status_t __init __no_sanitize_address +efi_thunk_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { efi_status_t status; unsigned long flags; - u32 func; efi_sync_low_kernel_mappings(); local_irq_save(flags); efi_switch_mm(&efi_mm); - func = (u32)(unsigned long)phys_set_virtual_address_map; - status = efi64_thunk(func, memory_map_size, descriptor_size, - descriptor_version, virtual_map); + status = __efi_thunk(set_virtual_address_map, memory_map_size, + descriptor_size, descriptor_version, virtual_map); efi_switch_mm(efi_scratch.prev_mm); local_irq_restore(flags); @@ -993,8 +861,11 @@ efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, return EFI_UNSUPPORTED; } -void efi_thunk_runtime_setup(void) +void __init efi_thunk_runtime_setup(void) { + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return; + efi.get_time = efi_thunk_get_time; efi.set_time = efi_thunk_set_time; efi.get_wakeup_time = efi_thunk_get_wakeup_time; @@ -1010,4 +881,46 @@ void efi_thunk_runtime_setup(void) efi.update_capsule = efi_thunk_update_capsule; efi.query_capsule_caps = efi_thunk_query_capsule_caps; } -#endif /* CONFIG_EFI_MIXED */ + +efi_status_t __init __no_sanitize_address +efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + pgd_t *save_pgd = NULL; + + if (efi_is_mixed()) + return efi_thunk_set_virtual_address_map(memory_map_size, + descriptor_size, + descriptor_version, + virtual_map); + + if (efi_have_uv1_memmap()) { + save_pgd = efi_uv1_memmap_phys_prolog(); + if (!save_pgd) + return EFI_ABORTED; + } else { + efi_switch_mm(&efi_mm); + } + + kernel_fpu_begin(); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call(efi.systab->runtime->set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); + + kernel_fpu_end(); + + if (save_pgd) + efi_uv1_memmap_phys_epilog(save_pgd); + else + efi_switch_mm(efi_scratch.prev_mm); + + return status; +} diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index eed8b5b441f8..75c46e7a809f 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -7,118 +7,43 @@ */ #include <linux/linkage.h> +#include <linux/init.h> #include <asm/page_types.h> -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. 
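The __efi_thunk() macro above ends by widening the 32-bit EFI status: 32-bit firmware returns the error flag in bit 31, while a 64-bit EFI_STATUS carries it in bit 63, and the XOR clears the former and sets the latter in one step. A runnable check of that identity; the status value is just an example:

#include <stdint.h>
#include <stdio.h>

/* Same expression as the macro, on a zero-extended 32-bit status. */
static uint64_t efi32_to_efi64_status(uint64_t s)
{
    s ^= (s & (1ULL << 31)) | (s & (1ULL << 31)) << 32;
    return s;
}

int main(void)
{
    /* 0x80000002 is EFI_INVALID_PARAMETER as a 32-bit status. */
    printf("error:   %#llx\n",   /* prints 0x8000000000000002 */
           (unsigned long long)efi32_to_efi64_status(0x80000002u));
    printf("success: %#llx\n",   /* success stays 0 */
           (unsigned long long)efi32_to_efi64_status(0));
    return 0;
}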
- */ + __INIT +SYM_FUNC_START(efi_call_svam) + push 8(%esp) + push 8(%esp) + push %ecx + push %edx -.text -SYM_FUNC_START(efi_call_phys) /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prolog and epilog. - */ - - /* - * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prolog and - * epilog. + * Switch to the flat mapped alias of this routine, by jumping to the + * address of label '1' after subtracting PAGE_OFFSET from it. */ movl $1f, %edx subl $__PAGE_OFFSET, %edx jmp *%edx 1: - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ + /* disable paging */ movl %cr0, %edx andl $0x7fffffff, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 4. Adjust stack pointer. - */ + /* convert the stack pointer to a flat mapped address */ subl $__PAGE_OFFSET, %esp - /* - * 5. Call the physical function. - */ - jmp *%ecx + /* call the EFI routine */ + call *(%eax) -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp + /* convert ESP back to a kernel VA, and pop the outgoing args */ + addl $__PAGE_OFFSET + 16, %esp - /* - * 7. Restore PG bit - */ + /* re-enable paging */ movl %cr0, %edx orl $0x80000000, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - /* - * 10. Push the saved return address onto the stack and return. 
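The new efi_call_svam stub above reaches its flat-mapped alias by subtracting __PAGE_OFFSET from a label's address before paging is switched off, and the stack pointer gets the same treatment in both directions. A toy calculation of that relocation; the 3G/1G split value and the label address are illustrative only:

#include <stdio.h>

#define __PAGE_OFFSET 0xC0000000u   /* default 32-bit x86 split */

int main(void)
{
    unsigned int label_va = 0xC0102460u;   /* hypothetical label '1' */
    unsigned int flat     = label_va - __PAGE_OFFSET;

    /* The linear map puts VA = PA + __PAGE_OFFSET, so subtracting
     * the offset yields the 1:1 (physical) alias of the label. */
    printf("virtual %#x -> flat %#x\n", label_va, flat);
    return 0;
}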
- */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx ret -SYM_FUNC_END(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 +SYM_FUNC_END(efi_call_svam) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index b1d2313fe3bf..15da118f04f0 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -8,41 +8,12 @@ */ #include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/msr.h> -#include <asm/processor-flags.h> -#include <asm/page_types.h> +#include <asm/nospec-branch.h> -#define SAVE_XMM \ - mov %rsp, %rax; \ - subq $0x70, %rsp; \ - and $~0xf, %rsp; \ - mov %rax, (%rsp); \ - mov %cr0, %rax; \ - clts; \ - mov %rax, 0x8(%rsp); \ - movaps %xmm0, 0x60(%rsp); \ - movaps %xmm1, 0x50(%rsp); \ - movaps %xmm2, 0x40(%rsp); \ - movaps %xmm3, 0x30(%rsp); \ - movaps %xmm4, 0x20(%rsp); \ - movaps %xmm5, 0x10(%rsp) - -#define RESTORE_XMM \ - movaps 0x60(%rsp), %xmm0; \ - movaps 0x50(%rsp), %xmm1; \ - movaps 0x40(%rsp), %xmm2; \ - movaps 0x30(%rsp), %xmm3; \ - movaps 0x20(%rsp), %xmm4; \ - movaps 0x10(%rsp), %xmm5; \ - mov 0x8(%rsp), %rsi; \ - mov %rsi, %cr0; \ - mov (%rsp), %rsp - -SYM_FUNC_START(efi_call) +SYM_FUNC_START(__efi_call) pushq %rbp movq %rsp, %rbp - SAVE_XMM + and $~0xf, %rsp mov 16(%rbp), %rax subq $48, %rsp mov %r9, 32(%rsp) @@ -50,9 +21,7 @@ SYM_FUNC_START(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - popq %rbp + CALL_NOSPEC %rdi + leave ret -SYM_FUNC_END(efi_call) +SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 3189f1394701..26f0da238c1c 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -25,15 +25,16 @@ .text .code64 -SYM_FUNC_START(efi64_thunk) +SYM_CODE_START(__efi64_thunk) push %rbp push %rbx /* * Switch to 1:1 mapped 32-bit stack pointer. */ - movq %rsp, efi_saved_sp(%rip) + movq %rsp, %rax movq efi_scratch(%rip), %rsp + push %rax /* * Calculate the physical address of the kernel text. @@ -41,113 +42,31 @@ SYM_FUNC_START(efi64_thunk) movq $__START_KERNEL_map, %rax subq phys_base(%rip), %rax - /* - * Push some physical addresses onto the stack. This is easier - * to do now in a code64 section while the assembler can address - * 64-bit values. Note that all the addresses on the stack are - * 32-bit. - */ - subq $16, %rsp - leaq efi_exit32(%rip), %rbx - subq %rax, %rbx - movl %ebx, 8(%rsp) - - leaq __efi64_thunk(%rip), %rbx + leaq 1f(%rip), %rbp + leaq 2f(%rip), %rbx + subq %rax, %rbp subq %rax, %rbx - call *%rbx - - movq efi_saved_sp(%rip), %rsp - pop %rbx - pop %rbp - retq -SYM_FUNC_END(efi64_thunk) -/* - * We run this function from the 1:1 mapping. - * - * This function must be invoked with a 1:1 mapped stack. 
- */ -SYM_FUNC_START_LOCAL(__efi64_thunk) - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - subq $32, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) + subq $28, %rsp + movl %ebx, 0x0(%rsp) /* return address */ + movl %esi, 0x4(%rsp) + movl %edx, 0x8(%rsp) + movl %ecx, 0xc(%rsp) + movl %r8d, 0x10(%rsp) + movl %r9d, 0x14(%rsp) /* Switch to 32-bit descriptor */ pushq $__KERNEL32_CS - leaq efi_enter32(%rip), %rax - pushq %rax + pushq %rdi /* EFI runtime service address */ lretq -1: addq $32, %rsp - +1: movq 24(%rsp), %rsp pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - - /* - * Convert 32-bit status code into 64-bit. - */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - ret -SYM_FUNC_END(__efi64_thunk) - -SYM_FUNC_START_LOCAL(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -SYM_FUNC_END(efi_exit32) + pop %rbp + retq .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -SYM_FUNC_START_LOCAL(efi_enter32) - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es - movl %eax, %ss - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - movl 72(%esp), %eax - pushl $__KERNEL_CS - pushl %eax - +2: pushl $__KERNEL_CS + pushl %ebp lret -SYM_FUNC_END(efi_enter32) - - .data - .balign 8 -func_rt_ptr: .quad 0 -efi_saved_sp: .quad 0 +SYM_CODE_END(__efi64_thunk) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 7675cf754d90..88d32c06cffa 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -16,6 +16,7 @@ #include <asm/efi.h> #include <asm/uv/uv.h> #include <asm/cpu_device_id.h> +#include <asm/realmode.h> #include <asm/reboot.h> #define EFI_MIN_RESERVE 5120 @@ -243,7 +244,7 @@ EXPORT_SYMBOL_GPL(efi_query_variable_store); */ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; struct efi_mem_range mr; efi_memory_desc_t md; int num_entries; @@ -260,10 +261,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - /* No need to reserve regions that will never be freed. 
*/ - if (md.attribute & EFI_MEMORY_RUNTIME) - return; - size += addr % EFI_PAGE_SIZE; size = round_up(size, EFI_PAGE_SIZE); addr = round_down(addr, EFI_PAGE_SIZE); @@ -275,24 +272,23 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) num_entries = efi_memmap_split_count(&md, &mr.range); num_entries += efi.memmap.nr_map; - new_size = efi.memmap.desc_size * num_entries; - - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Could not allocate boot services memmap\n"); return; } - new = early_memremap(new_phys, new_size); + new = early_memremap(data.phys_map, data.size); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; } efi_memmap_insert(&efi.memmap, new, &mr); - early_memunmap(new, new_size); + early_memunmap(new, data.size); - efi_memmap_install(new_phys, num_entries); + efi_memmap_install(&data); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); } /* @@ -386,10 +382,10 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) /* * To Do: Remove this check after adding functionality to unmap EFI boot - * services code/data regions from direct mapping area because - * "efi=old_map" maps EFI regions in swapper_pg_dir. + * services code/data regions from direct mapping area because the UV1 + * memory map maps EFI regions in swapper_pg_dir. */ - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -397,7 +393,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) * EFI runtime calls, hence don't unmap EFI boot services code/data * regions. */ - if (!efi_is_native()) + if (efi_is_mixed()) return; if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) @@ -409,7 +405,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) void __init efi_free_boot_services(void) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; efi_memory_desc_t *md; int num_entries = 0; void *new, *new_md; @@ -464,14 +460,12 @@ void __init efi_free_boot_services(void) if (!num_entries) return; - new_size = efi.memmap.desc_size * num_entries; - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Failed to allocate new EFI memmap\n"); return; } - new = memremap(new_phys, new_size, MEMREMAP_WB); + new = memremap(data.phys_map, data.size, MEMREMAP_WB); if (!new) { pr_err("Failed to map new EFI memmap\n"); return; @@ -495,7 +489,7 @@ void __init efi_free_boot_services(void) memunmap(new); - if (efi_memmap_install(new_phys, num_entries)) { + if (efi_memmap_install(&data) != 0) { pr_err("Could not install new EFI memmap\n"); return; } @@ -560,7 +554,7 @@ out: return ret; } -static const struct dmi_system_id sgi_uv1_dmi[] = { +static const struct dmi_system_id sgi_uv1_dmi[] __initconst = { { NULL, "SGI UV1", { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), @@ -583,8 +577,15 @@ void __init efi_apply_memmap_quirks(void) } /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. 
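efi_arch_mem_reserve() above widens a reservation to whole EFI pages: grow the size by the start address's misalignment, then round the size up and the start down. A standalone check with an intentionally unaligned range; the macros are the generic power-of-two forms, not the kernel's:

#include <stdint.h>
#include <stdio.h>

#define EFI_PAGE_SIZE 4096ULL
#define ROUND_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))   /* a: power of 2 */
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
    uint64_t addr = 0x12345, size = 0x100;

    size += addr % EFI_PAGE_SIZE;            /* absorb the misalignment */
    size  = ROUND_UP(size, EFI_PAGE_SIZE);   /* -> 0x1000 */
    addr  = ROUND_DOWN(addr, EFI_PAGE_SIZE); /* -> 0x12000 */

    printf("addr=%#llx size=%#llx\n",
           (unsigned long long)addr, (unsigned long long)size);
    return 0;
}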
*/ - if (dmi_check_system(sgi_uv1_dmi)) - set_bit(EFI_OLD_MEMMAP, &efi.flags); + if (dmi_check_system(sgi_uv1_dmi)) { + if (IS_ENABLED(CONFIG_X86_UV)) { + set_bit(EFI_UV1_MEMMAP, &efi.flags); + } else { + pr_warn("EFI runtime disabled, needs CONFIG_X86_UV=y on UV1\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + efi_memmap_unmap(); + } + } } /* @@ -722,7 +723,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) /* * Make sure that an efi runtime service caused the page fault. * "efi_mm" cannot be used to check if the page fault had occurred - * in the firmware context because efi=old_map doesn't use efi_pgd. + * in the firmware context because the UV1 memmap doesn't use efi_pgd. */ if (efi_rts_work.efi_rts_id == EFI_NONE) return; diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index 6dd25dc5f027..e9d97d52475e 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -29,6 +29,8 @@ #include <asm/cpu_device_id.h> #include <asm/imr.h> #include <asm/iosf_mbi.h> +#include <asm/io.h> + #include <linux/debugfs.h> #include <linux/init.h> #include <linux/mm.h> diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c index 42f879b75f9b..4307830e1b6f 100644 --- a/arch/x86/platform/intel-quark/imr_selftest.c +++ b/arch/x86/platform/intel-quark/imr_selftest.c @@ -14,6 +14,8 @@ #include <asm-generic/sections.h> #include <asm/cpu_device_id.h> #include <asm/imr.h> +#include <asm/io.h> + #include <linux/init.h> #include <linux/mm.h> #include <linux/types.h> diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index ece9cb9c1189..607f58147311 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -31,13 +31,16 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return BIOS_STATUS_UNIMPLEMENTED; /* - * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI + * If EFI_UV1_MEMMAP is set, we need to fall back to using our old EFI * callback method, which uses efi_call() directly, with the kernel page tables: */ - if (unlikely(efi_enabled(EFI_OLD_MEMMAP))) + if (unlikely(efi_enabled(EFI_UV1_MEMMAP))) { + kernel_fpu_begin(); ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); - else + kernel_fpu_end(); + } else { ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + } return ret; } @@ -214,3 +217,163 @@ int uv_bios_init(void) pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision); return 0; } + +static void __init early_code_mapping_set_exec(int executable) +{ + efi_memory_desc_t *md; + + if (!(__supported_pte_mask & _PAGE_NX)) + return; + + /* Make EFI service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) + efi_set_executable(md, executable); + } +} + +void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd) +{ + /* + * After the lock is released, the original page table is restored. 
+ */ + int pgd_idx, i; + int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + + if (!pgd_present(*pgd)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!p4d_present(*p4d)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + + kfree(save_pgd); + + __flush_tlb_all(); + early_code_mapping_set_exec(0); +} + +pgd_t * __init efi_uv1_memmap_phys_prolog(void) +{ + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; + + int pgd; + int n_pgds, i, j; + + early_code_mapping_set_exec(1); + + n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); + save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + if (!save_pgd) + return NULL; + + /* + * Build 1:1 identity mapping for UV1 memmap usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ + for (pgd = 0; pgd < n_pgds; pgd++) { + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; + } + + __flush_tlb_all(); + return save_pgd; +out: + efi_uv1_memmap_phys_epilog(save_pgd); + return NULL; +} + +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type, u64 attribute) +{ + unsigned long last_map_pfn; + + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { + unsigned long top = last_map_pfn << PAGE_SHIFT; + efi_ioremap(top, size - (top - phys_addr), type, attribute); + } + + if (!(attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + + return (void __iomem *)__va(phys_addr); +} + +static int __init arch_parse_efi_cmdline(char *str) +{ + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + + if (!efi_is_mixed() && parse_option_str(str, "old_map")) + set_bit(EFI_UV1_MEMMAP, &efi.flags); + + return 0; +} +early_param("efi", arch_parse_efi_cmdline); diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c 
index 5bd949da7a4a..ac8eee093f9c 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -215,14 +215,12 @@ static int set_tls_entry(struct task_struct* task, struct user_desc *info, return 0; } -int arch_copy_tls(struct task_struct *new) +int arch_set_tls(struct task_struct *new, unsigned long tls) { struct user_desc info; int idx, ret = -EFAULT; - if (copy_from_user(&info, - (void __user *) UPT_SI(&new->thread.regs.regs), - sizeof(info))) + if (copy_from_user(&info, (void __user *) tls, sizeof(info))) goto out; ret = -EINVAL; diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c index 3a621e0d3925..ebd3855d9b13 100644 --- a/arch/x86/um/tls_64.c +++ b/arch/x86/um/tls_64.c @@ -6,14 +6,13 @@ void clear_flushed_tls(struct task_struct *task) { } -int arch_copy_tls(struct task_struct *t) +int arch_set_tls(struct task_struct *t, unsigned long tls) { /* * If CLONE_SETTLS is set, we need to save the thread id - * (which is argument 5, child_tid, of clone) so it can be set - * during context switches. + * so it can be set during context switches. */ - t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)]; + t->thread.arch.fs = tls; return 0; } diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index ba5a41828e9d..1aded63a95cb 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -62,10 +62,10 @@ config XEN_512GB boot parameter "xen_512gb_limit". config XEN_SAVE_RESTORE - bool - depends on XEN - select HIBERNATE_CALLBACKS - default y + bool + depends on XEN + select HIBERNATE_CALLBACKS + default y config XEN_DEBUG_FS bool "Enable Xen debug and tuning parameters in debugfs" diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index a04551ee5568..1abe455d926a 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -31,7 +31,7 @@ static efi_system_table_t efi_systab_xen __initdata = { .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out = NULL, /* Not used under Xen. */ .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. 
*/ .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index c8dbee62ec2a..bbba8b17829a 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -67,7 +67,7 @@ #include <asm/linkage.h> #include <asm/page.h> #include <asm/init.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/smp.h> #include <asm/tlb.h> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 4a3fa295d8fe..1c645172b4b5 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -11,7 +11,7 @@ config XTENSA select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select DMA_REMAP if MMU @@ -24,6 +24,7 @@ config XTENSA select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_TRACEHOOK + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD diff --git a/arch/xtensa/include/asm/vmalloc.h b/arch/xtensa/include/asm/vmalloc.h new file mode 100644 index 000000000000..0eb94b70be55 --- /dev/null +++ b/arch/xtensa/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_XTENSA_VMALLOC_H +#define _ASM_XTENSA_VMALLOC_H + +#endif /* _ASM_XTENSA_VMALLOC_H */ diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index be897803834a..2c9e48566e48 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -520,7 +520,7 @@ common_exception_return: call4 schedule # void schedule (void) j 1b -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION 6: _bbci.l a4, TIF_NEED_RESCHED, 4f diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 9e1c49134c07..3edecc41ef8c 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -202,8 +202,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) * involved. Much simpler to just not copy those live frames across. */ -int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, - unsigned long thread_fn_arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp_thread_fn, + unsigned long thread_fn_arg, struct task_struct *p, + unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -266,9 +267,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, childregs->syscall = regs->syscall; - /* The thread pointer is passed in the '4th argument' (= a5) */ if (clone_flags & CLONE_SETTLS) - childregs->threadptr = childregs->areg[5]; + childregs->threadptr = tls; } else { p->thread.ra = MAKE_RA_FOR_CALL( (unsigned long)ret_from_kernel_thread, 1); diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 87bd68dd7687..0976e27b8d5d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -519,12 +519,15 @@ DEFINE_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; + const char *pr = ""; + + if (IS_ENABLED(CONFIG_PREEMPTION)) + pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; console_verbose(); spin_lock_irq(&die_lock); - pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : ""); + pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, pr); show_regs(regs); if (!user_mode(regs)) show_stack(NULL, (unsigned long*)regs->areg[1]); |
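The traps.c hunk above replaces an inline conditional that only knew about CONFIG_PREEMPT with a tag that also distinguishes PREEMPT_RT. A userspace model of that selection; the IS_ENABLED() stand-in and the two config values are fakes for illustration, since the kernel's IS_ENABLED() reads Kconfig-generated macros:

#include <stdio.h>

#define IS_ENABLED(x) (x)      /* stand-in; kernel's reads Kconfig */
#define CONFIG_PREEMPTION 1    /* hypothetical build settings */
#define CONFIG_PREEMPT_RT 0

int main(void)
{
    const char *pr = "";

    if (IS_ENABLED(CONFIG_PREEMPTION))
        pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";

    /* Mirrors die()'s format string above. */
    printf("%s: sig: %ld [#%d]%s\n", "Oops", 11L, 1, pr);
    return 0;
}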