Diffstat (limited to 'arch')
275 files changed, 3097 insertions, 2080 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b4441b0764d7..e98c6b8e6186 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -9,6 +9,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_DMA_COHERENT_TO_PFN select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -17,8 +18,7 @@ config ARC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS - select DMA_NONCOHERENT_MMAP + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT @@ -149,7 +149,7 @@ config ARC_CPU_770 Support for ARC770 core introduced with Rel 4.10 (Summer 2011) This core has a bunch of cool new features: -MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4) - Shared Address Spaces (for sharing TLB entires in MMU) + Shared Address Spaces (for sharing TLB entries in MMU) -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 99cce77ab98f..644815c0516e 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,33 +6,11 @@ # published by the Free Software Foundation. # -ifeq ($(CROSS_COMPILE),) -ifndef CONFIG_CPU_BIG_ENDIAN -CROSS_COMPILE := arc-linux- -else -CROSS_COMPILE := arceb-linux- -endif -endif - KBUILD_DEFCONFIG := nsim_700_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 -cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs - -is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0) - -ifdef CONFIG_ISA_ARCOMPACT -ifeq ($(is_700), 0) - $(error Toolchain not configured for ARCompact builds) -endif -endif - -ifdef CONFIG_ISA_ARCV2 -ifeq ($(is_700), 1) - $(error Toolchain not configured for ARCv2 builds) -endif -endif +cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file @@ -79,7 +57,7 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB -LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 4674541eba3f..8ce6e7235915 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -241,6 +241,26 @@ int copy_thread(unsigned long clone_flags, task_thread_info(current)->thr_ptr; } + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. 
So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + return 0; } diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index c75d5c3470e3..db203ff69ccf 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -84,29 +84,10 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, __free_pages(page, get_order(size)); } -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long pfn = __phys_to_pfn(dma_addr); - unsigned long off = vma->vm_pgoff; - int ret = -ENXIO; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off < count && user_count <= (count - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); - } - - return ret; + return __phys_to_pfn(dma_addr); } /* @@ -167,7 +148,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, } /* - * Plug in coherent or noncoherent dma ops + * Plug in direct dma map ops. */ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) @@ -175,13 +156,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, /* * IOC hardware snoops all DMA traffic keeping the caches consistent * with memory - eliding need for any explicit cache maintenance of - * DMA buffers - so we can use dma_direct cache ops. + * DMA buffers. */ - if (is_isa_arcv2() && ioc_enable && coherent) { - set_dma_ops(dev, &dma_direct_ops); - dev_info(dev, "use dma_direct_ops cache ops\n"); - } else { - set_dma_ops(dev, &dma_noncoherent_ops); - dev_info(dev, "use dma_noncoherent_ops cache ops\n"); - } + if (is_isa_arcv2() && ioc_enable && coherent) + dev->dma_coherent = true; + + dev_info(dev, "use %sncoherent DMA ops\n", + dev->dma_coherent ? 
"" : "non"); } diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index b10dccd0958f..3b1baa8605a7 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -11,6 +11,7 @@ #include "sama5d2-pinfunc.h" #include <dt-bindings/mfd/atmel-flexcom.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/pinctrl/at91.h> / { model = "Atmel SAMA5D2 PTC EK"; @@ -299,6 +300,7 @@ <PIN_PA30__NWE_NANDWE>, <PIN_PB2__NRD_NANDOE>; bias-pull-up; + atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>; }; ale_cle_rdy_cs { diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf..6df61518776f 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_EDGE_RISING)>; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = <GIC_PPI 14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_LEVEL_HIGH)>; }; armpll: armpll { @@ -158,7 +160,7 @@ serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = <GIC_SPI 32 0>; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = <GIC_SPI 33 0>; + interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = <GIC_SPI 38 0>; + interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "nand"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 7423d462d1e4..50dde84b72ed 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -123,6 +123,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi index 7cb235ef0fb6..6e9e1c2f9def 100644 --- a/arch/arm/boot/dts/sama5d3_emac.dtsi +++ b/arch/arm/boot/dts/sama5d3_emac.dtsi @@ -41,7 +41,7 @@ }; macb1: ethernet@f802c000 { - compatible = "cdns,at91sam9260-macb", "cdns,macb"; + compatible = "atmel,sama5d3-macb", "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xf802c000 0x100>; interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 661be948ab74..185541a5b69f 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -1078,8 +1078,8 @@ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = <&rcc SPI6_K>; resets = <&rcc SPI6_R>; - dmas 
= <&mdma1 34 0x0 0x40008 0x0 0x0 0>, - <&mdma1 35 0x0 0x40002 0x0 0x0 0>; + dmas = <&mdma1 34 0x0 0x40008 0x0 0x0>, + <&mdma1 35 0x0 0x40002 0x0 0x0>; dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index ffd9f00f74a4..5f547c161baf 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -800,8 +800,7 @@ }; hdmi_phy: hdmi-phy@1ef0000 { - compatible = "allwinner,sun8i-r40-hdmi-phy", - "allwinner,sun50i-a64-hdmi-phy"; + compatible = "allwinner,sun8i-r40-hdmi-phy"; reg = <0x01ef0000 0x10000>; clocks = <&ccu CLK_BUS_HDMI1>, <&ccu CLK_HDMI_SLOW>, <&ccu 7>, <&ccu 16>; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 8436f6ade57d..965b7c846ecb 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -100,8 +100,10 @@ static inline unsigned long dma_max_pfn(struct device *dev) extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); +#ifdef CONFIG_MMU #define arch_teardown_dma_ops arch_teardown_dma_ops extern void arch_teardown_dma_ops(struct device *dev); +#endif /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 2cfbc531f63b..6b51826ab3d1 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -28,7 +28,6 @@ #include <asm/byteorder.h> #include <asm/memory.h> #include <asm-generic/pci_iomap.h> -#include <xen/xen.h> /* * ISA I/O bus memory addresses are 1:1 with the physical address. @@ -459,20 +458,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #include <asm-generic/io.h> -/* - * can the hardware map this into one segment or not, given no other - * constraints. 
- */ -#define BIOVEC_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) - -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) - #ifdef CONFIG_MMU #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 3ab8b3781bfe..2d43dca29c72 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -161,6 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif +#define VTTBR_CNP_BIT _AC(1, UL) #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 265ea9cf7df7..847f01fa429d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -355,6 +355,11 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return false; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index ae5fdff18406..8247bc15addc 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -49,6 +49,8 @@ #define ARM_DISCARD \ *(.ARM.exidx.exit.text) \ *(.ARM.extab.exit.text) \ + *(.ARM.exidx.text.exit) \ + *(.ARM.extab.text.exit) \ ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) \ ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) \ ARM_EXIT_DISCARD(EXIT_TEXT) \ diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 450c7a4fbc8a..cb094e55dc5f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, - /* ICC_ASGI1R */ - { CRm64(12), Op1( 1), is64, access_gic_sgi}, - /* ICC_SGI0R */ - { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. 
*/ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* ICC_SRE */ { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre }, diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c index 353f9e5a1454..efdaa27241c5 100644 --- a/arch/arm/mach-davinci/board-neuros-osd2.c +++ b/arch/arm/mach-davinci/board-neuros-osd2.c @@ -130,10 +130,10 @@ static struct platform_device davinci_fb_device = { }; static const struct gpio_led ntosd2_leds[] = { - { .name = "led1_green", .gpio = GPIO(10), }, - { .name = "led1_red", .gpio = GPIO(11), }, - { .name = "led2_green", .gpio = GPIO(12), }, - { .name = "led2_red", .gpio = GPIO(13), }, + { .name = "led1_green", .gpio = 10, }, + { .name = "led1_red", .gpio = 11, }, + { .name = "led2_green", .gpio = 12, }, + { .name = "led2_red", .gpio = 13, }, }; static struct gpio_led_platform_data ntosd2_leds_data = { diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index faf48a3b1fea..706515faee06 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -141,6 +141,15 @@ EXPORT_SYMBOL_GPL(ep93xx_chip_revision); *************************************************************************/ static struct resource ep93xx_gpio_resource[] = { DEFINE_RES_MEM(EP93XX_GPIO_PHYS_BASE, 0xcc), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO_AB), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO0MUX), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO1MUX), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO2MUX), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO3MUX), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO4MUX), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO5MUX), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO6MUX), + DEFINE_RES_IRQ(IRQ_EP93XX_GPIO7MUX), }; static struct platform_device ep93xx_gpio_device = { diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c index 45940c1d7787..cf0cb58b3454 100644 --- a/arch/arm/mach-ep93xx/snappercl15.c +++ b/arch/arm/mach-ep93xx/snappercl15.c @@ -23,8 +23,7 @@ #include <linux/i2c.h> #include <linux/fb.h> -#include <linux/mtd/partitions.h> -#include <linux/mtd/rawnand.h> +#include <linux/mtd/platnand.h> #include <mach/hardware.h> #include <linux/platform_data/video-ep93xx.h> @@ -43,12 +42,11 @@ #define SNAPPERCL15_NAND_CEN (1 << 11) /* Chip enable (active low) */ #define SNAPPERCL15_NAND_RDY (1 << 14) /* Device ready */ -#define NAND_CTRL_ADDR(chip) (chip->IO_ADDR_W + 0x40) +#define NAND_CTRL_ADDR(chip) (chip->legacy.IO_ADDR_W + 0x40) -static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, +static void snappercl15_nand_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd_to_nand(mtd); static u16 nand_state = SNAPPERCL15_NAND_WPN; u16 set; @@ -70,13 +68,12 @@ static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, } if (cmd != NAND_CMD_NONE) - __raw_writew((cmd & 0xff) | nand_state, chip->IO_ADDR_W); + __raw_writew((cmd & 0xff) | nand_state, + chip->legacy.IO_ADDR_W); } -static int snappercl15_nand_dev_ready(struct mtd_info *mtd) +static int snappercl15_nand_dev_ready(struct nand_chip *chip) { - struct nand_chip *chip = mtd_to_nand(mtd); - return !!(__raw_readw(NAND_CTRL_ADDR(chip)) & SNAPPERCL15_NAND_RDY); } diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c index c089a2a4fe30..c6a533699b00 100644 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ b/arch/arm/mach-ep93xx/ts72xx.c @@ -16,8 +16,7 @@ #include 
<linux/init.h> #include <linux/platform_device.h> #include <linux/io.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/spi/spi.h> #include <linux/spi/flash.h> #include <linux/spi/mmc_spi.h> @@ -76,13 +75,11 @@ static void __init ts72xx_map_io(void) #define TS72XX_NAND_CONTROL_ADDR_LINE 22 /* 0xN0400000 */ #define TS72XX_NAND_BUSY_ADDR_LINE 23 /* 0xN0800000 */ -static void ts72xx_nand_hwcontrol(struct mtd_info *mtd, +static void ts72xx_nand_hwcontrol(struct nand_chip *chip, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd_to_nand(mtd); - if (ctrl & NAND_CTRL_CHANGE) { - void __iomem *addr = chip->IO_ADDR_R; + void __iomem *addr = chip->legacy.IO_ADDR_R; unsigned char bits; addr += (1 << TS72XX_NAND_CONTROL_ADDR_LINE); @@ -96,13 +93,12 @@ static void ts72xx_nand_hwcontrol(struct mtd_info *mtd, } if (cmd != NAND_CMD_NONE) - __raw_writeb(cmd, chip->IO_ADDR_W); + __raw_writeb(cmd, chip->legacy.IO_ADDR_W); } -static int ts72xx_nand_device_ready(struct mtd_info *mtd) +static int ts72xx_nand_device_ready(struct nand_chip *chip) { - struct nand_chip *chip = mtd_to_nand(mtd); - void __iomem *addr = chip->IO_ADDR_R; + void __iomem *addr = chip->legacy.IO_ADDR_R; addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE); diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c index 5e366824814f..2e1e540f2e5a 100644 --- a/arch/arm/mach-imx/mach-mx21ads.c +++ b/arch/arm/mach-imx/mach-mx21ads.c @@ -18,6 +18,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/physmap.h> #include <linux/gpio/driver.h> +#include <linux/gpio/machine.h> #include <linux/gpio.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> @@ -175,6 +176,7 @@ static struct resource mx21ads_mmgpio_resource = DEFINE_RES_MEM_NAMED(MX21ADS_IO_REG, SZ_2, "dat"); static struct bgpio_pdata mx21ads_mmgpio_pdata = { + .label = "mx21ads-mmgpio", .base = MX21ADS_MMGPIO_BASE, .ngpio = 16, }; @@ -203,7 +205,6 @@ static struct regulator_init_data mx21ads_lcd_regulator_init_data = { static struct fixed_voltage_config mx21ads_lcd_regulator_pdata = { .supply_name = "LCD", .microvolts = 3300000, - .gpio = MX21ADS_IO_LCDON, .enable_high = 1, .init_data = &mx21ads_lcd_regulator_init_data, }; @@ -216,6 +217,14 @@ static struct platform_device mx21ads_lcd_regulator = { }, }; +static struct gpiod_lookup_table mx21ads_lcd_regulator_gpiod_table = { + .dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */ + .table = { + GPIO_LOOKUP("mx21ads-mmgpio", 9, NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + /* * Connected is a portrait Sharp-QVGA display * of type: LQ035Q7DB02 @@ -311,6 +320,7 @@ static void __init mx21ads_late_init(void) { imx21_add_mxc_mmc(0, &mx21ads_sdhc_pdata); + gpiod_add_lookup_table(&mx21ads_lcd_regulator_gpiod_table); platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices)); mx21ads_cs8900_resources[1].start = diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c index a04bb094ded1..f5e04047ed13 100644 --- a/arch/arm/mach-imx/mach-mx27ads.c +++ b/arch/arm/mach-imx/mach-mx27ads.c @@ -16,6 +16,7 @@ #include <linux/gpio/driver.h> /* Needed for gpio_to_irq() */ #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/platform_device.h> #include <linux/mtd/mtd.h> #include <linux/mtd/map.h> @@ -230,10 +231,17 @@ static struct regulator_init_data mx27ads_lcd_regulator_init_data = { static struct fixed_voltage_config mx27ads_lcd_regulator_pdata = { .supply_name = "LCD", .microvolts 
= 3300000, - .gpio = MX27ADS_LCD_GPIO, .init_data = &mx27ads_lcd_regulator_init_data, }; +static struct gpiod_lookup_table mx27ads_lcd_regulator_gpiod_table = { + .dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */ + .table = { + GPIO_LOOKUP("LCD", 0, NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static void __init mx27ads_regulator_init(void) { struct gpio_chip *vchip; @@ -247,6 +255,8 @@ static void __init mx27ads_regulator_init(void) vchip->set = vgpio_set; gpiochip_add_data(vchip, NULL); + gpiod_add_lookup_table(&mx27ads_lcd_regulator_gpiod_table); + platform_device_register_data(NULL, "reg-fixed-voltage", PLATFORM_DEVID_AUTO, &mx27ads_lcd_regulator_pdata, diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c index 42a700053103..5c5df8ca38dd 100644 --- a/arch/arm/mach-imx/mach-qong.c +++ b/arch/arm/mach-imx/mach-qong.c @@ -18,7 +18,7 @@ #include <linux/memory.h> #include <linux/platform_device.h> #include <linux/mtd/physmap.h> -#include <linux/mtd/rawnand.h> +#include <linux/mtd/platnand.h> #include <linux/gpio.h> #include <asm/mach-types.h> @@ -129,30 +129,29 @@ static void qong_init_nor_mtd(void) /* * Hardware specific access to control-lines */ -static void qong_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +static void qong_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd, + unsigned int ctrl) { - struct nand_chip *nand_chip = mtd_to_nand(mtd); - if (cmd == NAND_CMD_NONE) return; if (ctrl & NAND_CLE) - writeb(cmd, nand_chip->IO_ADDR_W + (1 << 24)); + writeb(cmd, nand_chip->legacy.IO_ADDR_W + (1 << 24)); else - writeb(cmd, nand_chip->IO_ADDR_W + (1 << 23)); + writeb(cmd, nand_chip->legacy.IO_ADDR_W + (1 << 23)); } /* * Read the Device Ready pin. */ -static int qong_nand_device_ready(struct mtd_info *mtd) +static int qong_nand_device_ready(struct nand_chip *chip) { return gpio_get_value(IOMUX_TO_GPIO(MX31_PIN_NFRB)); } -static void qong_nand_select_chip(struct mtd_info *mtd, int chip) +static void qong_nand_select_chip(struct nand_chip *chip, int cs) { - if (chip >= 0) + if (cs >= 0) gpio_set_value(IOMUX_TO_GPIO(MX31_PIN_NFCE_B), 0); else gpio_set_value(IOMUX_TO_GPIO(MX31_PIN_NFCE_B), 1); diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 772a7cf2010e..976ded5c5916 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -80,8 +80,6 @@ static unsigned int mmc_status(struct device *dev) static struct mmci_platform_data mmc_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = mmc_status, - .gpio_wp = -1, - .gpio_cd = -1, }; static u64 notrace intcp_read_sched_clock(void) diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index 3ec829d52cdd..57d7df79d838 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -20,6 +20,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/rawnand.h> #include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/delay.h> #include <linux/gpio.h> #include <asm/types.h> @@ -75,9 +76,8 @@ static struct mtd_partition ixdp425_partitions[] = { }; static void -ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +ixdp425_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); int offset = (int)nand_get_controller_data(this); if (ctrl & NAND_CTRL_CHANGE) { @@ -93,7 +93,7 @@ ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, 
int cmd, unsigned int ctrl) } if (cmd != NAND_CMD_NONE) - writeb(cmd, this->IO_ADDR_W + offset); + writeb(cmd, this->legacy.IO_ADDR_W + offset); } static struct platform_nand_data ixdp425_flash_nand_data = { diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c index d1613b954926..a04e249c654b 100644 --- a/arch/arm/mach-mmp/brownstone.c +++ b/arch/arm/mach-mmp/brownstone.c @@ -15,6 +15,7 @@ #include <linux/platform_device.h> #include <linux/io.h> #include <linux/gpio-pxa.h> +#include <linux/gpio/machine.h> #include <linux/regulator/machine.h> #include <linux/regulator/max8649.h> #include <linux/regulator/fixed.h> @@ -148,7 +149,6 @@ static struct regulator_init_data brownstone_v_5vp_data = { static struct fixed_voltage_config brownstone_v_5vp = { .supply_name = "v_5vp", .microvolts = 5000000, - .gpio = GPIO_5V_ENABLE, .enable_high = 1, .enabled_at_boot = 1, .init_data = &brownstone_v_5vp_data, @@ -162,6 +162,15 @@ static struct platform_device brownstone_v_5vp_device = { }, }; +static struct gpiod_lookup_table brownstone_v_5vp_gpiod_table = { + .dev_id = "reg-fixed-voltage.1", /* .id set to 1 above */ + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO_5V_ENABLE, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct max8925_platform_data brownstone_max8925_info = { .irq_base = MMP_NR_IRQS, }; @@ -217,6 +226,7 @@ static void __init brownstone_init(void) mmp2_add_isram(&mmp2_isram_platdata); /* enable 5v regulator */ + gpiod_add_lookup_table(&brownstone_v_5vp_gpiod_table); platform_device_register(&brownstone_v_5vp_device); } diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index dd28d2614d7f..f226973f3d8c 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -300,7 +300,6 @@ static struct regulator_init_data modem_nreset_data = { static struct fixed_voltage_config modem_nreset_config = { .supply_name = "modem_nreset", .microvolts = 3300000, - .gpio = AMS_DELTA_GPIO_PIN_MODEM_NRESET, .startup_delay = 25000, .enable_high = 1, .enabled_at_boot = 1, @@ -315,6 +314,15 @@ static struct platform_device modem_nreset_device = { }, }; +static struct gpiod_lookup_table ams_delta_nreset_gpiod_table = { + .dev_id = "reg-fixed-voltage", + .table = { + GPIO_LOOKUP(LATCH2_LABEL, LATCH2_PIN_MODEM_NRESET, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + struct modem_private_data { struct regulator *regulator; }; @@ -568,7 +576,6 @@ static struct regulator_init_data keybrd_pwr_initdata = { static struct fixed_voltage_config keybrd_pwr_config = { .supply_name = "keybrd_pwr", .microvolts = 5000000, - .gpio = AMS_DELTA_GPIO_PIN_KEYBRD_PWR, .enable_high = 1, .init_data = &keybrd_pwr_initdata, }; @@ -602,6 +609,7 @@ static struct platform_device *ams_delta_devices[] __initdata = { }; static struct gpiod_lookup_table *ams_delta_gpio_tables[] __initdata = { + &ams_delta_nreset_gpiod_table, &ams_delta_audio_gpio_table, &keybrd_pwr_gpio_table, &ams_delta_lcd_gpio_table, diff --git a/arch/arm/mach-omap1/board-fsample.c b/arch/arm/mach-omap1/board-fsample.c index 69bd601feb83..4a0a66815ca0 100644 --- a/arch/arm/mach-omap1/board-fsample.c +++ b/arch/arm/mach-omap1/board-fsample.c @@ -16,8 +16,7 @@ #include <linux/platform_device.h> #include <linux/delay.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/physmap.h> #include <linux/input.h> #include <linux/smc91x.h> @@ -186,7 +185,7 @@ static struct platform_device nor_device 
= { #define FSAMPLE_NAND_RB_GPIO_PIN 62 -static int nand_dev_ready(struct mtd_info *mtd) +static int nand_dev_ready(struct nand_chip *chip) { return gpio_get_value(FSAMPLE_NAND_RB_GPIO_PIN); } diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c index 9aeb8ad8c327..9d9a6ca15df0 100644 --- a/arch/arm/mach-omap1/board-h2.c +++ b/arch/arm/mach-omap1/board-h2.c @@ -24,8 +24,7 @@ #include <linux/delay.h> #include <linux/i2c.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/physmap.h> #include <linux/input.h> #include <linux/mfd/tps65010.h> @@ -182,7 +181,7 @@ static struct mtd_partition h2_nand_partitions[] = { #define H2_NAND_RB_GPIO_PIN 62 -static int h2_nand_dev_ready(struct mtd_info *mtd) +static int h2_nand_dev_ready(struct nand_chip *chip) { return gpio_get_value(H2_NAND_RB_GPIO_PIN); } diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c index 2edcd6356f2d..cd6e02c5c01a 100644 --- a/arch/arm/mach-omap1/board-h3.c +++ b/arch/arm/mach-omap1/board-h3.c @@ -23,7 +23,7 @@ #include <linux/workqueue.h> #include <linux/i2c.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/rawnand.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> #include <linux/input.h> @@ -185,7 +185,7 @@ static struct mtd_partition nand_partitions[] = { #define H3_NAND_RB_GPIO_PIN 10 -static int nand_dev_ready(struct mtd_info *mtd) +static int nand_dev_ready(struct nand_chip *chip) { return gpio_get_value(H3_NAND_RB_GPIO_PIN); } diff --git a/arch/arm/mach-omap1/board-nand.c b/arch/arm/mach-omap1/board-nand.c index 1bffbb4e050f..20923eb2d9b6 100644 --- a/arch/arm/mach-omap1/board-nand.c +++ b/arch/arm/mach-omap1/board-nand.c @@ -20,9 +20,8 @@ #include "common.h" -void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +void omap1_nand_cmd_ctl(struct nand_chip *this, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); unsigned long mask; if (cmd == NAND_CMD_NONE) @@ -32,6 +31,6 @@ void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) if (ctrl & NAND_ALE) mask |= 0x04; - writeb(cmd, this->IO_ADDR_W + mask); + writeb(cmd, this->legacy.IO_ADDR_W + mask); } diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c index b4951eb82898..06a584fef5b8 100644 --- a/arch/arm/mach-omap1/board-perseus2.c +++ b/arch/arm/mach-omap1/board-perseus2.c @@ -16,8 +16,7 @@ #include <linux/platform_device.h> #include <linux/delay.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/physmap.h> #include <linux/input.h> #include <linux/smc91x.h> @@ -144,7 +143,7 @@ static struct platform_device nor_device = { #define P2_NAND_RB_GPIO_PIN 62 -static int nand_dev_ready(struct mtd_info *mtd) +static int nand_dev_ready(struct nand_chip *chip) { return gpio_get_value(P2_NAND_RB_GPIO_PIN); } diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h index c6537d2c2859..504b959ba5cf 100644 --- a/arch/arm/mach-omap1/common.h +++ b/arch/arm/mach-omap1/common.h @@ -26,7 +26,6 @@ #ifndef __ARCH_ARM_MACH_OMAP1_COMMON_H #define __ARCH_ARM_MACH_OMAP1_COMMON_H -#include <linux/mtd/mtd.h> #include <linux/platform_data/i2c-omap.h> #include <linux/reboot.h> @@ -82,7 +81,8 @@ void omap1_restart(enum reboot_mode, const char *); extern void __init omap_check_revision(void); 
-extern void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, +struct nand_chip; +extern void omap1_nand_cmd_ctl(struct nand_chip *this, int cmd, unsigned int ctrl); extern void omap1_timer_init(void); diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h index af9af5094ec3..bf99aec5a155 100644 --- a/arch/arm/mach-omap2/hsmmc.h +++ b/arch/arm/mach-omap2/hsmmc.h @@ -12,8 +12,6 @@ struct omap2_hsmmc_info { u8 mmc; /* controller 1/2/3 */ u32 caps; /* 4/8 wires and any additional host * capabilities OR'd (ref. linux/mmc/host.h) */ - int gpio_cd; /* or -EINVAL */ - int gpio_wp; /* or -EINVAL */ struct platform_device *pdev; /* mmc controller instance */ /* init some special card */ void (*init_card)(struct mmc_card *card); diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 7f02743edbe4..9fec5f84bf77 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -10,6 +10,7 @@ #include <linux/clk.h> #include <linux/davinci_emac.h> #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/of_platform.h> @@ -328,7 +329,6 @@ static struct regulator_init_data pandora_vmmc3 = { static struct fixed_voltage_config pandora_vwlan = { .supply_name = "vwlan", .microvolts = 1800000, /* 1.8V */ - .gpio = PANDORA_WIFI_NRESET_GPIO, .startup_delay = 50000, /* 50ms */ .enable_high = 1, .init_data = &pandora_vmmc3, @@ -342,6 +342,19 @@ static struct platform_device pandora_vwlan_device = { }, }; +static struct gpiod_lookup_table pandora_vwlan_gpiod_table = { + .dev_id = "reg-fixed-voltage.1", + .table = { + /* + * As this is a low GPIO number it should be at the first + * GPIO bank. + */ + GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static void pandora_wl1251_init_card(struct mmc_card *card) { /* @@ -363,8 +376,6 @@ static struct omap2_hsmmc_info pandora_mmc3[] = { { .mmc = 3, .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD, - .gpio_cd = -EINVAL, - .gpio_wp = -EINVAL, .init_card = pandora_wl1251_init_card, }, {} /* Terminator */ @@ -403,6 +414,7 @@ fail: static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); + gpiod_add_lookup_table(&pandora_vwlan_gpiod_table); platform_device_register(&pandora_vwlan_device); omap_hsmmc_init(pandora_mmc3); omap_hsmmc_late_init(pandora_mmc3); diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index 2a1a4180d5d0..1298b53ac263 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -18,6 +18,7 @@ * published by the Free Software Foundation. */ +#include <linux/cpu_pm.h> #include <linux/suspend.h> #include <linux/sched.h> #include <linux/proc_fs.h> @@ -29,8 +30,6 @@ #include <linux/clk-provider.h> #include <linux/irq.h> #include <linux/time.h> -#include <linux/gpio.h> -#include <linux/platform_data/gpio-omap.h> #include <asm/fncpy.h> @@ -87,7 +86,7 @@ static int omap2_enter_full_retention(void) l = omap_ctrl_readl(OMAP2_CONTROL_DEVCONF0) | OMAP24XX_USBSTANDBYCTRL; omap_ctrl_writel(l, OMAP2_CONTROL_DEVCONF0); - omap2_gpio_prepare_for_idle(0); + cpu_cluster_pm_enter(); /* One last check for pending IRQs to avoid extra latency due * to sleeping unnecessarily. 
*/ @@ -100,7 +99,7 @@ static int omap2_enter_full_retention(void) OMAP_SDRC_REGADDR(SDRC_POWER)); no_sleep: - omap2_gpio_resume_after_idle(); + cpu_cluster_pm_exit(); clk_enable(osc_ck); diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 36c55547137c..1a90050361f1 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -18,19 +18,18 @@ * published by the Free Software Foundation. */ +#include <linux/cpu_pm.h> #include <linux/pm.h> #include <linux/suspend.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/list.h> #include <linux/err.h> -#include <linux/gpio.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/omap-dma.h> #include <linux/omap-gpmc.h> -#include <linux/platform_data/gpio-omap.h> #include <trace/events/power.h> @@ -197,7 +196,6 @@ void omap_sram_idle(void) int mpu_next_state = PWRDM_POWER_ON; int per_next_state = PWRDM_POWER_ON; int core_next_state = PWRDM_POWER_ON; - int per_going_off; u32 sdrc_pwr = 0; mpu_next_state = pwrdm_read_next_pwrst(mpu_pwrdm); @@ -227,10 +225,8 @@ void omap_sram_idle(void) pwrdm_pre_transition(NULL); /* PER */ - if (per_next_state < PWRDM_POWER_ON) { - per_going_off = (per_next_state == PWRDM_POWER_OFF) ? 1 : 0; - omap2_gpio_prepare_for_idle(per_going_off); - } + if (per_next_state == PWRDM_POWER_OFF) + cpu_cluster_pm_enter(); /* CORE */ if (core_next_state < PWRDM_POWER_ON) { @@ -295,8 +291,8 @@ void omap_sram_idle(void) pwrdm_post_transition(NULL); /* PER */ - if (per_next_state < PWRDM_POWER_ON) - omap2_gpio_resume_after_idle(); + if (per_next_state == PWRDM_POWER_OFF) + cpu_cluster_pm_exit(); } static void omap3_pm_idle(void) diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c index 94778739e38f..fda9b75c3a33 100644 --- a/arch/arm/mach-orion5x/ts78xx-setup.c +++ b/arch/arm/mach-orion5x/ts78xx-setup.c @@ -16,8 +16,7 @@ #include <linux/platform_device.h> #include <linux/mv643xx_eth.h> #include <linux/ata_platform.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/timeriomem-rng.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -131,11 +130,9 @@ static void ts78xx_ts_rtc_unload(void) * NAND_CLE: bit 1 -> bit 1 * NAND_ALE: bit 2 -> bit 0 */ -static void ts78xx_ts_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, - unsigned int ctrl) +static void ts78xx_ts_nand_cmd_ctrl(struct nand_chip *this, int cmd, + unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - if (ctrl & NAND_CTRL_CHANGE) { unsigned char bits; @@ -147,19 +144,18 @@ static void ts78xx_ts_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, } if (cmd != NAND_CMD_NONE) - writeb(cmd, this->IO_ADDR_W); + writeb(cmd, this->legacy.IO_ADDR_W); } -static int ts78xx_ts_nand_dev_ready(struct mtd_info *mtd) +static int ts78xx_ts_nand_dev_ready(struct nand_chip *chip) { return readb(TS_NAND_CTRL) & 0x20; } -static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd, - const uint8_t *buf, int len) +static void ts78xx_ts_nand_write_buf(struct nand_chip *chip, + const uint8_t *buf, int len) { - struct nand_chip *chip = mtd_to_nand(mtd); - void __iomem *io_base = chip->IO_ADDR_W; + void __iomem *io_base = chip->legacy.IO_ADDR_W; unsigned long off = ((unsigned long)buf & 3); int sz; @@ -182,11 +178,10 @@ static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd, writesb(io_base, buf, len); } -static void ts78xx_ts_nand_read_buf(struct mtd_info *mtd, - uint8_t *buf, int len) 
+static void ts78xx_ts_nand_read_buf(struct nand_chip *chip, + uint8_t *buf, int len) { - struct nand_chip *chip = mtd_to_nand(mtd); - void __iomem *io_base = chip->IO_ADDR_R; + void __iomem *io_base = chip->legacy.IO_ADDR_R; unsigned long off = ((unsigned long)buf & 3); int sz; diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index af46d2182533..c52c081eb6d9 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -25,11 +25,10 @@ #include <linux/ioport.h> #include <linux/ucb1400.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/partitions.h> #include <linux/types.h> #include <linux/platform_data/pcf857x.h> #include <linux/platform_data/i2c-pxa.h> -#include <linux/mtd/rawnand.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/physmap.h> #include <linux/regulator/max1586.h> @@ -571,9 +570,9 @@ static inline void balloon3_i2c_init(void) {} * NAND ******************************************************************************/ #if defined(CONFIG_MTD_NAND_PLATFORM)||defined(CONFIG_MTD_NAND_PLATFORM_MODULE) -static void balloon3_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +static void balloon3_nand_cmd_ctl(struct nand_chip *this, int cmd, + unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); uint8_t balloon3_ctl_set = 0, balloon3_ctl_clr = 0; if (ctrl & NAND_CTRL_CHANGE) { @@ -597,10 +596,10 @@ static void balloon3_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ct } if (cmd != NAND_CMD_NONE) - writeb(cmd, this->IO_ADDR_W); + writeb(cmd, this->legacy.IO_ADDR_W); } -static void balloon3_nand_select_chip(struct mtd_info *mtd, int chip) +static void balloon3_nand_select_chip(struct nand_chip *this, int chip) { if (chip < 0 || chip > 3) return; @@ -616,7 +615,7 @@ static void balloon3_nand_select_chip(struct mtd_info *mtd, int chip) BALLOON3_NAND_CONTROL_REG); } -static int balloon3_nand_dev_ready(struct mtd_info *mtd) +static int balloon3_nand_dev_ready(struct nand_chip *this) { return __raw_readl(BALLOON3_NAND_STAT_REG) & BALLOON3_NAND_STAT_RNB; } diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 29be04c6cc48..67e37df637f5 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -15,8 +15,7 @@ #include <linux/dm9000.h> #include <linux/platform_data/rtc-v3020.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/physmap.h> #include <linux/input.h> #include <linux/gpio_keys.h> @@ -285,11 +284,10 @@ static void nand_cs_off(void) } /* hardware specific access to control-lines */ -static void em_x270_nand_cmd_ctl(struct mtd_info *mtd, int dat, +static void em_x270_nand_cmd_ctl(struct nand_chip *this, int dat, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - unsigned long nandaddr = (unsigned long)this->IO_ADDR_W; + unsigned long nandaddr = (unsigned long)this->legacy.IO_ADDR_W; dsb(); @@ -309,15 +307,15 @@ static void em_x270_nand_cmd_ctl(struct mtd_info *mtd, int dat, } dsb(); - this->IO_ADDR_W = (void __iomem *)nandaddr; + this->legacy.IO_ADDR_W = (void __iomem *)nandaddr; if (dat != NAND_CMD_NONE) - writel(dat, this->IO_ADDR_W); + writel(dat, this->legacy.IO_ADDR_W); dsb(); } /* read device ready pin */ -static int em_x270_nand_device_ready(struct mtd_info *mtd) +static int em_x270_nand_device_ready(struct nand_chip *this) { dsb(); @@ -986,7 +984,6 @@ static struct fixed_voltage_config camera_dummy_config = { .supply_name = "camera_vdd", .input_supply = "vcc cam", 
.microvolts = 2800000, - .gpio = -1, .enable_high = 0, .init_data = &camera_dummy_initdata, }; diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c index 2c90b58f347d..565965e9acc7 100644 --- a/arch/arm/mach-pxa/ezx.c +++ b/arch/arm/mach-pxa/ezx.c @@ -21,6 +21,7 @@ #include <linux/regulator/fixed.h> #include <linux/input.h> #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/gpio_keys.h> #include <linux/leds-lp3944.h> #include <linux/platform_data/i2c-pxa.h> @@ -698,31 +699,39 @@ static struct pxa27x_keypad_platform_data e2_keypad_platform_data = { #if defined(CONFIG_MACH_EZX_A780) || defined(CONFIG_MACH_EZX_A910) /* camera */ -static struct regulator_consumer_supply camera_dummy_supplies[] = { +static struct regulator_consumer_supply camera_regulator_supplies[] = { REGULATOR_SUPPLY("vdd", "0-005d"), }; -static struct regulator_init_data camera_dummy_initdata = { - .consumer_supplies = camera_dummy_supplies, - .num_consumer_supplies = ARRAY_SIZE(camera_dummy_supplies), +static struct regulator_init_data camera_regulator_initdata = { + .consumer_supplies = camera_regulator_supplies, + .num_consumer_supplies = ARRAY_SIZE(camera_regulator_supplies), .constraints = { .valid_ops_mask = REGULATOR_CHANGE_STATUS, }, }; -static struct fixed_voltage_config camera_dummy_config = { +static struct fixed_voltage_config camera_regulator_config = { .supply_name = "camera_vdd", .microvolts = 2800000, - .gpio = GPIO50_nCAM_EN, .enable_high = 0, - .init_data = &camera_dummy_initdata, + .init_data = &camera_regulator_initdata, }; -static struct platform_device camera_supply_dummy_device = { +static struct platform_device camera_supply_regulator_device = { .name = "reg-fixed-voltage", .id = 1, .dev = { - .platform_data = &camera_dummy_config, + .platform_data = &camera_regulator_config, + }, +}; + +static struct gpiod_lookup_table camera_supply_gpiod_table = { + .dev_id = "reg-fixed-voltage.1", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO50_nCAM_EN, + NULL, GPIO_ACTIVE_HIGH), + { }, }, }; #endif @@ -800,7 +809,7 @@ static struct i2c_board_info a780_i2c_board_info[] = { static struct platform_device *a780_devices[] __initdata = { &a780_gpio_keys, - &camera_supply_dummy_device, + &camera_supply_regulator_device, }; static void __init a780_init(void) @@ -823,6 +832,7 @@ static void __init a780_init(void) if (a780_camera_init() == 0) pxa_set_camera_info(&a780_pxacamera_platform_data); + gpiod_add_lookup_table(&camera_supply_gpiod_table); pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup)); platform_add_devices(ARRAY_AND_SIZE(ezx_devices)); platform_add_devices(ARRAY_AND_SIZE(a780_devices)); @@ -1098,7 +1108,7 @@ static struct i2c_board_info __initdata a910_i2c_board_info[] = { static struct platform_device *a910_devices[] __initdata = { &a910_gpio_keys, - &camera_supply_dummy_device, + &camera_supply_regulator_device, }; static void __init a910_init(void) @@ -1121,6 +1131,7 @@ static void __init a910_init(void) if (a910_camera_init() == 0) pxa_set_camera_info(&a910_pxacamera_platform_data); + gpiod_add_lookup_table(&camera_supply_gpiod_table); pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup)); platform_add_devices(ARRAY_AND_SIZE(ezx_devices)); platform_add_devices(ARRAY_AND_SIZE(a910_devices)); diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index c5325d1ae77b..14c0f80bc9e7 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -18,6 +18,7 @@ #include <linux/platform_device.h> #include <linux/delay.h> #include 
<linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/gpio_keys.h> #include <linux/input.h> #include <linux/mfd/htc-pasic3.h> @@ -696,7 +697,6 @@ static struct regulator_init_data vads7846_regulator = { static struct fixed_voltage_config vads7846 = { .supply_name = "vads7846", .microvolts = 3300000, /* probably */ - .gpio = -EINVAL, .startup_delay = 0, .init_data = &vads7846_regulator, }; diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c index 4cc05ecce618..b66b0b11d717 100644 --- a/arch/arm/mach-pxa/palmtreo.c +++ b/arch/arm/mach-pxa/palmtreo.c @@ -404,36 +404,6 @@ static void __init palmtreo_leds_init(void) } /****************************************************************************** - * diskonchip docg4 flash - ******************************************************************************/ -#if defined(CONFIG_MACH_TREO680) -/* REVISIT: does the centro have this device also? */ -#if IS_ENABLED(CONFIG_MTD_NAND_DOCG4) -static struct resource docg4_resources[] = { - { - .start = 0x00000000, - .end = 0x00001FFF, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device treo680_docg4_flash = { - .name = "docg4", - .id = -1, - .resource = docg4_resources, - .num_resources = ARRAY_SIZE(docg4_resources), -}; - -static void __init treo680_docg4_flash_init(void) -{ - platform_device_register(&treo680_docg4_flash); -} -#else -static inline void treo680_docg4_flash_init(void) {} -#endif -#endif - -/****************************************************************************** * Machine init ******************************************************************************/ static void __init treo_reserve(void) @@ -517,7 +487,6 @@ static void __init treo680_init(void) treo680_gpio_init(); palm27x_mmc_init(GPIO_NR_TREO_SD_DETECT_N, GPIO_NR_TREO680_SD_READONLY, GPIO_NR_TREO680_SD_POWER, 0); - treo680_docg4_flash_init(); } #endif diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c index 47e3e38e9bec..1d06a8e91d8f 100644 --- a/arch/arm/mach-pxa/palmtx.c +++ b/arch/arm/mach-pxa/palmtx.c @@ -28,8 +28,7 @@ #include <linux/wm97xx.h> #include <linux/power_supply.h> #include <linux/usb/gpio_vbus.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/mtd.h> #include <linux/mtd/physmap.h> @@ -247,11 +246,10 @@ static inline void palmtx_keys_init(void) {} ******************************************************************************/ #if defined(CONFIG_MTD_NAND_PLATFORM) || \ defined(CONFIG_MTD_NAND_PLATFORM_MODULE) -static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd, - unsigned int ctrl) +static void palmtx_nand_cmd_ctl(struct nand_chip *this, int cmd, + unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - char __iomem *nandaddr = this->IO_ADDR_W; + char __iomem *nandaddr = this->legacy.IO_ADDR_W; if (cmd == NAND_CMD_NONE) return; diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index 034345546f84..bd3c23ad6ce6 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -886,7 +886,6 @@ static struct regulator_init_data audio_va_initdata = { static struct fixed_voltage_config audio_va_config = { .supply_name = "audio_va", .microvolts = 5000000, - .gpio = GPIO_AUDIO_VA_ENABLE, .enable_high = 1, .enabled_at_boot = 0, .init_data = &audio_va_initdata, @@ -900,6 +899,15 @@ static struct platform_device audio_va_device = { }, }; +static struct gpiod_lookup_table audio_va_gpiod_table = { + .dev_id = "reg-fixed-voltage.0", + 
.table = { + GPIO_LOOKUP("gpio-pxa", GPIO_AUDIO_VA_ENABLE, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + /* Dummy supplies for Codec's VD/VLC */ static struct regulator_consumer_supply audio_dummy_supplies[] = { @@ -918,7 +926,6 @@ static struct regulator_init_data audio_dummy_initdata = { static struct fixed_voltage_config audio_dummy_config = { .supply_name = "audio_vd", .microvolts = 3300000, - .gpio = -1, .init_data = &audio_dummy_initdata, }; @@ -1033,6 +1040,7 @@ static void __init raumfeld_audio_init(void) else gpio_direction_output(GPIO_MCLK_RESET, 1); + gpiod_add_lookup_table(&audio_va_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(audio_regulator_devices)); } diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index e3851795d6d7..d53ea12fc766 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -17,6 +17,7 @@ #include <linux/irq.h> #include <linux/pm.h> #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/serial_8250.h> #include <linux/dm9000.h> #include <linux/mmc/host.h> @@ -410,7 +411,6 @@ static struct regulator_init_data can_regulator_init_data = { static struct fixed_voltage_config can_regulator_pdata = { .supply_name = "CAN_SHDN", .microvolts = 3300000, - .gpio = ZEUS_CAN_SHDN_GPIO, .init_data = &can_regulator_init_data, }; @@ -422,6 +422,15 @@ static struct platform_device can_regulator_device = { }, }; +static struct gpiod_lookup_table can_regulator_gpiod_table = { + .dev_id = "reg-fixed-voltage.0", + .table = { + GPIO_LOOKUP("gpio-pxa", ZEUS_CAN_SHDN_GPIO, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct mcp251x_platform_data zeus_mcp2515_pdata = { .oscillator_frequency = 16*1000*1000, }; @@ -538,7 +547,6 @@ static struct regulator_init_data zeus_ohci_regulator_data = { static struct fixed_voltage_config zeus_ohci_regulator_config = { .supply_name = "vbus2", .microvolts = 5000000, /* 5.0V */ - .gpio = ZEUS_USB2_PWREN_GPIO, .enable_high = 1, .startup_delay = 0, .init_data = &zeus_ohci_regulator_data, @@ -552,6 +560,15 @@ static struct platform_device zeus_ohci_regulator_device = { }, }; +static struct gpiod_lookup_table zeus_ohci_regulator_gpiod_table = { + .dev_id = "reg-fixed-voltage.0", + .table = { + GPIO_LOOKUP("gpio-pxa", ZEUS_USB2_PWREN_GPIO, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct pxaohci_platform_data zeus_ohci_platform_data = { .port_mode = PMM_NPS_MODE, /* Clear Power Control Polarity Low and set Power Sense @@ -855,6 +872,8 @@ static void __init zeus_init(void) pxa2xx_mfp_config(ARRAY_AND_SIZE(zeus_pin_config)); + gpiod_add_lookup_table(&can_regulator_gpiod_table); + gpiod_add_lookup_table(&zeus_ohci_regulator_gpiod_table); platform_add_devices(zeus_devices, ARRAY_SIZE(zeus_devices)); zeus_register_ohci(); diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index f04650297487..379424d72ae7 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -352,7 +352,6 @@ static struct fixed_voltage_config wallvdd_pdata = { .supply_name = "WALLVDD", .microvolts = 5000000, .init_data = &wallvdd_data, - .gpio = -EINVAL, }; static struct platform_device wallvdd_device = { diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c index c46fa5dfd2e0..908e5aa831c8 100644 --- a/arch/arm/mach-s3c64xx/mach-smdk6410.c +++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c @@ -222,7 +222,6 @@ static struct fixed_voltage_config smdk6410_b_pwr_5v_pdata = { .supply_name = "B_PWR_5V", .microvolts = 
5000000, .init_data = &smdk6410_b_pwr_5v_data, - .gpio = -EINVAL, }; static struct platform_device smdk6410_b_pwr_5v = { diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 575ec085cffa..3e8c0948abcc 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -101,7 +101,7 @@ static int __init assabet_init_gpio(void __iomem *reg, u32 def_val) assabet_bcr_gc = gc; - return gc->base; + return 0; } /* @@ -471,6 +471,14 @@ static struct fixed_voltage_config assabet_cf_vcc_pdata __initdata = { .enable_high = 1, }; +static struct gpiod_lookup_table assabet_cf_vcc_gpio_table = { + .dev_id = "reg-fixed-voltage.0", + .table = { + GPIO_LOOKUP("assabet", 0, NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static void __init assabet_init(void) { /* @@ -517,9 +525,11 @@ static void __init assabet_init(void) neponset_resources, ARRAY_SIZE(neponset_resources)); #endif } else { + gpiod_add_lookup_table(&assabet_cf_vcc_gpio_table); sa11x0_register_fixed_regulator(0, &assabet_cf_vcc_pdata, - assabet_cf_vcc_consumers, - ARRAY_SIZE(assabet_cf_vcc_consumers)); + assabet_cf_vcc_consumers, + ARRAY_SIZE(assabet_cf_vcc_consumers), + true); } @@ -802,7 +812,6 @@ fs_initcall(assabet_leds_init); void __init assabet_init_irq(void) { - unsigned int assabet_gpio_base; u32 def_val; sa1100_init_irq(); @@ -817,9 +826,7 @@ void __init assabet_init_irq(void) * * This must precede any driver calls to BCR_set() or BCR_clear(). */ - assabet_gpio_base = assabet_init_gpio((void *)&ASSABET_BCR, def_val); - - assabet_cf_vcc_pdata.gpio = assabet_gpio_base + 0; + assabet_init_gpio((void *)&ASSABET_BCR, def_val); } MACHINE_START(ASSABET, "Intel-Assabet") diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 7167ddf84a0e..800321c6cbd8 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -348,7 +348,8 @@ void __init sa11x0_init_late(void) int __init sa11x0_register_fixed_regulator(int n, struct fixed_voltage_config *cfg, - struct regulator_consumer_supply *supplies, unsigned num_supplies) + struct regulator_consumer_supply *supplies, unsigned num_supplies, + bool uses_gpio) { struct regulator_init_data *id; @@ -356,7 +357,7 @@ int __init sa11x0_register_fixed_regulator(int n, if (!cfg->init_data) return -ENOMEM; - if (cfg->gpio < 0) + if (!uses_gpio) id->constraints.always_on = 1; id->constraints.name = cfg->supply_name; id->constraints.min_uV = cfg->microvolts; diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 5f3cb52fa6ab..158a4fd5ca24 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -54,4 +54,5 @@ void sa11x0_register_pcmcia(int socket, struct gpiod_lookup_table *); struct fixed_voltage_config; struct regulator_consumer_supply; int sa11x0_register_fixed_regulator(int n, struct fixed_voltage_config *cfg, - struct regulator_consumer_supply *supplies, unsigned num_supplies); + struct regulator_consumer_supply *supplies, unsigned num_supplies, + bool uses_gpio); diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index 22f7fe0b809f..5bc82e2671c6 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -102,14 +102,14 @@ static struct fixed_voltage_config shannon_cf_vcc_pdata __initdata = { .supply_name = "cf-power", .microvolts = 3300000, .enabled_at_boot = 1, - .gpio = -EINVAL, }; static void __init shannon_init(void) { sa11x0_register_fixed_regulator(0, &shannon_cf_vcc_pdata, shannon_cf_vcc_consumers, - 
ARRAY_SIZE(shannon_cf_vcc_consumers)); + ARRAY_SIZE(shannon_cf_vcc_consumers), + false); sa11x0_register_pcmcia(0, &shannon_pcmcia0_gpio_table); sa11x0_register_pcmcia(1, &shannon_pcmcia1_gpio_table); sa11x0_ppc_configure_mcp(); diff --git a/arch/arm/mach-versatile/versatile_dt.c b/arch/arm/mach-versatile/versatile_dt.c index 3c8d39c12909..e9d60687e416 100644 --- a/arch/arm/mach-versatile/versatile_dt.c +++ b/arch/arm/mach-versatile/versatile_dt.c @@ -89,15 +89,11 @@ unsigned int mmc_status(struct device *dev) static struct mmci_platform_data mmc0_plat_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = mmc_status, - .gpio_wp = -1, - .gpio_cd = -1, }; static struct mmci_platform_data mmc1_plat_data = { .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34, .status = mmc_status, - .gpio_wp = -1, - .gpio_cd = -1, }; /* diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index f448a0663b10..712416ecd8e6 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -47,7 +47,8 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, */ if (attrs & DMA_ATTR_NON_CONSISTENT) - return dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + return dma_direct_alloc_pages(dev, size, dma_handle, gfp, + attrs); ret = dma_alloc_from_global_coherent(size, dma_handle); @@ -70,7 +71,7 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, unsigned long attrs) { if (attrs & DMA_ATTR_NON_CONSISTENT) { - dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); } else { int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); @@ -90,7 +91,7 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) return ret; - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } @@ -237,7 +238,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, set_dma_ops(dev, dma_ops); } - -void arch_teardown_dma_ops(struct device *dev) -{ -} diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46c..5bf9443cfbaa 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index fbc74b5fa3ed..8edf93b4490f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -413,3 +413,4 @@ 396 common pkey_free sys_pkey_free 397 common statx sys_statx 398 common rseq sys_rseq +399 common io_pgetevents sys_io_pgetevents diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b1a0e95c751..a8ae30fab508 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) + select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS select EDAC_SUPPORT @@ -142,6 +143,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -479,6 +481,19 @@ config 
ARM64_ERRATUM_1024718 If unsure, say Y. +config ARM64_ERRATUM_1188873 + bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + default y + select ARM_ARCH_TIMER_OOL_WORKAROUND + help + This option adds workarounds for ARM Cortex-A76 erratum 1188873. + + Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause + register corruption when accessing the timer registers from + AArch32 userspace. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -769,9 +784,6 @@ source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_HAS_HOLES_MEMORYMODEL - def_bool y if SPARSEMEM - config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE @@ -786,7 +798,7 @@ config ARCH_FLATMEM_ENABLE def_bool !NUMA config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HW_PERF_EVENTS def_bool y @@ -1132,6 +1144,20 @@ config ARM64_RAS_EXTN and access the new registers if the system supports the extension. Platform RAS features may additionally depend on firmware support. +config ARM64_CNP + bool "Enable support for Common Not Private (CNP) translations" + default y + depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN + help + Common Not Private (CNP) allows translation table entries to + be shared between different PEs in the same inner shareable + domain, so the hardware can use this fact to optimise the + caching of such entries in the TLB. + + Selecting this option allows the CNP feature to be detected + at runtime, and does not affect PEs that do not implement + this feature. + endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bcc98dbba56..6142402c2eb4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -286,12 +286,11 @@ alternative_endif ldr \rd, [\rn, #MM_CONTEXT_ID] .endm /* - * read_ctr - read CTR_EL0. If the system has mismatched - * cache line sizes, provide the system wide safe value - * from arm64_ftr_reg_ctrel0.sys_val + * read_ctr - read CTR_EL0. If the system has mismatched register fields, + * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg -alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE +alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5ee5bca8c24b..13dd42c3ad4e 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -40,6 +40,15 @@ #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define CLIDR_LOUU_SHIFT 27 +#define CLIDR_LOC_SHIFT 24 +#define CLIDR_LOUIS_SHIFT 21 + +#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7) +#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) +#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, @@ -84,6 +93,37 @@ static inline int cache_line_size(void) return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +/* + * Read the effective value of CTR_EL0. + * + * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a), + * section D10.2.33 "CTR_EL0, Cache Type Register": + * + * CTR_EL0.IDC reports the data cache clean requirements for + * instruction to data coherence.
+ * + * 0 - dcache clean to PoU is required unless : + * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0) + * 1 - dcache clean to PoU is not required for i-to-d coherence. + * + * This routine provides the CTR_EL0 with the IDC field updated to the + * effective state. + */ +static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) +{ + u32 ctr = read_cpuid_cachetype(); + + if (!(ctr & BIT(CTR_IDC_SHIFT))) { + u64 clidr = read_sysreg(clidr_el1); + + if (CLIDR_LOC(clidr) == 0 || + (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) + ctr |= BIT(CTR_IDC_SHIFT); + } + + return ctr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 1a037b94eba1..cee28a05ee98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) } #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) +#define COMPAT_MINSIGSTKSZ 2048 static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h deleted file mode 100644 index ee35fd0f2236..000000000000 --- a/arch/arm64/include/asm/compiler.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Based on arch/arm/include/asm/compiler.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_COMPILER_H -#define __ASM_COMPILER_H - -/* - * This is used to ensure the compiler did actually allocate the register we - * asked it for some inline assembly sequences. Apparently we can't trust the - * compiler from one version to another so a bit of paranoia won't hurt. This - * string is meant to be concatenated with the inline asm string and will - * cause compilation to stop on mismatch. 
(for details, see gcc PR 15089) - */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" - -#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ae1f70450fb2..6e2d254c09eb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -33,7 +33,7 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HARDEN_EL2_VECTORS 14 -#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 +#define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 @@ -51,7 +51,10 @@ #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 +#define ARM64_HAS_CRC32 33 +#define ARM64_SSBS 34 +#define ARM64_WORKAROUND_1188873 35 -#define ARM64_NCAPS 33 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1717ba1db35d..6db48d90ad63 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -262,7 +262,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on system-wide value of a * feature. It is safe for a late CPU to have this feature even though - * the system hasn't enabled it, although the featuer will not be used + * the system hasn't enabled it, although the feature will not be used * by Linux in this case. If the system has enabled this feature already, * then every late CPU must have it. */ @@ -508,6 +508,12 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static inline bool system_supports_cnp(void) +{ + return IS_ENABLED(CONFIG_ARM64_CNP) && + cpus_have_const_cap(ARM64_HAS_CNP); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 @@ -530,6 +536,7 @@ void arm64_set_ssbd_mitigation(bool state); static inline void arm64_set_ssbd_mitigation(bool state) {} #endif +extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ea690b3562af..12f93e4d2452 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_A75 0xD0A #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B #define APM_CPU_PART_POTENZA 0x000 @@ -110,6 +111,7 @@ #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 22e4c83de5a5..8d91f2233135 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -36,11 +36,8 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - asm volatile( - "mrs %0, daif // local_daif_save\n" - : "=r" (flags) - : 
- : "memory"); + flags = arch_local_save_flags(); + local_daif_mask(); return flags; @@ -60,11 +57,9 @@ static inline void local_daif_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) trace_hardirqs_on(); - asm volatile( - "msr daif, %0 // local_daif_restore" - : - : "r" (flags) - : "memory"); + + arch_local_irq_restore(flags); + if (arch_irqs_disabled_flags(flags)) trace_hardirqs_off(); } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ce70c3ffb993..676de2ec1762 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -137,6 +137,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -148,6 +150,9 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ #define ESR_ELx_VAL_BRK64(imm) \ @@ -187,6 +192,8 @@ #define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) /* * User space cache operations have the following sysreg encoding * in System instructions. @@ -206,6 +213,18 @@ #define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. 
+ * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) #define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ @@ -249,6 +268,64 @@ #define ESR_ELx_FP_EXC_TFV (UL(1) << 23) +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 35b2e50f17fb..9f8b915af3a7 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -31,8 +31,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <xen/xen.h> - /* * Generic IO read/write. These perform native-endian accesses. 
*/ @@ -205,12 +203,5 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) - #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a780f6714b44..850e2122d53f 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -97,7 +97,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index aa45df752a16..b476bc46f0ab 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -175,6 +175,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) +#define VTTBR_CNP_BIT (UL(1)) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6106a85ae0be..21247870def7 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -335,7 +335,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); - return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + return ESR_ELx_SYS64_ISS_RT(esr); } static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d6d7336f871..2842bf149029 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -387,6 +387,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +void __kvm_enable_ssbs(void); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -407,6 +409,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. 
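__kvm_enable_ssbs() is only declared here; its definition lives in the hyp code, outside the lines shown in this hunk. Assuming it simply mirrors the SCTLR_ELx_DSSBS bit introduced in the sysreg.h hunk further down, a minimal sketch of a plausible implementation is:

	void __hyp_text __kvm_enable_ssbs(void)
	{
		u64 tmp;

		/* Set SCTLR_EL2.DSSBS: exceptions to EL2 default to PSTATE.SSBS = 1 */
		asm volatile(
		"mrs	%0, sctlr_el2\n"
		"orr	%0, %0, %1\n"
		"msr	sctlr_el2, %0"
		: "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
	}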
+ */ + if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d6fff7de5539..64337afbf124 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -517,5 +517,10 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return system_supports_cnp(); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index dd320df0d026..7689c7aa1d77 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -95,5 +95,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys); extern void mark_linear_text_alias_ro(void); +#define INIT_MM_CONTEXT(name) \ + .pgd = init_pg_dir, + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 39ec0b8a689e..1e58bf58c22b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -147,12 +147,25 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) extern ttbr_replace_func idmap_cpu_replace_ttbr1; ttbr_replace_func *replace_phys; - phys_addr_t pgd_phys = virt_to_phys(pgdp); + /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ + phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + + if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { + /* + * cpu_replace_ttbr1() is used when there's a boot CPU + * up (i.e. cpufeature framework is not up yet) and + * latter only when we enable CNP via cpufeature's + * enable() callback. + * Also we rely on the cpu_hwcap bit being set before + * calling the enable() function. + */ + ttbr1 |= TTBR_CNP_BIT; + } replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); cpu_install_idmap(); - replace_phys(pgd_phys); + replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 60d02c81a3a2..c88a3cb117a1 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -37,9 +37,7 @@ extern void clear_page(void *to); typedef struct page *pgtable_t; -#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); -#endif #include <asm/memory.h> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fd208eac9f2a..1d7d8da2ef9b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -211,6 +211,8 @@ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define TTBR_CNP_BIT (UL(1) << 0) + /* * TCR flags. 
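The CNP plumbing above (kvm_cpu_has_cnp(), VTTBR_CNP_BIT) is consumed by the common KVM ARM code outside arch/arm64, so the consumer is not visible in this diff; schematically, and hedging on the exact surrounding code, the stage-2 VTTBR would be composed roughly as:

	/* illustrative sketch: fold CNP into the stage-2 translation table base */
	kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) |
			  ((u64)vmid << VTTBR_VMID_SHIFT) |
			  (kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0);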
*/ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1bdeca8918a6..50b1ef8584c0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -360,6 +360,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -428,10 +429,33 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PUD_TYPE_TABLE) #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; + +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline bool in_swapper_pgdir(void *addr) +{ + return ((unsigned long)addr & PAGE_MASK) == + ((unsigned long)swapper_pg_dir & PAGE_MASK); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ + WRITE_ONCE(*pmdp, pmd); - dsb(ishst); + + if (pmd_valid(pmd)) + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -477,11 +501,21 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ + WRITE_ONCE(*pudp, pud); - dsb(ishst); + + if (pud_valid(pud)) + dsb(ishst); } static inline void pud_clear(pud_t *pudp) @@ -532,6 +566,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, pgd); + return; + } + WRITE_ONCE(*pgdp, pgd); dsb(ishst); } @@ -712,11 +751,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t swapper_pg_end[]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 79657ad91397..2bf6691371c2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -174,6 +174,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; + + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_SSBS_BIT; + regs->sp = sp; } @@ -190,6 +194,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_AA32_SSBS_BIT; + regs->compat_sp = sp; } #endif @@ -244,10 +251,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(const struct 
arm64_cpu_capabilities *__unused); -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 177b851ca6d9..6bc43889d11e 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -50,6 +50,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c1470931b897..0c909c4a932f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,7 +20,6 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include <asm/compiler.h> #include <linux/stringify.h> /* @@ -84,13 +83,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift + +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) @@ -419,6 +431,7 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. 
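As a worked example of the SET_PSTATE_* helpers just defined (arithmetic only, no new code): SET_PSTATE_SSBS(1) expands to

	__emit_inst(0xd500401f			/* MSR (immediate): Op0 = 0, CRn = 4, Rt = 0x1f */
		    | ((3 << 16) | (1 << 5))	/* PSTATE_SSBS: Op1 = 3, Op2 = 1 */
		    | (1 << 8))			/* Imm4 = 1 in the CRm field */

which comes to 0xd503413f, i.e. the "msr ssbs, #1" instruction.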
*/ +#define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) #define SCTLR_ELx_IESB (1 << 21) #define SCTLR_ELx_WXN (1 << 19) @@ -439,7 +452,7 @@ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -453,7 +466,7 @@ #define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) #define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) #if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL2 set/clear bits" @@ -477,7 +490,7 @@ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -489,12 +502,12 @@ #define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ - SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" @@ -544,6 +557,13 @@ #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +/* id_aa64pfr1 */ +#define ID_AA64PFR1_SSBS_SHIFT 4 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index a3233167be60..106fdc951b6e 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,16 +22,10 @@ #include <linux/pagemap.h> #include <linux/swap.h> -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ static void tlb_flush(struct mmu_gather *tlb); @@ -40,36 +34,35 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); + bool last_level = !tlb->freed_tables; + unsigned long stride = tlb_get_unmap_size(tlb); /* - * The ASID allocator will either invalidate the ASID or mark - * it as used. + * If we're tearing down the address space then we only care about + * invalidating the walk-cache, since the ASID allocator won't + * reallocate our ASID without invalidating the entire TLB. */ - if (tlb->fullmm) + if (tlb->fullmm) { + if (!last_level) + flush_tlb_mm(tlb->mm); return; + } - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. 
- */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); - tlb_remove_entry(tlb, pte); + tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pmdp)); + tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -77,8 +70,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pudp)); + tlb_remove_table(tlb, virt_to_page(pudp)); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a4a1901140ee..c3c0387aee18 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -70,43 +70,73 @@ }) /* - * TLB Management - * ============== + * TLB Invalidation + * ================ * - * The TLB specific code is expected to perform whatever tests it needs - * to determine if it should invalidate the TLB for each call. Start - * addresses are inclusive and end addresses are exclusive; it is safe to - * round these addresses down. + * This header file implements the low-level TLB invalidation routines + * (sometimes referred to as "flushing" in the kernel) for arm64. * - * flush_tlb_all() + * Every invalidation operation uses the following template: + * + * DSB ISHST // Ensure prior page-table updates have completed + * TLBI ... // Invalidate the TLB + * DSB ISH // Ensure the TLB invalidation has completed + * if (invalidated kernel mappings) + * ISB // Discard any instructions fetched from the old mapping + * + * + * The following functions form part of the "core" TLB invalidation API, + * as documented in Documentation/core-api/cachetlb.rst: * - * Invalidate the entire TLB. + * flush_tlb_all() + * Invalidate the entire TLB (kernel + user) on all CPUs * * flush_tlb_mm(mm) + * Invalidate an entire user address space on all CPUs. + * The 'mm' argument identifies the ASID to invalidate. + * + * flush_tlb_range(vma, start, end) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * Note that this operation also invalidates any walk-cache + * entries associated with translations for the specified address + * range. + * + * flush_tlb_kernel_range(start, end) + * Same as flush_tlb_range(..., start, end), but applies to + * kernel mappings rather than a particular user address space. + * Whilst not explicitly documented, this function is used when + * unmapping pages from vmalloc/io space. + * + * flush_tlb_page(vma, addr) + * Invalidate a single user mapping for address 'addr' in the + * address space corresponding to 'vma->mm'. Note that this + * operation only invalidates a single, last-level page-table + * entry and therefore does not affect any walk-caches. * - * Invalidate all TLB entries in a particular address space. 
- * - mm - mm_struct describing address space * - * flush_tlb_range(mm,start,end) + * Next, we have some undocumented invalidation routines that you probably + * don't want to call unless you know what you're doing: * - * Invalidate a range of TLB entries in the specified address - * space. - * - mm - mm_struct describing address space - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) + * local_flush_tlb_all() + * Same as flush_tlb_all(), but only applies to the calling CPU. * - * flush_tlb_page(vaddr,vma) + * __flush_tlb_kernel_pgtable(addr) + * Invalidate a single kernel mapping for address 'addr' on all + * CPUs, ensuring that any walk-cache entries associated with the + * translation are also invalidated. * - * Invalidate the specified page in the specified address range. - * - vaddr - virtual address (may not be aligned) - * - vma - vma_struct describing address range + * __flush_tlb_range(vma, start, end, stride, last_level) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * The invalidation operations are issued at a granularity + * determined by 'stride' and only affect any walk-cache entries + * if 'last_level' is equal to false. * - * flush_kern_tlb_page(kaddr) * - * Invalidate the TLB entry for the specified page. The address - * will be in the kernels virtual memory space. Current uses - * only require the D-TLB to be invalidated. - * - kaddr - Kernel virtual memory address + * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented + * on top of these routines, since that is our interface to the mmu_gather + * API as used by munmap() and friends. */ static inline void local_flush_tlb_all(void) { @@ -149,25 +179,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) +#define MAX_TLBI_OPS 1024UL static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - bool last_level) + unsigned long stride, bool last_level) { unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } + /* Convert the stride into units of 4k */ + stride >>= 12; + start = __TLBI_VADDR(start, asid); end = __TLBI_VADDR(end, asid); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + for (addr = start; addr < end; addr += stride) { if (last_level) { __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); @@ -182,14 +215,18 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma, start, end, false); + /* + * We cannot use leaf-only invalidation here, since we may be invalidating + * table entries as part of collapsing hugepages or moving page tables. 
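To illustrate the new stride/last_level arguments with a hypothetical caller (not part of this patch): a path that had only modified last-level entries at PMD granularity could batch the invalidation as

	/* one TLBI VALE1IS per PMD_SIZE step, leaving walk-cache entries intact */
	__flush_tlb_range(vma, start, end, PMD_SIZE, true);

so the loop above advances by PMD_SIZE rather than page-by-page and uses the leaf-only invalidation ops.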
+ */ + __flush_tlb_range(vma, start, end, PAGE_SIZE, false); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } @@ -199,7 +236,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaae1is, addr); + __tlbi(vaale1is, addr); dsb(ish); isb(); } @@ -208,20 +245,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ -static inline void __flush_tlb_pgtable(struct mm_struct *mm, - unsigned long uaddr) -{ - unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); - - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); - dsb(ish); -} - static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e66b0fca99c2..07c34087bd5e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -32,7 +32,6 @@ #include <asm/cpufeature.h> #include <asm/ptrace.h> #include <asm/memory.h> -#include <asm/compiler.h> #include <asm/extable.h> #define get_ds() (KERNEL_DS) diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h index 4e22b7a8c038..2788e95d0ff0 100644 --- a/arch/arm64/include/asm/xen/events.h +++ b/arch/arm64/include/asm/xen/events.h @@ -14,7 +14,7 @@ enum ipi_vector { static inline int xen_irqs_disabled(struct pt_regs *regs) { - return raw_irqs_disabled_flags((unsigned long) regs->pstate); + return !interrupts_enabled(regs); } #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 17c65c8f33cb..2bcd6e4f3474 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -48,5 +48,6 @@ #define HWCAP_USCAT (1 << 25) #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 98c4ce55d9c3..a36227fdb084 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_V_BIT 0x10000000 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index dec10898d688..a509e35132d2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,21 +68,43 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = CTR_CACHE_MINLINE_MASK; - - /* Skip matching the min line sizes for cache type check */ - if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) - mask ^= arm64_ftr_reg_ctrel0.strict_mask; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask; + u64 ctr_raw, ctr_real; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & mask) != - (arm64_ftr_reg_ctrel0.sys_val & mask); + + /* 
+ * We want to make sure that all the CPUs in the system expose + * a consistent CTR_EL0 so that applications behave + * correctly with migration. + * + * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0: + * + * 1) It is safe if the system doesn't support IDC, as the CPU anyway + * reports IDC = 0, consistent with the rest. + * + * 2) If the system has IDC, it is still safe as we trap CTR_EL0 + * access on this CPU via the ARM64_HAS_CACHE_IDC capability. + * + * So, we need to make sure either the raw CTR_EL0 or the effective + * CTR_EL0 matches the system's copy to allow a secondary CPU to boot. + */ + ctr_raw = read_cpuid_cachetype() & mask; + ctr_real = read_cpuid_effective_cachetype() & mask; + + return (ctr_real != sys) && (ctr_raw != sys); } static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + + /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ + if ((read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask)) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); @@ -116,6 +138,15 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; + /* + * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs + * start/end if we're a guest. Skip the hyp-vectors work. + */ + if (!hyp_vecs_start) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) { @@ -312,6 +343,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); @@ -336,6 +375,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; return false; @@ -384,7 +428,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (ssbd_state) { case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); arm64_set_ssbd_mitigation(false); required = false; break; @@ -397,7 +440,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); required = true; break; @@ -407,10 +449,27 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; } +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + return required; } #endif /* CONFIG_ARM64_SSBD */ +static void __maybe_unused +cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +} + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \
.matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -616,14 +675,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Mismatched cache line size", - .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .cpu_enable = cpu_enable_trap_ctr_access, - }, - { - .desc = "Mismatched cache type", + .desc = "Mismatched cache type (CTR_EL0)", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, @@ -680,6 +732,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_ssbd_mitigation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + /* Cortex-A76 r0p0 to r2p0 */ + .desc = "ARM erratum 1188873", + .capability = ARM64_WORKAROUND_1188873, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e238b7932096..af50064dea51 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -20,6 +20,7 @@ #include <linux/bsearch.h> #include <linux/cpumask.h> +#include <linux/crash_dump.h> #include <linux/sort.h> #include <linux/stop_machine.h> #include <linux/types.h> @@ -117,6 +118,7 @@ EXPORT_SYMBOL(cpu_hwcap_keys); static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); /* * NOTE: Any changes to the visibility of features should be kept in @@ -164,6 +166,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -371,7 +378,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -657,7 +664,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -848,15 +854,55 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_effective_cachetype(); + + return ctr & BIT(CTR_IDC_SHIFT); +} + +static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) +{ + /* + * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively + * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses + * to the CTR_EL0 on this CPU and emulate it with the real/safe + * value. 
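Once SCTLR_EL1.UCT is cleared, EL0 reads of CTR_EL0 arrive as EC_SYS64 traps. The emulation side is handled in traps.c rather than in this hunk; a sketch of what that handler plausibly looks like, reusing the ESR_ELx_SYS64_ISS_RT() helper added earlier in this diff:

	static int ctr_read_handler(unsigned int esr, struct pt_regs *regs)
	{
		int rt = ESR_ELx_SYS64_ISS_RT(esr);

		/* hand userspace the system-wide safe value, not this CPU's raw one */
		pt_regs_write_reg(regs, rt, arm64_ftr_reg_ctrel0.sys_val);
		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
		return 0;
	}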
+ */ + if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_DIC_SHIFT); +} + +static bool __maybe_unused +has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* + * Kdump isn't guaranteed to power-off all secondary CPUs, CNP + * may share TLB entries with a CPU stuck in the crashed + * kernel. + */ + if (is_kdump_kernel()) + return false; + + return has_cpuid_feature(entry, scope); } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -1035,6 +1081,70 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(PSTATE_Imm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + +#ifdef CONFIG_ARM64_PAN +static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) +{ + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); +} +#endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_RAS_EXTN +static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) +{ + /* Firmware may have left a deferred SError in this register. 
*/ + write_sysreg_s(0, SYS_DISR_EL1); +} +#endif /* CONFIG_ARM64_RAS_EXTN */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1184,6 +1294,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CACHE_IDC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, + .cpu_enable = cpu_emulate_effective_ctr, }, { .desc = "Instruction cache invalidation not required for I/D coherence", @@ -1222,6 +1333,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif + { + .desc = "CRC32 instructions", + .capability = ARM64_HAS_CRC32, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .min_field_value = 1, + }, +#ifdef CONFIG_ARM64_SSBD + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, + }, +#endif +#ifdef CONFIG_ARM64_CNP + { + .desc = "Common not Private translations", + .capability = ARM64_HAS_CNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_useable_cnp, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .min_field_value = 1, + .cpu_enable = cpu_enable_cnp, + }, +#endif {}, }; @@ -1267,6 +1413,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; @@ -1658,6 +1805,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } +static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) +{ + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] @@ -1719,27 +1871,32 @@ static int emulate_sys_reg(u32 id, u64 *valp) return 0; } -static int emulate_mrs(struct pt_regs *regs, u32 insn) +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; - u32 sys_reg, dst; u64 val; - /* - * sys_reg values are defined as used in mrs/msr instruction. - * shift the imm value to get the encoding. - */ - sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; rc = emulate_sys_reg(sys_reg, &val); if (!rc) { - dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - pt_regs_write_reg(regs, dst, val); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } - return rc; } +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + u32 sys_reg, rt; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding.
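(The shift by 5 on the next line works because sys_reg() packs Op0/Op1/CRn/CRm/Op2 starting at bit 5, leaving bits [4:0] for Rt, while the decoded imm16 is right-justified. A quick worked check with MIDR_EL1: sys_reg(3, 0, 0, 0, 0) = 3 << 19 = 0x180000, and the imm16 field of "mrs xN, midr_el1" decodes to 0xc000; 0xc000 << 5 = 0x180000, as expected.)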
+ */ + sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + return do_emulate_mrs(regs, sys_reg, rt); +} + static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, @@ -1755,9 +1912,3 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); - -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) -{ - /* Firmware may have left a deferred SError in this register. */ - write_sysreg_s(0, SYS_DISR_EL1); -} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index e9ab7b3ed317..bcc2831399cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { "uscat", "ilrcpc", "flagm", + "ssbs", NULL }; @@ -324,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); - info->reg_ctr = read_cpuid_cachetype(); + /* + * Use the effective value of the CTR_EL0 rather than the raw value + * exposed by the CPU. The CTR_EL0.IDC field value must be interpreted + * with the CLIDR_EL1 fields to avoid triggering false warnings + * when there is a mismatch across the CPUs. Keep track of the + * effective value of the CTR_EL0 in our internal records for + * accurate sanity check and feature enablement. + */ + info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 09dbea221a27..039144ecbcb2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -589,7 +589,7 @@ el1_undef: inherit_daif pstate=x23, tmp=x2 mov x0, sp bl do_undefinstr - ASM_BUG() + kernel_exit 1 el1_dbg: /* * Debug exception handling @@ -665,6 +665,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap + ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc @@ -697,9 +698,9 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap @@ -722,6 +723,17 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked + +el0_cp15: + /* + * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions + */ + enable_daif + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_cp15instr + b ret_to_user #endif el0_da: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b0853069702f..4471f570a295 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -287,19 +287,21 @@ __create_page_tables: mov x28, lr /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. + * Invalidate the init page tables to avoid potential dirty cache lines + * being evicted. Other page tables are allocated in rodata as part of + * the kernel image, and thus are clean to the PoC per the boot + * protocol.
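init_pg_dir and init_pg_end themselves are carved out by the linker script rather than in head.S; given the INIT_DIR_SIZE rename earlier in this diff, the matching vmlinux.lds.S fragment (not shown here, sketched as an assumption) would be:

	init_pg_dir = .;
	. += INIT_DIR_SIZE;
	init_pg_end = .;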
*/ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area /* - * Clear the idmap and swapper page tables. + * Clear the init page tables. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -373,7 +375,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - adrp x0, swapper_pg_dir + adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD @@ -390,7 +392,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 dmb sy bl __inval_dcache_area @@ -706,6 +708,7 @@ secondary_startup: * Common entry point for secondary CPUs. */ bl __cpu_setup // initialise processor + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 @@ -748,6 +751,7 @@ ENDPROC(__secondary_switched) * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. + * x1 = TTBR1_EL1 value * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -756,17 +760,16 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x1, ID_AA64MMFR0_EL1 - ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + mrs x2, ID_AA64MMFR0_EL1 + ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - update_early_cpu_boot_status 0, x1, x2 - adrp x1, idmap_pg_dir - adrp x2, swapper_pg_dir - phys_to_ttbr x3, x1 - phys_to_ttbr x4, x2 - msr ttbr0_el1, x3 // load TTBR0 - msr ttbr1_el1, x4 // load TTBR1 + update_early_cpu_boot_status 0, x2, x3 + adrp x2, idmap_pg_dir + phys_to_ttbr x1, x1 + phys_to_ttbr x2, x2 + msr ttbr0_el1, x2 // load TTBR0 + msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -823,6 +826,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index e78c3ef04d95..9b65132e789a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -107,7 +107,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.api.insn) return -ENOMEM; break; - }; + } /* prepare the instruction */ if (p->ainsn.api.insn) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7f1628effe6d..ce99c58cd1f1 100644 --- 
a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -358,6 +358,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..8cdaf25e99cd 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -24,7 +24,6 @@ #include <uapi/linux/psci.h> -#include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/smp_plat.h> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..d0f62dd24c90 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). 
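The rewritten reserve_memblock_reserved_regions() in the remainder of this hunk walks the saved standard_resources array and splits out any memblock-reserved range that intersects it. A minimal standalone sketch of that page-rounding and clamping logic (made-up addresses, not the kernel helpers themselves):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
    #define PFN_UP(x)   (((x) + (1ULL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
    #define PFN_PHYS(x) ((x) << PAGE_SHIFT)

    int main(void)
    {
        uint64_t mem_start = 0x80000000ULL, mem_end = 0x8fffffffULL; /* parent resource */
        uint64_t r_start = 0x80001234ULL, r_end = 0x80005678ULL;     /* reserved range */

        uint64_t start = PFN_PHYS(PFN_DOWN(r_start)); /* round down to a page */
        uint64_t end = PFN_PHYS(PFN_UP(r_end)) - 1;   /* round up, inclusive end */

        if (start < mem_start)
            start = mem_start;
        if (end > mem_end)
            end = mem_end;

        if (start > mem_end || end < mem_start)
            puts("no overlap - nothing to reserve");
        else
            printf("reserve [%#llx-%#llx]\n", (unsigned long long)start,
                   (unsigned long long)end);
        return 0;
    }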
- */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; @@ -351,12 +349,8 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; #endif -#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index bebec8ef9372..3e53ffa07994 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,6 +101,7 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 3432e5ef9f41..885f13e58708 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -3,17 +3,33 @@ * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/thread_info.h> #include <asm/cpufeature.h> +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + /* * prctl interface for SSBD - * FIXME: Drop the below ifdefery once merged in 4.18. 
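ssbd_ssbs_enable()/ssbd_ssbs_disable() above flip the SSBS bit in the task's saved pstate, picking the AArch32 or AArch64 bit position depending on the thread. A hedged sketch of that selection; the bit positions and the fake task struct are illustrative assumptions:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PSR_SSBS_BIT      (1u << 12) /* AArch64 SSBS; assumed position */
    #define PSR_AA32_SSBS_BIT (1u << 23) /* AArch32 SSBS; assumed position */

    struct fake_task { bool compat; uint64_t pstate; };

    static void ssbs_set(struct fake_task *t, bool allow_speculation)
    {
        uint64_t bit = t->compat ? PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;

        if (allow_speculation)
            t->pstate |= bit;  /* SSBS set: store-bypass speculation allowed */
        else
            t->pstate &= ~bit; /* SSBS clear: mitigation in force */
    }

    int main(void)
    {
        struct fake_task t = { .compat = false, .pstate = 0 };

        ssbs_set(&t, true);
        printf("pstate=%#llx\n", (unsigned long long)t.pstate);
        return 0;
    }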
*/ -#ifdef PR_SPEC_STORE_BYPASS static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) { int state = arm64_get_ssbd_state(); @@ -46,12 +62,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) return -EPERM; task_clear_spec_ssb_disable(task); clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); break; case PR_SPEC_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) return -EPERM; task_set_spec_ssb_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; case PR_SPEC_FORCE_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) @@ -59,6 +77,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; default: return -ERANGE; @@ -107,4 +126,3 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } -#endif /* PR_SPEC_STORE_BYPASS */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 70c283368b64..9405d1b7f4b0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); + /* Restore CnP bit in TTBR1_EL1 */ + if (system_supports_cnp()) + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 039e9ff379cc..4066da7f1e5e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -310,10 +310,12 @@ static int call_undef_hook(struct pt_regs *regs) int (*fn)(struct pt_regs *regs, u32 instr) = NULL; void __user *pc = (void __user *)instruction_pointer(regs); - if (!user_mode(regs)) - return 1; - - if (compat_thumb_mode(regs)) { + if (!user_mode(regs)) { + __le32 instr_le; + if (probe_kernel_address((__force __le32 *)pc, instr_le)) + goto exit; + instr = le32_to_cpu(instr_le); + } else if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ __le16 instr_le; if (get_user(instr_le, (__le16 __user *)pc)) @@ -352,6 +354,9 @@ void force_signal_inject(int signal, int code, unsigned long address) const char *desc; struct pt_regs *regs = current_pt_regs(); + if (WARN_ON(!user_mode(regs))) + return; + clear_siginfo(&info); switch (signal) { @@ -406,14 +411,10 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; + BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) -{ - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); -} - #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -437,7 +438,7 @@ void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; @@ -472,7 +473,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + 
int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); pt_regs_write_reg(regs, rt, val); @@ -482,7 +483,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); @@ -490,12 +491,28 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_timer_get_rate()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } +static void mrs_handler(unsigned int esr, struct pt_regs *regs) +{ + u32 sysreg, rt; + + rt = ESR_ELx_SYS64_ISS_RT(esr); + sysreg = esr_sys64_to_sysreg(esr); + + if (do_emulate_mrs(regs, sysreg, rt) != 0) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} + +static void wfi_handler(unsigned int esr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -526,9 +543,176 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, .handler = cntfrq_read_handler, }, + { + /* Trap read access to CPUID registers */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, + .handler = mrs_handler, + }, + { + /* Trap WFI instructions executed in userspace */ + .esr_mask = ESR_ELx_WFx_MASK, + .esr_val = ESR_ELx_WFx_WFI_VAL, + .handler = wfi_handler, + }, {}, }; + +#ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +{ + int cond; + + /* Only a T32 instruction can trap without CV being set */ + if (!(esr & ESR_ELx_CV)) { + u32 it; + + it = compat_get_it_state(regs); + if (!it) + return true; + + cond = it >> 4; + } else { + cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + } + + return aarch32_opcode_cond_checks[cond](regs->pstate); +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. 
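The comment above describes the ITSTATE advance that follows. A hedged userspace sketch of that step, mirroring the "(it & 0xe0) | ((it << 1) & 0x1f)" update: bits [7:5] hold the base condition, bits [4:0] the shrinking mask, and the state is wiped on the block's last instruction. The starting value is arbitrary:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t advance_itstate_sketch(uint32_t it)
    {
        if (!(it & 7))                            /* last instruction: wipe the state */
            return 0;
        return (it & 0xe0) | ((it << 1) & 0x1f);  /* keep cond bits, shift the mask */
    }

    int main(void)
    {
        uint32_t it = 0x12;                       /* arbitrary example ITSTATE */

        while (it) {
            printf("itstate=%#04x\n", (unsigned)it);
            it = advance_itstate_sketch(it);
        }
        return 0;
    }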
+ */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} + +static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, + unsigned int sz) +{ + advance_itstate(regs); + arm64_skip_faulting_instruction(regs, sz); +} + +static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, reg, arch_timer_get_rate()); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_32_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, + .handler = compat_cntfrq_read_handler, + }, + {}, +}; + +static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; + u64 val = arch_counter_get_cntvct(); + + pt_regs_write_reg(regs, rt, lower_32_bits(val)); + pt_regs_write_reg(regs, rt2, upper_32_bits(val)); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_64_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, + .handler = compat_cntvct_read_handler, + }, + {}, +}; + +asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook, *hook_base; + + if (!cp15_cond_valid(esr, regs)) { + /* + * There is no T16 variant of a CP access, so we + * always advance PC by 4 bytes. + */ + arm64_compat_skip_faulting_instruction(regs, 4); + return; + } + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_CP15_32: + hook_base = cp15_32_hooks; + break; + case ESR_ELx_EC_CP15_64: + hook_base = cp15_64_hooks; + break; + default: + do_undefinstr(regs); + return; + } + + for (hook = hook_base; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + /* + * New cp15 instructions may previously have been undefined at + * EL0. Fall back to our usual undefined instruction handler + * so that we handle these consistently. + */ + do_undefinstr(regs); +} +#endif + asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { struct sys64_hook *hook; @@ -605,7 +789,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_daif_mask(); panic("bad mode"); } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 605d1b60469c..ab29c06a7d4b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -138,6 +138,23 @@ SECTIONS EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + swapper_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -216,21 +233,9 @@ SECTIONS BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . 
+= PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += SWAPPER_DIR_SIZE; - swapper_pg_end = .; + init_pg_dir = .; + . += INIT_DIR_SIZE; + init_pg_end = .; __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..a6c9fbaeaefc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; @@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; + u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case PSR_AA32_MODE_FIQ: case PSR_AA32_MODE_IRQ: case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ea9225160786..4576b86a5579 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -65,6 +65,9 @@ __do_hyp_init: b.lo __kvm_handle_stub_hvc phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif msr ttbr0_el2, x4 mrs x4, tcr_el1 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 9ce223944983..76d016b446b2 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,3 +288,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) 
vcpu->arch.sysregs_loaded_on_cpu = false; } + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 68755fd70dcf..69ff9887f724 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := clear_user.o delay.o copy_from_user.o \ # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o -CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ +CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ @@ -25,3 +25,5 @@ KCOV_INSTRUMENT_atomic_ll_sc.o := n UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o + +obj-$(CONFIG_CRC32) += crc32.o diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 000000000000..5bc1e85b4e1c --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,60 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/alternative.h> +#include <asm/assembler.h> + + .cpu generic+crc + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +ENDPROC(__crc32c_le) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index c127f94da8e2..1f0ea2facf24 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -88,7 +88,7 @@ void verify_cpu_asid_bits(void) } } -static void flush_context(unsigned int cpu) +static void flush_context(void) { int i; u64 asid; @@ -142,7 +142,7 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid) return hit; } -static u64 new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm) { static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); @@ -180,7 +180,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* We're out of ASIDs, so increment the global generation count */ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, &asid_generation); - flush_context(cpu); + flush_context(); /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); @@ -196,6 +196,9 @@ void check_and_switch_context(struct mm_struct 
*mm, unsigned int cpu) unsigned long flags; u64 asid, old_active_asid; + if (system_supports_cnp()) + cpu_set_reserved_ttbr0(); + asid = atomic64_read(&mm->context.id); /* @@ -223,7 +226,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current generation. */ asid = atomic64_read(&mm->context.id); if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { - asid = new_context(mm, cpu); + asid = new_context(mm); atomic64_set(&mm->context.id, asid); } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 65dfc8571bf8..fcb1f2a6d7c6 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,8 +36,8 @@ static const struct addr_marker address_markers[] = { #endif { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMALLOC_END, "vmalloc() End" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, @@ -46,7 +46,7 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear Mapping" }, + { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 50b30ff30de4..d0e638ef3af6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -37,6 +37,7 @@ #include <asm/cmpxchg.h> #include <asm/cpufeature.h> #include <asm/exception.h> +#include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/sysreg.h> @@ -56,10 +57,16 @@ struct fault_info { }; static const struct fault_info fault_info[]; +static struct fault_info debug_fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { - return fault_info + (esr & 63); + return fault_info + (esr & ESR_ELx_FSC); +} + +static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) +{ + return debug_fault_info + DBG_ESR_EVT(esr); } #ifdef CONFIG_KPROBES @@ -235,9 +242,8 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_el1_permission_fault(unsigned int esr, - struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -283,7 +289,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - if (is_el1_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -454,7 +460,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die_kernel_fault("access to user memory with fs=KERNEL_DS", @@ -771,7 +777,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, if (addr > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } @@ -785,7 +791,7 @@ 
asmlinkage void __exception do_sp_pc_abort(unsigned long addr, if (user_mode(regs)) { if (instruction_pointer(regs) > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); } clear_siginfo(&info); @@ -831,7 +837,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + const struct fault_info *inf = esr_to_debug_fault_info(esr); int rv; /* @@ -864,17 +870,3 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } NOKPROBE_SYMBOL(do_debug_exception); - -#ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) -{ - /* - * We modify PSTATE. This won't work from irq context as the PSTATE - * is discarded once we return from the exception. - */ - WARN_ON_ONCE(in_interrupt()); - - sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); -} -#endif /* CONFIG_ARM64_PAN */ diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 192b3ba07075..f58ea503ad01 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) { @@ -320,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } +/* + * huge_ptep_set_access_flags will update access flags (dirty, accessed) + * and write permission. + * + * For a contiguous huge pte range we need to check whether or not write + * permission has to change only on the first pte in the set. Then for + * all the contiguous ptes we need to check whether or not there is a + * discrepancy in the dirty or young bits.
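A standalone sketch of the __cont_access_flags_changed() check defined just below: write permission is compared once against the first pte, while dirty and young are compared against every pte in the contiguous set. Plain structs stand in for real pte_t values:

    #include <stdbool.h>
    #include <stdio.h>

    struct fake_pte { bool write, dirty, young; };

    static int cont_access_flags_changed(const struct fake_pte *cur,
                                         struct fake_pte new, int ncontig)
    {
        int i;

        if (new.write != cur[0].write) /* write permission: first pte only */
            return 1;

        for (i = 0; i < ncontig; i++)  /* dirty/young: every pte in the set */
            if (new.dirty != cur[i].dirty || new.young != cur[i].young)
                return 1;

        return 0;
    }

    int main(void)
    {
        struct fake_pte cur[4] = { {1, 0, 1}, {1, 0, 1}, {1, 1, 1}, {1, 0, 1} };
        struct fake_pte new = { .write = 1, .dirty = 0, .young = 1 };

        printf("changed=%d\n", cont_access_flags_changed(cur, new, 4));
        return 0;
    }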
+ */ +static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) +{ + int i; + + if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + return 1; + + for (i = 0; i < ncontig; i++) { + pte_t orig_pte = huge_ptep_get(ptep + i); + + if (pte_dirty(pte) != pte_dirty(orig_pte)) + return 1; + + if (pte_young(pte) != pte_young(orig_pte)) + return 1; + } + + return 0; +} + int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i, changed = 0; + int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; @@ -336,19 +368,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; + if (!__cont_access_flags_changed(ptep, pte, ncontig)) + return 0; + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); - if (!pte_same(orig_pte, pte)) - changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); - return changed; + return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 787e27964ab9..3cf87341859f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -284,7 +284,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { phys_addr_t addr = pfn << PAGE_SHIFT; @@ -294,7 +293,6 @@ int pfn_valid(unsigned long pfn) return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); -#endif #ifndef CONFIG_SPARSEMEM static void __init arm64_memory_present(void) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 12145874c02b..fccb1a6f8c6f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -192,7 +192,7 @@ void __init kasan_init(void) /* * We are going to perform proper setup of shadow memory. - * At first we should unmap early shadow (clear_pgds() call bellow). + * At first we should unmap early shadow (clear_pgds() call below). * However, instrumented code couldn't execute without shadow memory. * tmp_pg_dir used to keep early shadow mapped until full shadow * setup will be finished. diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8080c9f489c3..9498c15b847b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static DEFINE_SPINLOCK(swapper_pgdir_lock); + +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. 
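set_swapper_pgd() above serializes updates and writes through a temporary writable alias, since swapper_pg_dir itself now lives in the read-only kernel image. A loose userspace analogue of that pattern, with a pthread mutex and a plain pointer standing in for the spinlock and the fixmap:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t pgdir_lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t pg_dir[4];            /* stand-in for swapper_pg_dir */

    static uint64_t *map_writable_alias(uint64_t *p) { return p; } /* fake fixmap */
    static void unmap_writable_alias(void) { }

    static void set_pgd(uint64_t *slot, uint64_t val)
    {
        pthread_mutex_lock(&pgdir_lock);
        *(volatile uint64_t *)map_writable_alias(slot) = val; /* one ordered store */
        unmap_writable_alias();           /* the real code flushes TLBs here */
        pthread_mutex_unlock(&pgdir_lock);
    }

    int main(void)
    {
        set_pgd(&pg_dir[1], 0x700000003ULL); /* made-up table descriptor */
        printf("pg_dir[1]=%#llx\n", (unsigned long long)pg_dir[1]);
        return 0;
    }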
+ */ + pgd_clear_fixmap(); + spin_unlock(&swapper_pgdir_lock); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -629,34 +647,18 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - phys_addr_t pgd_phys = early_pgtable_alloc(); - pgd_t *pgdp = pgd_set_fixmap(pgd_phys); + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); map_kernel(pgdp); map_mem(pgdp); - /* - * We want to reuse the original swapper_pg_dir so we don't have to - * communicate the new address to non-coherent secondaries in - * secondary_entry, and so cpu_switch_mm can generate the address with - * adrp+add rather than a load from some global variable. - * - * To do this we need to go via a temporary pgd. - */ - cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgdp, PGD_SIZE); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); - pgd_clear_fixmap(); - memblock_free(pgd_phys, PAGE_SIZE); - /* - * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd - * allocated with it. - */ - memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, - __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) - - PAGE_SIZE); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + init_mm.pgd = swapper_pg_dir; + + memblock_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); } /* diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 146c04ceaa51..d7b66fc5e1c5 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -391,7 +391,6 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); - numa_free_distance(); ret = numa_alloc_distance(); if (ret < 0) @@ -399,20 +398,24 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) - return ret; + goto out_free_distance; if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); - return -EINVAL; + ret = -EINVAL; + goto out_free_distance; } ret = numa_register_nodes(); if (ret < 0) - return ret; + goto out_free_distance; setup_node_to_cpumask_map(); return 0; +out_free_distance: + numa_free_distance(); + return ret; } /** @@ -432,7 +435,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 03646e6a2ef4..2c75b0b903ae 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -160,6 +160,12 @@ ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 + +alternative_if ARM64_HAS_CNP + cbz x1, 1f // skip CNP for reserved ASID + orr x3, x3, #TTBR_CNP_BIT +1: +alternative_else_nop_endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN bfi x3, x1, #48, #16 // set the ASID field in TTBR0 #endif @@ -184,7 +190,7 @@ ENDPROC(cpu_do_switch_mm) .endm /* - * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) * * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. 
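After this series the caller passes a fully formed TTBR1 value, optionally with the CNP (Common-not-Private) bit ORed in, as cpu_do_switch_mm above does. A sketch of composing such a value; bit 0 is the architectural CnP position, the address is made up, and phys_to_ttbr is treated as an identity:

    #include <stdint.h>
    #include <stdio.h>

    #define TTBR_CNP_BIT (1ULL << 0)

    static uint64_t compose_ttbr1(uint64_t table_pa, int has_cnp)
    {
        uint64_t ttbr = table_pa; /* phys_to_ttbr(): identity without 52-bit PA */

        if (has_cnp)
            ttbr |= TTBR_CNP_BIT; /* tables shared by CPUs in the inner domain */
        return ttbr;
    }

    int main(void)
    {
        printf("ttbr1=%#llx\n",
               (unsigned long long)compose_ttbr1(0x40000000ULL, 1));
        return 0;
    }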
@@ -194,8 +200,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - phys_to_ttbr x3, x0 - msr ttbr1_el1, x3 + msr ttbr1_el1, x0 isb restore_daif x2 diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index a641b0bf1611..f65a084607fd 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -9,7 +9,7 @@ config C6X select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 89a4b22f34d9..3ef46522e89f 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -4,6 +4,7 @@ comment "Linux Kernel Configuration for Hexagon" config HEXAGON def_bool y + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_PREEMPT select HAVE_OPROFILE # Other pending projects/to-do items. @@ -29,6 +30,7 @@ config HEXAGON select GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA select GENERIC_CPU_DEVICES + select DMA_DIRECT_OPS ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index dd2fd9c0d292..47c4da3d64a4 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += extable.h generic-y += fb.h diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h deleted file mode 100644 index 263f6acbfb0f..000000000000 --- a/arch/hexagon/include/asm/dma-mapping.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * DMA operations for the Hexagon architecture - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef _ASM_DMA_MAPPING_H -#define _ASM_DMA_MAPPING_H - -#include <linux/types.h> -#include <linux/cache.h> -#include <linux/mm.h> -#include <linux/scatterlist.h> -#include <linux/dma-debug.h> -#include <asm/io.h> - -struct device; - -extern const struct dma_map_ops *dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return dma_ops; -} - -#endif diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 7ebe7ad19d15..706699374444 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -18,32 +18,19 @@ * 02110-1301, USA. 
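The hexagon conversion below deletes its open-coded check_addr() helper; the generic direct-mapping code performs the equivalent dma_capable()-style mask test. A hedged sketch of that check, simplified (no bus offsets, made-up values):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

    static bool dma_capable_sketch(uint64_t dev_mask, uint64_t addr, uint64_t size)
    {
        return addr + size - 1 <= dev_mask; /* whole buffer under the device mask? */
    }

    int main(void)
    {
        uint64_t mask = DMA_BIT_MASK(32);

        /* crosses the 4 GiB boundary, so a 32-bit-only device cannot reach it */
        printf("ok=%d\n", dma_capable_sketch(mask, 0xfffff000ULL, 0x2000ULL));
        return 0;
    }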
*/ -#include <linux/dma-mapping.h> -#include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> #include <linux/bootmem.h> #include <linux/genalloc.h> -#include <asm/dma-mapping.h> #include <linux/module.h> #include <asm/page.h> -#define HEXAGON_MAPPING_ERROR 0 - -const struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - -static inline void *dma_addr_to_virt(dma_addr_t dma_addr) -{ - return phys_to_virt((unsigned long) dma_addr); -} - static struct gen_pool *coherent_pool; /* Allocates from a pool of uncached memory that was reserved at boot time */ -static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag, unsigned long attrs) { void *ret; @@ -75,58 +62,17 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, return ret; } -static void hexagon_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) { gen_pool_free(coherent_pool, (unsigned long) vaddr, size); } -static int check_addr(const char *name, struct device *hwdev, - dma_addr_t bus, size_t size) -{ - if (hwdev && hwdev->dma_mask && !dma_capable(hwdev, bus, size)) { - if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) - printk(KERN_ERR - "%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - s->dma_address = sg_phys(s); - if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - - s->dma_length = s->length; + void *addr = phys_to_virt(paddr); - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - continue; - - flush_dcache_range(dma_addr_to_virt(s->dma_address), - dma_addr_to_virt(s->dma_address + s->length)); - } - - return nents; -} - -/* - * address is virtual - */ -static inline void dma_sync(void *addr, size_t size, - enum dma_data_direction dir) -{ switch (dir) { case DMA_TO_DEVICE: hexagon_clean_dcache_range((unsigned long) addr, @@ -144,76 +90,3 @@ static inline void dma_sync(void *addr, size_t size, BUG(); } } - -/** - * hexagon_map_page() - maps an address for device DMA - * @dev: pointer to DMA device - * @page: pointer to page struct of DMA memory - * @offset: offset within page - * @size: size of memory to map - * @dir: transfer direction - * @attrs: pointer to DMA attrs (not used) - * - * Called to map a memory address to a DMA address prior - * to accesses to/from device. - * - * We don't particularly have many hoops to jump through - * so far. Straight translation between phys and virtual. - * - * DMA is not cache coherent so sync is necessary; this - * seems to be a convenient place to do it. 
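The direction handling kept in arch_sync_dma_for_device() above follows the usual noncoherent-DMA rule: clean (write back) before the device reads, invalidate before the CPU reads what the device wrote, and do both for bidirectional buffers. A sketch with stubbed cache ops, enum order as in the kernel's dma_data_direction:

    #include <stdio.h>

    enum dma_data_direction { DMA_BIDIRECTIONAL, DMA_TO_DEVICE, DMA_FROM_DEVICE };

    static void cache_clean(void *a, unsigned long n)      { (void)a; printf("clean %lu\n", n); }
    static void cache_invalidate(void *a, unsigned long n) { (void)a; printf("invalidate %lu\n", n); }
    static void cache_flush(void *a, unsigned long n)      { (void)a; printf("flush %lu\n", n); }

    static void sync_for_device(void *addr, unsigned long size,
                                enum dma_data_direction dir)
    {
        switch (dir) {
        case DMA_TO_DEVICE:     cache_clean(addr, size); break;      /* CPU wrote buffer */
        case DMA_FROM_DEVICE:   cache_invalidate(addr, size); break; /* device will write */
        case DMA_BIDIRECTIONAL: cache_flush(addr, size); break;      /* both directions */
        }
    }

    int main(void)
    {
        char buf[64];

        sync_for_device(buf, sizeof(buf), DMA_TO_DEVICE);
        return 0;
    }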
- * - */ -static dma_addr_t hexagon_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t bus = page_to_phys(page) + offset; - WARN_ON(size == 0); - - if (!check_addr("map_single", dev, bus, size)) - return HEXAGON_MAPPING_ERROR; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_sync(dma_addr_to_virt(bus), size, dir); - - return bus; -} - -static void hexagon_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ - dma_sync(dma_addr_to_virt(dma_handle), size, dir); -} - -static void hexagon_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ - dma_sync(dma_addr_to_virt(dma_handle), size, dir); -} - -static int hexagon_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == HEXAGON_MAPPING_ERROR; -} - -const struct dma_map_ops hexagon_dma_ops = { - .alloc = hexagon_dma_alloc_coherent, - .free = hexagon_free_coherent, - .map_sg = hexagon_map_sg, - .map_page = hexagon_map_page, - .sync_single_for_cpu = hexagon_sync_single_for_cpu, - .sync_single_for_device = hexagon_sync_single_for_device, - .mapping_error = hexagon_mapping_error, -}; - -void __init hexagon_dma_init(void) -{ - if (dma_ops) - return; - - dma_ops = &hexagon_dma_ops; -} diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 2b8cd4a6d958..f7ec71e4001e 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -10,8 +10,6 @@ #include <linux/scatterlist.h> #include <linux/dma-debug.h> -#define ARCH_HAS_DMA_GET_REQUIRED_MASK - extern const struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 267f4f170191..5133739966bc 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -44,7 +44,6 @@ typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); -typedef u64 ia64_mv_dma_get_required_mask (struct device *); typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); /* @@ -127,7 +126,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # define platform_global_tlb_purge ia64_mv.global_tlb_purge # define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish # define platform_dma_init ia64_mv.dma_init -# define platform_dma_get_required_mask ia64_mv.dma_get_required_mask # define platform_dma_get_ops ia64_mv.dma_get_ops # define platform_irq_to_vector ia64_mv.irq_to_vector # define platform_local_vector_to_irq ia64_mv.local_vector_to_irq @@ -171,7 +169,6 @@ struct ia64_machine_vector { ia64_mv_global_tlb_purge_t *global_tlb_purge; ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish; ia64_mv_dma_init *dma_init; - ia64_mv_dma_get_required_mask *dma_get_required_mask; ia64_mv_dma_get_ops *dma_get_ops; ia64_mv_irq_to_vector *irq_to_vector; ia64_mv_local_vector_to_irq *local_vector_to_irq; @@ -211,7 +208,6 @@ struct ia64_machine_vector { platform_global_tlb_purge, \ platform_tlb_migrate_finish, \ platform_dma_init, \ - platform_dma_get_required_mask, \ platform_dma_get_ops, \ platform_irq_to_vector, \ platform_local_vector_to_irq, \ @@ -286,9 +282,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *); #ifndef platform_dma_get_ops # define platform_dma_get_ops dma_get_ops 
#endif -#ifndef platform_dma_get_required_mask -# define platform_dma_get_required_mask ia64_dma_get_required_mask -#endif #ifndef platform_irq_to_vector # define platform_irq_to_vector __ia64_irq_to_vector #endif diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h index 2b32fd06b7c6..2aafb69a3787 100644 --- a/arch/ia64/include/asm/machvec_init.h +++ b/arch/ia64/include/asm/machvec_init.h @@ -4,7 +4,6 @@ extern ia64_mv_send_ipi_t ia64_send_ipi; extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge; -extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask; extern ia64_mv_irq_to_vector __ia64_irq_to_vector; extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq; extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem; diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h index ece9fa85be88..b5153d300289 100644 --- a/arch/ia64/include/asm/machvec_sn2.h +++ b/arch/ia64/include/asm/machvec_sn2.h @@ -55,7 +55,6 @@ extern ia64_mv_readb_t __sn_readb_relaxed; extern ia64_mv_readw_t __sn_readw_relaxed; extern ia64_mv_readl_t __sn_readl_relaxed; extern ia64_mv_readq_t __sn_readq_relaxed; -extern ia64_mv_dma_get_required_mask sn_dma_get_required_mask; extern ia64_mv_dma_init sn_dma_init; extern ia64_mv_migrate_t sn_migrate; extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event; @@ -100,7 +99,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_pci_get_legacy_mem sn_pci_get_legacy_mem #define platform_pci_legacy_read sn_pci_legacy_read #define platform_pci_legacy_write sn_pci_legacy_write -#define platform_dma_get_required_mask sn_dma_get_required_mask #define platform_dma_init sn_dma_init #define platform_migrate sn_migrate #define platform_kernel_launch_event sn_kernel_launch_event diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 7ccc64d5fe3e..5d71800df431 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -568,32 +568,6 @@ static void __init set_pci_dfl_cacheline_size(void) pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; } -u64 ia64_dma_get_required_mask(struct device *dev) -{ - u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); - u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); - u64 mask; - - if (!high_totalram) { - /* convert to mask just covering totalram */ - low_totalram = (1 << (fls(low_totalram) - 1)); - low_totalram += low_totalram - 1; - mask = low_totalram; - } else { - high_totalram = (1 << (fls(high_totalram) - 1)); - high_totalram += high_totalram - 1; - mask = (((u64)high_totalram) << 32) + 0xffffffff; - } - return mask; -} -EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask); - -u64 dma_get_required_mask(struct device *dev) -{ - return platform_dma_get_required_mask(dev); -} -EXPORT_SYMBOL_GPL(dma_get_required_mask); - static int __init pcibios_init(void) { set_pci_dfl_cacheline_size(); diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 3b944fba0a3e..4ce4ee4ef9f2 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -319,11 +319,10 @@ static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } -u64 sn_dma_get_required_mask(struct device *dev) +static u64 sn_dma_get_required_mask(struct device *dev) { return DMA_BIT_MASK(64); } -EXPORT_SYMBOL_GPL(sn_dma_get_required_mask); char *sn_pci_get_legacy_mem(struct pci_bus *bus) { @@ -444,6 +443,7 @@ static struct dma_map_ops sn_dma_ops = { .unmap_sg = sn_dma_unmap_sg, .mapping_error = sn_dma_mapping_error, .dma_supported = 
sn_dma_supported, + .get_required_mask = sn_dma_get_required_mask, }; void sn_dma_init(void) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 070553791e97..c7b2a8d60a41 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -26,7 +26,7 @@ config M68K select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - select DMA_NONCOHERENT_OPS if HAS_DMA + select DMA_DIRECT_OPS if HAS_DMA select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK select NO_BOOTMEM diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index e9110b9b8bcd..38049357d6d3 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -73,7 +73,7 @@ static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio) len = bvec.bv_len; len >>= 9; nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift, - bvec_to_phys(&bvec)); + page_to_phys(bvec.bv_page) + bvec.bv_offset); sec += len; } bio_endio(bio); diff --git a/arch/m68k/include/asm/atafd.h b/arch/m68k/include/asm/atafd.h deleted file mode 100644 index ad7014cad633..000000000000 --- a/arch/m68k/include/asm/atafd.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_M68K_FD_H -#define _ASM_M68K_FD_H - -/* Definitions for the Atari Floppy driver */ - -struct atari_format_descr { - int track; /* to be formatted */ - int head; /* "" "" */ - int sect_offset; /* offset of first sector */ -}; - -#endif diff --git a/arch/m68k/include/asm/atafdreg.h b/arch/m68k/include/asm/atafdreg.h deleted file mode 100644 index c31b4919ed2d..000000000000 --- a/arch/m68k/include/asm/atafdreg.h +++ /dev/null @@ -1,80 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_FDREG_H -#define _LINUX_FDREG_H - -/* -** WD1772 stuff - */ - -/* register codes */ - -#define FDCSELREG_STP (0x80) /* command/status register */ -#define FDCSELREG_TRA (0x82) /* track register */ -#define FDCSELREG_SEC (0x84) /* sector register */ -#define FDCSELREG_DTA (0x86) /* data register */ - -/* register names for FDC_READ/WRITE macros */ - -#define FDCREG_CMD 0 -#define FDCREG_STATUS 0 -#define FDCREG_TRACK 2 -#define FDCREG_SECTOR 4 -#define FDCREG_DATA 6 - -/* command opcodes */ - -#define FDCCMD_RESTORE (0x00) /* - */ -#define FDCCMD_SEEK (0x10) /* | */ -#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */ -#define FDCCMD_STIN (0x40) /* | */ -#define FDCCMD_STOT (0x60) /* - */ -#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */ -#define FDCCMD_WRSEC (0xa0) /* - " */ -#define FDCCMD_RDADR (0xc0) /* - */ -#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */ -#define FDCCMD_WRTRA (0xf0) /* - */ -#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */ - -/* command modifier bits */ - -#define FDCCMDADD_SR6 (0x00) /* step rate settings */ -#define FDCCMDADD_SR12 (0x01) -#define FDCCMDADD_SR2 (0x02) -#define FDCCMDADD_SR3 (0x03) -#define FDCCMDADD_V (0x04) /* verify */ -#define FDCCMDADD_H (0x08) /* wait for spin-up */ -#define FDCCMDADD_U (0x10) /* update track register */ -#define FDCCMDADD_M (0x10) /* multiple sector access */ -#define FDCCMDADD_E (0x04) /* head settling flag */ -#define FDCCMDADD_P (0x02) /* precompensation off */ -#define FDCCMDADD_A0 (0x01) /* DAM flag */ - -/* status register bits */ - -#define FDCSTAT_MOTORON (0x80) /* motor on */ -#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */ -#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */ -#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ -#define FDCSTAT_RECNF (0x10) /* record not found */ -#define FDCSTAT_CRC (0x08) /* CRC 
error */ -#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */ -#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */ -#define FDCSTAT_IDX (0x02) /* Index status (Type I) */ -#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */ -#define FDCSTAT_BUSY (0x01) /* FDC is busy */ - - -/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */ -#define DSKSIDE (0x01) - -#define DSKDRVNONE (0x06) -#define DSKDRV0 (0x02) -#define DSKDRV1 (0x04) - -/* step rates */ -#define FDCSTEP_6 0x00 -#define FDCSTEP_12 0x01 -#define FDCSTEP_2 0x02 -#define FDCSTEP_3 0x03 - -#endif diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index ace5c5bf1836..164a4857737a 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -1,6 +1,7 @@ config MICROBLAZE def_bool y select ARCH_NO_SWAP + select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -11,8 +12,7 @@ config MICROBLAZE select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK - select DMA_NONCOHERENT_OPS - select DMA_NONCOHERENT_MMAP + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 7b650ab14fa0..f64ebb9c9a41 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -553,8 +553,6 @@ void __init *early_get_page(void); extern unsigned long ioremap_bot, ioremap_base; -unsigned long consistent_virt_to_pfn(void *vaddr); - void setup_memory(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 71032cf64669..a89c2d4ed5ff 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -42,25 +42,3 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, { __dma_sync(dev, paddr, size, dir); } - -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_MMU - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - unsigned long pfn; - - if (off >= count || user_count > (count - off)) - return -ENXIO; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pfn = consistent_virt_to_pfn(cpu_addr); - return remap_pfn_range(vma, vma->vm_start, pfn + off, - vma->vm_end - vma->vm_start, vma->vm_page_prot); -#else - return -ENXIO; -#endif -} diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index c9a278ac795a..d801cc5f5b95 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -165,7 +165,8 @@ static pte_t *consistent_virt_to_pte(void *vaddr) return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr); } -unsigned long consistent_virt_to_pfn(void *vaddr) +long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr, + dma_addr_t dma_addr) { pte_t *ptep = consistent_virt_to_pte(vaddr); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 35511999156a..77c022e56e6e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1106,21 +1106,22 @@ config ARCH_SUPPORTS_UPROBES bool config DMA_MAYBE_COHERENT + select ARCH_HAS_DMA_COHERENCE_H select DMA_NONCOHERENT bool config DMA_PERDEV_COHERENT bool - select DMA_MAYBE_COHERENT + select DMA_NONCOHERENT config DMA_NONCOHERENT bool + select ARCH_HAS_DMA_MMAP_PGPROT select 
ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU select NEED_DMA_MAP_STATE - select DMA_NONCOHERENT_MMAP + select ARCH_HAS_DMA_COHERENT_TO_PFN select DMA_NONCOHERENT_CACHE_SYNC - select DMA_NONCOHERENT_OPS config SYS_HAS_EARLY_PRINTK bool diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c index da7663770425..4bf02f96ab7f 100644 --- a/arch/mips/alchemy/devboards/db1200.c +++ b/arch/mips/alchemy/devboards/db1200.c @@ -29,8 +29,7 @@ #include <linux/leds.h> #include <linux/mmc/host.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/platform_device.h> #include <linux/serial_8250.h> #include <linux/spi/spi.h> @@ -197,11 +196,10 @@ static struct i2c_board_info db1200_i2c_devs[] __initdata = { /**********************************************************************/ -static void au1200_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, +static void au1200_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - unsigned long ioaddr = (unsigned long)this->IO_ADDR_W; + unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W; ioaddr &= 0xffffff00; @@ -213,14 +211,14 @@ static void au1200_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, /* assume we want to r/w real data by default */ ioaddr += MEM_STNAND_DATA; } - this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr; + this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr; if (cmd != NAND_CMD_NONE) { - __raw_writeb(cmd, this->IO_ADDR_W); + __raw_writeb(cmd, this->legacy.IO_ADDR_W); wmb(); } } -static int au1200_nand_device_ready(struct mtd_info *mtd) +static int au1200_nand_device_ready(struct nand_chip *this) { return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1; } diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c index efb318e03e0a..ad7dd8e89598 100644 --- a/arch/mips/alchemy/devboards/db1300.c +++ b/arch/mips/alchemy/devboards/db1300.c @@ -19,8 +19,7 @@ #include <linux/mmc/host.h> #include <linux/module.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/platform_device.h> #include <linux/smsc911x.h> #include <linux/wm97xx.h> @@ -149,11 +148,10 @@ static void __init db1300_gpio_config(void) /**********************************************************************/ -static void au1300_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, +static void au1300_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - unsigned long ioaddr = (unsigned long)this->IO_ADDR_W; + unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W; ioaddr &= 0xffffff00; @@ -165,14 +163,14 @@ static void au1300_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, /* assume we want to r/w real data by default */ ioaddr += MEM_STNAND_DATA; } - this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr; + this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr; if (cmd != NAND_CMD_NONE) { - __raw_writeb(cmd, this->IO_ADDR_W); + __raw_writeb(cmd, this->legacy.IO_ADDR_W); wmb(); } } -static int au1300_nand_device_ready(struct mtd_info *mtd) +static int au1300_nand_device_ready(struct nand_chip *this) { return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1; } diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c index 7d3dfaa10231..7700ad0b93b4 100644 
--- a/arch/mips/alchemy/devboards/db1550.c +++ b/arch/mips/alchemy/devboards/db1550.c @@ -13,8 +13,7 @@ #include <linux/io.h> #include <linux/interrupt.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/spi/spi.h> @@ -126,11 +125,10 @@ static struct i2c_board_info db1550_i2c_devs[] __initdata = { /**********************************************************************/ -static void au1550_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, +static void au1550_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - unsigned long ioaddr = (unsigned long)this->IO_ADDR_W; + unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W; ioaddr &= 0xffffff00; @@ -142,14 +140,14 @@ static void au1550_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, /* assume we want to r/w real data by default */ ioaddr += MEM_STNAND_DATA; } - this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr; + this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr; if (cmd != NAND_CMD_NONE) { - __raw_writeb(cmd, this->IO_ADDR_W); + __raw_writeb(cmd, this->legacy.IO_ADDR_W); wmb(); } } -static int au1550_nand_device_ready(struct mtd_info *mtd) +static int au1550_nand_device_ready(struct nand_chip *this) { return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1; } diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 58351e48421e..9a81e72119da 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,6 +1,7 @@ # MIPS headers generic-(CONFIG_GENERIC_CSUM) += checksum.h generic-y += current.h +generic-y += device.h generic-y += dma-contiguous.h generic-y += emergency-restart.h generic-y += export.h diff --git a/arch/mips/include/asm/device.h b/arch/mips/include/asm/device.h deleted file mode 100644 index 6aa796f1081a..000000000000 --- a/arch/mips/include/asm/device.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#ifndef _ASM_MIPS_DEVICE_H -#define _ASM_MIPS_DEVICE_H - -struct dev_archdata { -#ifdef CONFIG_DMA_PERDEV_COHERENT - /* Non-zero if DMA is coherent with CPU caches */ - bool dma_coherent; -#endif -}; - -struct pdev_archdata { -}; - -#endif /* _ASM_MIPS_DEVICE_H*/ diff --git a/arch/mips/include/asm/dma-coherence.h b/arch/mips/include/asm/dma-coherence.h index 8eda48748ed5..5eaa1fcc878a 100644 --- a/arch/mips/include/asm/dma-coherence.h +++ b/arch/mips/include/asm/dma-coherence.h @@ -20,6 +20,12 @@ enum coherent_io_user_state { #elif defined(CONFIG_DMA_MAYBE_COHERENT) extern enum coherent_io_user_state coherentio; extern int hw_coherentio; + +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return coherentio == IO_COHERENCE_ENABLED || + (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio); +} #else #ifdef CONFIG_DMA_NONCOHERENT #define coherentio IO_COHERENCE_DISABLED diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index e81c4e97ff1a..b4c477eb46ce 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -12,8 +12,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &jazz_dma_ops; #elif defined(CONFIG_SWIOTLB) return &swiotlb_dma_ops; -#elif defined(CONFIG_DMA_NONCOHERENT_OPS) - return &dma_noncoherent_ops; #else return &dma_direct_ops; #endif @@ -25,7 
+23,7 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, bool coherent) { #ifdef CONFIG_DMA_PERDEV_COHERENT - dev->archdata.dma_coherent = coherent; + dev->dma_coherent = coherent; #endif } diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index b2fa62922d88..49d6046ca1d0 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -13,6 +13,7 @@ #include <linux/atomic.h> #include <linux/cpumask.h> +#include <linux/sizes.h> #include <linux/threads.h> #include <asm/cachectl.h> @@ -80,11 +81,10 @@ extern unsigned int vced_count, vcei_count; #endif -/* - * One page above the stack is used for branch delay slot "emulation". - * See dsemul.c for details. - */ -#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M) + +extern unsigned long mips_stack_top(void); +#define STACK_TOP mips_stack_top() /* * This decides where the kernel will search for a free chunk of vm diff --git a/arch/mips/include/asm/vr41xx/giu.h b/arch/mips/include/asm/vr41xx/giu.h index 6a90bc1d916b..ecda4cf300de 100644 --- a/arch/mips/include/asm/vr41xx/giu.h +++ b/arch/mips/include/asm/vr41xx/giu.h @@ -51,12 +51,4 @@ typedef enum { extern void vr41xx_set_irq_level(unsigned int pin, irq_level_t level); -typedef enum { - GPIO_PULL_DOWN, - GPIO_PULL_UP, - GPIO_PULL_DISABLE, -} gpio_pull_t; - -extern int vr41xx_gpio_pullupdown(unsigned int pin, gpio_pull_t pull); - #endif /* __NEC_VR41XX_GIU_H */ diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index d31bc2f01208..0a0aaf39fd16 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -564,13 +564,13 @@ static void *jazz_dma_alloc(struct device *dev, size_t size, { void *ret; - ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); if (!ret) return NULL; *dma_handle = vdma_alloc(virt_to_phys(ret), size); if (*dma_handle == VDMA_ERROR) { - dma_direct_free(dev, size, ret, *dma_handle, attrs); + dma_direct_free_pages(dev, size, ret, *dma_handle, attrs); return NULL; } @@ -587,7 +587,7 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, vdma_free(dma_handle); if (!(attrs & DMA_ATTR_NON_CONSISTENT)) vaddr = (void *)CAC_ADDR((unsigned long)vaddr); - return dma_direct_free(dev, size, vaddr, dma_handle, attrs); + dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); } static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page, @@ -682,7 +682,6 @@ static int jazz_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) const struct dma_map_ops jazz_dma_ops = { .alloc = jazz_dma_alloc, .free = jazz_dma_free, - .mmap = arch_dma_mmap, .map_page = jazz_dma_map_page, .unmap_page = jazz_dma_unmap_page, .map_sg = jazz_dma_map_sg, diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8fc69891e117..d4f7fd4550e1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/nmi.h> #include <linux/cpu.h> +#include <asm/abi.h> #include <asm/asm.h> #include <asm/bootinfo.h> #include <asm/cpu.h> @@ -39,6 +40,7 @@ #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/irq.h> +#include <asm/mips-cps.h> #include <asm/msa.h> #include <asm/pgtable.h> #include <asm/mipsregs.h> @@ -645,6 +647,29 @@ out: return pc; } +unsigned long mips_stack_top(void) +{ + unsigned long top = TASK_SIZE & PAGE_MASK; + + /* One page for branch delay slot "emulation" */ 
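+	/*
+	 * (Illustrative aside, not part of the patch: the subtractions
+	 * below accumulate top-down, so the value returned as STACK_TOP
+	 * already reserves the delay-slot page, the VDSO and its data
+	 * page, an optional GIC user page, cache-colour padding and,
+	 * under PF_RANDOMIZE, up to VDSO_RANDOMIZE_SIZE of slack for
+	 * randomising the VDSO base.)
+	 */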
+ top -= PAGE_SIZE; + + /* Space for the VDSO, data page & GIC user page */ + top -= PAGE_ALIGN(current->thread.abi->vdso->size); + top -= PAGE_SIZE; + top -= mips_gic_present() ? PAGE_SIZE : 0; + + /* Space for cache colour alignment */ + if (cpu_has_dc_aliases) + top -= shm_align_mask + 1; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + + return top; +} + /* * Don't forget that the stack pointer must be aligned on a 8 bytes * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c71d1eb7da59..e64b9e8bb002 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -846,6 +846,34 @@ static void __init arch_mem_init(char **cmdline_p) struct memblock_region *reg; extern void plat_mem_setup(void); + /* + * Initialize boot_command_line to an innocuous but non-empty string in + * order to prevent early_init_dt_scan_chosen() from copying + * CONFIG_CMDLINE into it without our knowledge. We handle + * CONFIG_CMDLINE ourselves below & don't want to duplicate its + * content because repeating arguments can be problematic. + */ + strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE); + + /* call board setup routine */ + plat_mem_setup(); + + /* + * Make sure all kernel memory is in the maps. The "UP" and + * "DOWN" are opposite for initdata since if it crosses over + * into another memory section you don't want that to be + * freed when the initdata is freed. + */ + arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, + PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, + BOOT_MEM_RAM); + arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, + PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, + BOOT_MEM_INIT_RAM); + + pr_info("Determined physical RAM map:\n"); + print_memory_map(); + #if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); #else @@ -873,26 +901,6 @@ static void __init arch_mem_init(char **cmdline_p) } #endif #endif - - /* call board setup routine */ - plat_mem_setup(); - - /* - * Make sure all kernel memory is in the maps. The "UP" and - * "DOWN" are opposite for initdata since if it crosses over - * into another memory section you don't want that to be - * freed when the initdata is freed. - */ - arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, - PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, - BOOT_MEM_RAM); - arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, - PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, - BOOT_MEM_INIT_RAM); - - pr_info("Determined physical RAM map:\n"); - print_memory_map(); - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; @@ -1067,7 +1075,7 @@ static int __init debugfs_mips(void) arch_initcall(debugfs_mips); #endif -#if defined(CONFIG_DMA_MAYBE_COHERENT) && !defined(CONFIG_DMA_PERDEV_COHERENT) +#ifdef CONFIG_DMA_MAYBE_COHERENT /* User defined DMA coherency from command line. 
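 * (Illustrative aside, not part of the patch: with the
 * !CONFIG_DMA_PERDEV_COHERENT qualifier dropped above, coherentio and
 * hw_coherentio remain defined on per-device-coherent kernels too,
 * presumably so the new dev_is_dma_coherent() helper in
 * dma-coherence.h can read them unconditionally.)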
*/ enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT; EXPORT_SYMBOL_GPL(coherentio); diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 8f845f6e5f42..48a9c6b90e07 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -15,6 +15,7 @@ #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/timekeeper_internal.h> @@ -97,6 +98,21 @@ void update_vsyscall_tz(void) } } +static unsigned long vdso_base(void) +{ + unsigned long base; + + /* Skip the delay slot emulation page */ + base = STACK_TOP + PAGE_SIZE; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mips_vdso_image *image = current->thread.abi->vdso; @@ -137,7 +153,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (cpu_has_dc_aliases) size += shm_align_mask + 1; - base = get_unmapped_area(NULL, 0, size, 0, 0); + base = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 3a6f34ef5ffc..069acec3df9f 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -280,9 +280,11 @@ * unset_bytes = end_addr - current_addr + 1 * a2 = t1 - a0 + 1 */ + .set reorder PTR_SUBU a2, t1, a0 + PTR_ADDIU a2, 1 jr ra - PTR_ADDIU a2, 1 + .set noreorder .endm diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index a9ef057c79fe..05bd77727fb9 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1955,22 +1955,21 @@ void r4k_cache_init(void) __flush_icache_user_range = r4k_flush_icache_user_range; __local_flush_icache_user_range = local_r4k_flush_icache_user_range; -#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT) -# if defined(CONFIG_DMA_PERDEV_COHERENT) - if (0) { -# else - if ((coherentio == IO_COHERENCE_ENABLED) || - ((coherentio == IO_COHERENCE_DEFAULT) && hw_coherentio)) { -# endif +#ifdef CONFIG_DMA_NONCOHERENT +#ifdef CONFIG_DMA_MAYBE_COHERENT + if (coherentio == IO_COHERENCE_ENABLED || + (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio)) { _dma_cache_wback_inv = (void *)cache_noop; _dma_cache_wback = (void *)cache_noop; _dma_cache_inv = (void *)cache_noop; - } else { + } else +#endif /* CONFIG_DMA_MAYBE_COHERENT */ + { _dma_cache_wback_inv = r4k_dma_cache_wback_inv; _dma_cache_wback = r4k_dma_cache_wback_inv; _dma_cache_inv = r4k_dma_cache_inv; } -#endif +#endif /* CONFIG_DMA_NONCOHERENT */ build_clear_page(); build_copy_page(); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 2aca1236af36..e6c9485cadcf 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -14,26 +14,6 @@ #include <asm/dma-coherence.h> #include <asm/io.h> -#ifdef CONFIG_DMA_PERDEV_COHERENT -static inline int dev_is_coherent(struct device *dev) -{ - return dev->archdata.dma_coherent; -} -#else -static inline int dev_is_coherent(struct device *dev) -{ - switch (coherentio) { - default: - case IO_COHERENCE_DEFAULT: - return hw_coherentio; - case IO_COHERENCE_ENABLED: - return 1; - case IO_COHERENCE_DISABLED: - return 0; - } -} -#endif /* CONFIG_DMA_PERDEV_COHERENT */ - /* * The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively * fill random cachelines with stale data at any time, 
requiring an extra @@ -49,9 +29,6 @@ static inline int dev_is_coherent(struct device *dev) */ static inline bool cpu_needs_post_dma_flush(struct device *dev) { - if (dev_is_coherent(dev)) - return false; - switch (boot_cpu_type()) { case CPU_R10000: case CPU_R12000: @@ -72,11 +49,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, { void *ret; - ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!ret) - return NULL; - - if (!dev_is_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) { + ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); + if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { dma_cache_wback_inv((unsigned long) ret, size); ret = (void *)UNCAC_ADDR(ret); } @@ -87,43 +61,24 @@ void *arch_dma_alloc(struct device *dev, size_t size, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!(attrs & DMA_ATTR_NON_CONSISTENT) && !dev_is_coherent(dev)) + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr); - dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); } -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)cpu_addr; - unsigned long off = vma->vm_pgoff; - unsigned long pfn; - int ret = -ENXIO; - - if (!dev_is_coherent(dev)) - addr = CAC_ADDR(addr); - - pfn = page_to_pfn(virt_to_page((void *)addr)); + unsigned long addr = CAC_ADDR((unsigned long)cpu_addr); + return page_to_pfn(virt_to_page((void *)addr)); +} +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ if (attrs & DMA_ATTR_WRITE_COMBINE) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off < count && user_count <= (count - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); - } - - return ret; + return pgprot_writecombine(prot); + return pgprot_noncached(prot); } static inline void dma_sync_virt(void *addr, size_t size, @@ -187,8 +142,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - if (!dev_is_coherent(dev)) - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir); } void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, @@ -203,6 +157,5 @@ void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, { BUG_ON(direction == DMA_NONE); - if (!dev_is_coherent(dev)) - dma_sync_virt(vaddr, size, direction); + dma_sync_virt(vaddr, size, direction); } diff --git a/arch/mips/netlogic/xlr/platform-flash.c b/arch/mips/netlogic/xlr/platform-flash.c index 4d1b4c003376..cf9162284b07 100644 --- a/arch/mips/netlogic/xlr/platform-flash.c +++ b/arch/mips/netlogic/xlr/platform-flash.c @@ -19,8 +19,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/physmap.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <asm/netlogic/haldefs.h> 
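Stepping back from the dma-noncoherent.c hunk above: MIPS stops open-coding remap_pfn_range() in its own arch_dma_mmap() and instead exports two small hooks, arch_dma_coherent_to_pfn() and arch_dma_mmap_pgprot(). Roughly what a generic mmap path can then do with those hooks -- a sketch under the signatures visible in this diff, with dev-coherent-pool handling and most error paths trimmed (illustrative only, not part of the patch):

    static int example_common_mmap(struct device *dev,
                                   struct vm_area_struct *vma, void *cpu_addr,
                                   dma_addr_t dma_addr, size_t size,
                                   unsigned long attrs)
    {
            unsigned long user_count = vma_pages(vma);
            unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
            unsigned long off = vma->vm_pgoff;
            unsigned long pfn;

            /* the arch decides cacheability; MIPS honours WRITE_COMBINE */
            vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot,
                                                     attrs);

            if (off >= count || user_count > count - off)
                    return -ENXIO;

            /* the arch translates the coherent kernel address to a pfn */
            pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);

            return remap_pfn_range(vma, vma->vm_start, pfn + off,
                                   user_count << PAGE_SHIFT,
                                   vma->vm_page_prot);
    }
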
#include <asm/netlogic/xlr/iomap.h> @@ -92,8 +91,8 @@ struct xlr_nand_flash_priv { static struct xlr_nand_flash_priv nand_priv; -static void xlr_nand_ctrl(struct mtd_info *mtd, int cmd, - unsigned int ctrl) +static void xlr_nand_ctrl(struct nand_chip *chip, int cmd, + unsigned int ctrl) { if (ctrl & NAND_CLE) nlm_write_reg(nand_priv.flash_mmio, diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c index a7a4e9f5146d..dafbf027fad0 100644 --- a/arch/mips/pnx833x/common/platform.c +++ b/arch/mips/pnx833x/common/platform.c @@ -30,8 +30,7 @@ #include <linux/resource.h> #include <linux/serial.h> #include <linux/serial_pnx8xxx.h> -#include <linux/mtd/rawnand.h> -#include <linux/mtd/partitions.h> +#include <linux/mtd/platnand.h> #include <irq.h> #include <irq-mapping.h> @@ -178,10 +177,9 @@ static struct platform_device pnx833x_sata_device = { }; static void -pnx833x_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +pnx833x_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd_to_nand(mtd); - unsigned long nandaddr = (unsigned long)this->IO_ADDR_W; + unsigned long nandaddr = (unsigned long)this->legacy.IO_ADDR_W; if (cmd == NAND_CMD_NONE) return; diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 354d258396ff..2b23ad640f39 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -20,9 +20,8 @@ #include <linux/ctype.h> #include <linux/string.h> #include <linux/platform_device.h> -#include <linux/mtd/rawnand.h> +#include <linux/mtd/platnand.h> #include <linux/mtd/mtd.h> -#include <linux/mtd/partitions.h> #include <linux/gpio.h> #include <linux/gpio_keys.h> #include <linux/input.h> @@ -141,14 +140,13 @@ static struct platform_device cf_slot0 = { }; /* Resources and device for NAND */ -static int rb532_dev_ready(struct mtd_info *mtd) +static int rb532_dev_ready(struct nand_chip *chip) { return gpio_get_value(GPIO_RDY); } -static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +static void rb532_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd_to_nand(mtd); unsigned char orbits, nandbits; if (ctrl & NAND_CTRL_CHANGE) { @@ -161,7 +159,7 @@ static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) set_latch_u5(orbits, nandbits); } if (cmd != NAND_CMD_NONE) - writeb(cmd, chip->IO_ADDR_W); + writeb(cmd, chip->legacy.IO_ADDR_W); } static struct resource nand_slot0_res[] = { diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..56992330026a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -11,7 +11,7 @@ config NDS32 select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index f4ad1138e6b9..03965692fbfe 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -4,7 +4,7 @@ config NIOS2 select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_SWAP - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select TIMER_OF select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e0081e734827..a655ae280637 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -7,7 +7,7 @@ config OPENRISC def_bool y select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS + select 
DMA_DIRECT_OPS select OF select OF_EARLY_FLATTREE select IRQ_DOMAIN diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8e6d83f79e72..f1cd12afd943 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -186,7 +186,7 @@ config PA11 depends on PA7000 || PA7100LC || PA7200 || PA7300LC select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select DMA_NONCOHERENT_CACHE_SYNC config PREFETCH diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 4e87c35c22b7..755e89ec828a 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -102,7 +102,7 @@ void __init dma_ops_init(void) case pcxl: /* falls through */ case pcxs: case pcxt: - hppa_dma_ops = &dma_noncoherent_ops; + hppa_dma_ops = &dma_direct_ops; break; default: break; diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f329b466e68f..2d14f17838d2 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info, r.gr[30] = get_parisc_stackpointer(); regs = &r; } - unwind_frame_init(info, task, &r); + unwind_frame_init(info, task, regs); } else { unwind_frame_init_from_blocked_task(info, task); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 13a688fc8cd0..2a2486526d1f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -114,7 +114,7 @@ */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * user access blocked by key */ @@ -132,7 +132,7 @@ */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) @@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start, return hash__vmemmap_remove_mapping(start, page_size); } #endif -struct page *realmode_pfn_to_page(unsigned long pfn); static inline pte_t pmd_pte(pmd_t pmd) { diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index ab3a4fba38e3..3d4b88cb8599 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction); -extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, - unsigned long *hpa, enum dma_data_direction *direction); #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b2f89b621b15..b694d6af1150 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa); +extern void 
mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 1a951b00465d..1fffbba8d6a5 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,6 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; +extern bool init_mem_is_free; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ea04dfb8c092..2d8fc8c9da7a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1314,9 +1314,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION mfspr r10,SPRN_HSRR1 - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ - addi r11,r11,-4 /* HSRR0 is next instruction */ bne+ denorm_assist #endif @@ -1382,6 +1380,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) */ XVCPSGNDP32(32) denorm_done: + mfspr r11,SPRN_HSRR0 + subi r11,r11,4 mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index af7a20dc6e09..19b4c628f3be 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_xchg); -#ifdef CONFIG_PPC_BOOK3S_64 -long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, - unsigned long *hpa, enum dma_data_direction *direction) -{ - long ret; - - ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); - - if (!ret && ((*direction == DMA_FROM_DEVICE) || - (*direction == DMA_BIDIRECTIONAL))) { - struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT); - - if (likely(pg)) { - SetPageDirty(pg); - } else { - tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); - ret = -EFAULT; - } - } - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm); -#endif - int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 913c5725cdb2..bb6ac471a784 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1306,6 +1306,16 @@ void show_user_instructions(struct pt_regs *regs) pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); + /* + * Make sure the NIP points at userspace, not kernel text/data or + * elsewhere. 
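+	 * (Illustrative aside, not part of the patch: __access_ok() here
+	 * bounds the whole window being dumped -- instructions_to_print
+	 * words starting at pc -- so a NIP sitting just below the kernel
+	 * boundary cannot pull kernel text into the dump either.)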
+ */ + if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) { + pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", + current->comm, current->pid); + return; + } + pr_info("%s[%d]: code: ", current->comm, current->pid); for (i = 0; i < instructions_to_print; i++) { diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 6bffbc5affe7..7716374786bd 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -176,13 +176,27 @@ _GLOBAL(tm_reclaim) std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) - /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + /* + * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is + * clobbered by an exception once we turn on MSR_RI below. + */ + ld r11, PACATMSCRATCH(r13) + std r11, GPR1(r1) + + /* + * Store r13 away so we can free up the scratch SPR for the SLB fault + * handler (needed once we start accessing the thread_struct). + */ + GET_SCRATCH0(r11) + std r11, GPR13(r1) + /* Reset MSR RI so we can take SLB faults again */ li r11, MSR_RI mtmsrd r11, 1 + /* Store the PPR in r11 and reset to decent value */ mfspr r11, SPRN_PPR HMT_MEDIUM @@ -207,11 +221,11 @@ _GLOBAL(tm_reclaim) SAVE_GPR(8, r7) /* user r8 */ SAVE_GPR(9, r7) /* user r9 */ SAVE_GPR(10, r7) /* user r10 */ - ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ - GET_SCRATCH0(8) /* user r13 */ + ld r8, GPR13(r1) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) std r5, GPR11(r7) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index fd6e8c13685f..998f8d089ac7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long mmu_seq, pte_size; - unsigned long gpa, gfn, hva, pfn; + unsigned long mmu_seq; + unsigned long gpa, gfn, hva; struct kvm_memory_slot *memslot; struct page *page = NULL; long ret; @@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ hva = gfn_to_hva_memslot(memslot, gfn); if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { - pfn = page_to_pfn(page); upgrade_write = true; } else { + unsigned long pfn; + /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, writing, upgrade_p); @@ -639,63 +640,55 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - /* See if we can insert a 1GB or 2MB large PTE here */ - level = 0; - if (page && PageCompound(page)) { - pte_size = PAGE_SIZE << compound_order(compound_head(page)); - if (pte_size >= PUD_SIZE && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1); - } else if (pte_size >= PMD_SIZE && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); - } - } - /* - * Compute the PTE value that we need to insert. + * Read the PTE from the process' radix tree and use that + * so we get the shift and attribute bits. 
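+	 * (Illustrative aside, not part of the patch: this replaces the
+	 * PageCompound() probing removed above -- the shift returned by
+	 * __find_linux_pte() already says whether the backing mapping is
+	 * PMD- or PUD-sized, so the insertion level follows from it
+	 * directly.)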
*/ - if (page) { - pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE | - _PAGE_ACCESSED; - if (writing || upgrade_write) - pgflags |= _PAGE_WRITE | _PAGE_DIRTY; - pte = pfn_pte(pfn, __pgprot(pgflags)); - } else { - /* - * Read the PTE from the process' radix tree and use that - * so we get the attribute bits. - */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); - pte = *ptep; + local_irq_disable(); + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + /* + * If the PTE disappeared temporarily due to a THP + * collapse, just return and let the guest try again. + */ + if (!ptep) { local_irq_enable(); - if (shift == PUD_SHIFT && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - } else if (shift == PMD_SHIFT && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - } else if (shift && shift != PAGE_SHIFT) { - /* Adjust PFN */ - unsigned long mask = (1ul << shift) - PAGE_SIZE; - pte = __pte(pte_val(pte) | (hva & mask)); - } - pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); - if (writing || upgrade_write) { - if (pte_val(pte) & _PAGE_WRITE) - pte = __pte(pte_val(pte) | _PAGE_DIRTY); - } else { - pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + if (page) + put_page(page); + return RESUME_GUEST; + } + pte = *ptep; + local_irq_enable(); + + /* Get pte level from shift/size */ + if (shift == PUD_SHIFT && + (gpa & (PUD_SIZE - PAGE_SIZE)) == + (hva & (PUD_SIZE - PAGE_SIZE))) { + level = 2; + } else if (shift == PMD_SHIFT && + (gpa & (PMD_SIZE - PAGE_SIZE)) == + (hva & (PMD_SIZE - PAGE_SIZE))) { + level = 1; + } else { + level = 0; + if (shift > PAGE_SHIFT) { + /* + * If the pte maps more than one page, bring over + * bits from the virtual address to get the real + * address of the specific single page we want. + */ + unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; + pte = __pte(pte_val(pte) | (hva & rpnmask)); } } + pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); + if (writing || upgrade_write) { + if (pte_val(pte) & _PAGE_WRITE) + pte = __pte(pte_val(pte) | _PAGE_DIRTY); + } else { + pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + } + /* Allocate space in the tree and write the PTE */ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 506a4d400458..6821ead4b4eb 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry) +static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) +{ + long ret; + + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) { + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); + /* + * kvmppc_rm_tce_iommu_do_map() updates the UA cache after + * calling this so we still get here a valid UA. 
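+	 * (Illustrative aside, not part of the patch: the dirty state is
+	 * parked in the low bits of the cached host physical address --
+	 * see MM_IOMMU_TABLE_GROUP_PAGE_DIRTY in mmu_context_iommu.c
+	 * below -- and only folded into the struct page at unpin time,
+	 * replacing the fragile realmode_pfn_to_page() lookup removed
+	 * elsewhere in this series.)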
+ */ + if (pua && *pua) + mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua)); + } + + return ret; +} + +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, + unsigned long entry) { unsigned long hpa = 0; enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, unsigned long hpa = 0; long ret; - if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir)) + if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir)) /* * real mode xchg can fail if struct page crosses * a page boundary @@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret) - iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) return H_CLOSED; - ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); if (ret) { mm_iommu_mapped_dec(mem); /* @@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } kvmppc_tce_put(stt, entry, tce); @@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, goto unlock_exit; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } kvmppc_tce_put(stt, entry + i, tce); @@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } } diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 886ed94b9c13..d05c8af4ac51 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -443,6 +443,9 @@ _GLOBAL(csum_ipv6_magic) addc r0, r8, r9 ld r10, 0(r4) ld r11, 8(r4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + rotldi r5, r5, 8 +#endif adde r0, r0, r10 add r5, r5, r7 adde r0, r0, r11 diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 850f3b8f4da5..5ffee298745f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -142,7 +142,7 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { int err; unsigned int *patch_addr = NULL; @@ -182,12 +182,22 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ + +int patch_instruction(unsigned int *addr, unsigned int instr) +{ + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", addr); + return 0; + } + return do_patch_instruction(addr, instr); +} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(unsigned int *addr, unsigned long target, int flags) diff --git 
a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 51ce091914f9..7a9886f98b0c 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr, { } -/* - * We do not have access to the sparsemem vmemmap, so we fallback to - * walking the list of sparsemem blocks which we already maintain for - * the sake of crashdump. In the long run, we might want to maintain - * a tree if performance of that linear walk becomes a problem. - * - * realmode_pfn_to_page functions can fail due to: - * 1) As real sparsemem blocks do not lay in RAM continously (they - * are in virtual address space which is not available in the real mode), - * the requested page struct can be split between blocks so get_page/put_page - * may fail. - * 2) When huge pages are used, the get_page/put_page API will fail - * in real mode as the linked addresses in the page struct are virtual - * too. - */ -struct page *realmode_pfn_to_page(unsigned long pfn) -{ - struct vmemmap_backing *vmem_back; - struct page *page; - unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; - unsigned long pg_va = (unsigned long) pfn_to_page(pfn); - - for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) { - if (pg_va < vmem_back->virt_addr) - continue; - - /* After vmemmap_list entry free is possible, need check all */ - if ((pg_va + sizeof(struct page)) <= - (vmem_back->virt_addr + page_size)) { - page = (struct page *) (vmem_back->phys + pg_va - - vmem_back->virt_addr); - return page; - } - } - - /* Probably that page struct is split between real pages */ - return NULL; -} -EXPORT_SYMBOL_GPL(realmode_pfn_to_page); - -#else - -struct page *realmode_pfn_to_page(unsigned long pfn) -{ - struct page *page = pfn_to_page(pfn); - return page; -} -EXPORT_SYMBOL_GPL(realmode_pfn_to_page); - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5c8530d0c611..04ccb274a620 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -63,6 +63,7 @@ #endif unsigned long long memory_limit; +bool init_mem_is_free; #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; @@ -396,6 +397,7 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); + init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index c9ee9e23845f..56c2234cc6ae 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -18,11 +18,15 @@ #include <linux/migrate.h> #include <linux/hugetlb.h> #include <linux/swap.h> +#include <linux/sizes.h> #include <asm/mmu_context.h> #include <asm/pte-walk.h> static DEFINE_MUTEX(mem_list_mutex); +#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1 +#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1) + struct mm_iommu_table_group_mem_t { struct list_head next; struct rcu_head rcu; @@ -263,6 +267,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) if (!page) continue; + if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY) + SetPageDirty(page); + put_page(page); mem->hpas[i] = 0; } @@ -360,7 +367,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm, return ret; } -EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm); struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries) @@ -390,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct 
mm_iommu_table_group_mem_t *mem, if (pageshift > mem->pageshift) return -EFAULT; - *hpa = *va | (ua & ~PAGE_MASK); + *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); return 0; } @@ -413,11 +419,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (!pa) return -EFAULT; - *hpa = *pa | (ua & ~PAGE_MASK); + *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); return 0; } -EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm); + +extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua) +{ + struct mm_iommu_table_group_mem_t *mem; + long entry; + void *va; + unsigned long *pa; + + mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE); + if (!mem) + return; + + entry = (ua - mem->ua) >> PAGE_SHIFT; + va = &mem->hpas[entry]; + + pa = (void *) vmalloc_to_phys(va); + if (!pa) + return; + + *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY; +} long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) { diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 35ac5422903a..055b211b7126 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1204,7 +1204,9 @@ int find_and_online_cpu_nid(int cpu) int new_nid; /* Use associativity from first thread for all siblings */ - vphn_get_associativity(cpu, associativity); + if (vphn_get_associativity(cpu, associativity)) + return cpu_to_node(cpu); + new_nid = associativity_to_nid(associativity); if (new_nid < 0 || !node_possible(new_nid)) new_nid = first_online_node; @@ -1215,9 +1217,10 @@ int find_and_online_cpu_nid(int cpu) * Need to ensure that NODE_DATA is initialized for a node from * available memory (see memblock_alloc_try_nid). If unable to * init the node, then default to nearest node that has memory - * installed. + * installed. Skip onlining a node if the subsystems are not + * yet initialized. */ - if (try_online_node(new_nid)) + if (!topology_inited || try_online_node(new_nid)) new_nid = first_online_node; #else /* @@ -1452,7 +1455,8 @@ static struct timer_list topology_timer; static void reset_topology_timer(void) { - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); + if (vphn_enabled) + mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 333b1f80c435..b271b283c785 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -45,7 +45,7 @@ static void scan_pkey_feature(void) * Since any pkey can be used for data or execute, we will just treat * all keys as equal and track them as one entity. 
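 * (Illustrative aside, not part of the patch: the be32_to_cpu() dropped
 * below was a double conversion -- the property is presumably read with
 * of_property_read_u32_array(), which already returns host-endian
 * values -- so the old code left pkeys_total byte-swapped on
 * little-endian kernels.)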
*/ - pkeys_total = be32_to_cpu(vals[0]); + pkeys_total = vals[0]; pkeys_devtree_defined = true; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 6c5db1acbe8d..fe9691040f54 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -276,7 +276,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); - if ((level_shift - 3) * levels + page_shift >= 60) + if ((level_shift - 3) * levels + page_shift >= 55) return -EINVAL; /* Allocate TCE table */ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..c9fecd120d18 --- /dev/null +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_PROTOTYPES_H + +#include <linux/ftrace.h> +#include <asm-generic/asm-prototypes.h> + +#endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index aee603123030..b2d26d9d8489 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -186,7 +186,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = pfn_base + PFN_DOWN(mem_size); + max_low_pfn = memblock_end_of_DRAM(); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 3cae9168f63c..e44a8d7959f5 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -108,7 +108,8 @@ int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); -void __sclp_early_printk(const char *s, unsigned int len); +void sclp_early_printk_force(const char *s); +void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 9431784d7796..40c1dfec944e 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -10,7 +10,7 @@ static void sclp_early_write(struct console *con, const char *s, unsigned int len) { - __sclp_early_printk(s, len); + __sclp_early_printk(s, len, 0); } static struct console sclp_early_console = { diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index a049a7b9d6e8..c1a080b11ae9 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -198,12 +198,10 @@ pgm_check_entry: /* Suspend CPU not available -> panic */ larl %r15,init_thread_union - ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,-STACK_FRAME_OVERHEAD larl %r2,.Lpanic_string - lghi %r1,0 - sam31 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - brasl %r14,sclp_early_printk + brasl %r14,sclp_early_printk_force larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f69333fd2fa3..ac5da6b0b862 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -481,7 +481,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_S390_HPAGE_1M: r = 0; - if (hpage) + if (hpage && !kvm_is_ucontrol(kvm)) r = 1; break; case KVM_CAP_S390_MEM_OP: @@ 
-691,7 +691,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) mutex_lock(&kvm->lock); if (kvm->created_vcpus) r = -EBUSY; - else if (!hpage || kvm->arch.use_cmma) + else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) r = -EINVAL; else { r = 0; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index bb44990c8212..911c7ded35f1 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -708,11 +708,13 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + if (!vma) + continue; /* * We do not discard pages that are backed by * hugetlbfs, so we don't have to refault them. */ - if (vma && is_vm_hugetlb_page(vma)) + if (is_vm_hugetlb_page(vma)) continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1fb7b6d72baf..475d786a65b0 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -7,6 +7,7 @@ config SUPERH select ARCH_NO_COHERENT_DMA_MMAP if !MMU select HAVE_PATA_PLATFORM select CLKDEV_LOOKUP + select DMA_DIRECT_OPS select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP @@ -158,13 +159,11 @@ config SWAP_IO_SPACE bool config DMA_COHERENT - select DMA_DIRECT_OPS bool config DMA_NONCOHERENT def_bool !DMA_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS config PGTABLE_LEVELS default 3 if X2TLB diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index adc61d14172c..06a894526a0b 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -633,7 +633,6 @@ static struct regulator_init_data cn12_power_init_data = { static struct fixed_voltage_config cn12_power_info = { .supply_name = "CN12 SD/MMC Vdd", .microvolts = 3300000, - .gpio = GPIO_PTB7, .enable_high = 1, .init_data = &cn12_power_init_data, }; @@ -646,6 +645,16 @@ static struct platform_device cn12_power = { }, }; +static struct gpiod_lookup_table cn12_power_gpiod_table = { + .dev_id = "reg-fixed-voltage.0", + .table = { + /* Offset 7 on port B */ + GPIO_LOOKUP("sh7724_pfc", GPIO_PTB7, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE) /* SDHI0 */ static struct regulator_consumer_supply sdhi0_power_consumers[] = @@ -665,7 +674,6 @@ static struct regulator_init_data sdhi0_power_init_data = { static struct fixed_voltage_config sdhi0_power_info = { .supply_name = "CN11 SD/MMC Vdd", .microvolts = 3300000, - .gpio = GPIO_PTB6, .enable_high = 1, .init_data = &sdhi0_power_init_data, }; @@ -678,6 +686,16 @@ static struct platform_device sdhi0_power = { }, }; +static struct gpiod_lookup_table sdhi0_power_gpiod_table = { + .dev_id = "reg-fixed-voltage.1", + .table = { + /* Offset 6 on port B */ + GPIO_LOOKUP("sh7724_pfc", GPIO_PTB6, + NULL, GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct tmio_mmc_data sdhi0_info = { .chan_priv_tx = (void *)SHDMA_SLAVE_SDHI0_TX, .chan_priv_rx = (void *)SHDMA_SLAVE_SDHI0_RX, @@ -1413,6 +1431,11 @@ static int __init arch_setup(void) DMA_MEMORY_EXCLUSIVE); platform_device_add(ecovec_ceu_devices[1]); + gpiod_add_lookup_table(&cn12_power_gpiod_table); +#if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE) + gpiod_add_lookup_table(&sdhi0_power_gpiod_table); +#endif + return platform_add_devices(ecovec_devices, ARRAY_SIZE(ecovec_devices)); } diff --git 
a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 254f2c662703..f4ad33c6d2aa 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -14,7 +14,7 @@ #include <linux/mmc/host.h> #include <linux/mtd/physmap.h> #include <linux/mfd/tmio.h> -#include <linux/mtd/rawnand.h> +#include <linux/mtd/platnand.h> #include <linux/i2c.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> @@ -165,23 +165,21 @@ static struct mtd_partition migor_nand_flash_partitions[] = { }, }; -static void migor_nand_flash_cmd_ctl(struct mtd_info *mtd, int cmd, +static void migor_nand_flash_cmd_ctl(struct nand_chip *chip, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd_to_nand(mtd); - if (cmd == NAND_CMD_NONE) return; if (ctrl & NAND_CLE) - writeb(cmd, chip->IO_ADDR_W + 0x00400000); + writeb(cmd, chip->legacy.IO_ADDR_W + 0x00400000); else if (ctrl & NAND_ALE) - writeb(cmd, chip->IO_ADDR_W + 0x00800000); + writeb(cmd, chip->legacy.IO_ADDR_W + 0x00800000); else - writeb(cmd, chip->IO_ADDR_W); + writeb(cmd, chip->legacy.IO_ADDR_W); } -static int migor_nand_flash_ready(struct mtd_info *mtd) +static int migor_nand_flash_ready(struct nand_chip *chip) { return gpio_get_value(GPIO_PTA1); /* NAND_RBn */ } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e6f2a38d2e61..7e2aa59fcc29 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -51,7 +51,7 @@ config SPARC config SPARC32 def_bool !64BIT select ARCH_HAS_SYNC_DMA_FOR_CPU - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select CLZ_TAB select HAVE_UID16 diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 666d6b5c0440..9c3fc03abe9a 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index e17566376934..b0bb2fcaf1c9 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -14,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) - return &dma_noncoherent_ops; + return &dma_direct_ops; #endif #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) if (bus == &pci_bus_type) - return &dma_noncoherent_ops; + return &dma_direct_ops; #endif return dma_ops; } diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 09acf0ddec10..45b4bf1875e6 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -427,8 +427,9 @@ #define __NR_preadv2 358 #define __NR_pwritev2 359 #define __NR_statx 360 +#define __NR_io_pgetevents 361 -#define NR_syscalls 361 +#define NR_syscalls 362 /* Bitmask values returned from kern_features system call. 
*/ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 5868fc333ea8..639c8e54530a 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -122,7 +122,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->pc = addr; linux_regs->npc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index d5f7dc6323d5..a68bbddbdba4 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -148,7 +148,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->tpc = addr; linux_regs->tnpc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d3149baaa33c..67b3e6b3ce5d 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/cpudata.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/sched/clock.h> #include <asm/nmi.h> #include <asm/pcr.h> #include <asm/cacheflush.h> @@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, @@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 
f6528884a2c8..4073e2b87dd0 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -84,8 +84,9 @@ __handle_signal: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 sethi %hi(0xf << 20), %l4 and %l1, %l4, %l4 + andn %l1, %l4, %l1 ba,pt %xcc, __handle_preemption_continue - andn %l1, %l4, %l1 + srl %l4, 20, %l4 /* When returning from a NMI (%pil==15) interrupt we want to * avoid running softirqs, doing IRQ tracing, preempting, etc. diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 12bee14b552c..621a363098ec 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -90,4 +90,4 @@ sys_call_table: /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen /*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .long sys_statx +/*360*/ .long sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 387ef993880a..bb68c805b891 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -91,7 +91,7 @@ sys_call_table32: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, compat_sys_io_pgetevents #endif /* CONFIG_COMPAT */ @@ -173,4 +173,4 @@ sys_call_table: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 635d67ffc9a3..7db5aabe9708 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -180,11 +180,17 @@ static int send_dreg(struct vio_driver_state *vio) struct vio_dring_register pkt; char all[sizeof(struct vio_dring_register) + (sizeof(struct ldc_trans_cookie) * - dr->ncookies)]; + VIO_MAX_RING_COOKIES)]; } u; + size_t bytes = sizeof(struct vio_dring_register) + + (sizeof(struct ldc_trans_cookie) * + dr->ncookies); int i; - memset(&u, 0, sizeof(u)); + if (WARN_ON(bytes > sizeof(u))) + return -EINVAL; + + memset(&u, 0, bytes); init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG); u.pkt.dring_ident = 0; u.pkt.num_descr = dr->num_entries; @@ -206,7 +212,7 @@ static int send_dreg(struct vio_driver_state *vio) (unsigned long long) u.pkt.cookies[i].cookie_size); } - return send_ctrl(vio, &u.pkt.tag, sizeof(u)); + return send_ctrl(vio, &u.pkt.tag, bytes); } static int send_rdx(struct vio_driver_state *vio) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index dd0b5a92ffd0..dc85570d8839 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -31,23 +31,21 @@ obj-y += $(vdso_img_objs) targets += $(vdso_img_cfiles) targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) -export CPPFLAGS_vdso.lds += -P -C +CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \ $(DISABLE_LTO) -$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso64.so.dbg: 
$(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) HOST_EXTRACFLAGS += -I$(srctree)/tools/include hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ -define cmd_vdso2c - $(obj)/vdso2c $< $(<:%.dbg=%) $@ -endef + cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE $(call if_changed,vdso2c) diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index 3feb3d960ca5..75dca9aab737 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -33,9 +33,19 @@ #define TICK_PRIV_BIT (1ULL << 63) #endif +#ifdef CONFIG_SPARC64 #define SYSCALL_STRING \ "ta 0x6d;" \ - "sub %%g0, %%o0, %%o0;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#else +#define SYSCALL_STRING \ + "ta 0x10;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#endif #define SYSCALL_CLOBBERS \ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index f51595f861b8..5eaff3c1aa0c 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -262,7 +262,9 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); + if (err) + return err; vdso_enabled = val; - return err; + return 0; } __setup("vdso=", vdso_setup); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 83c470364dfb..74c002ddc0ce 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/ata.h> #include <linux/hdreg.h> #include <linux/cdrom.h> @@ -142,7 +143,6 @@ struct cow { #define MAX_SG 64 struct ubd { - struct list_head restart; /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; @@ -156,11 +156,8 @@ struct ubd { struct cow cow; struct platform_device pdev; struct request_queue *queue; + struct blk_mq_tag_set tag_set; spinlock_t lock; - struct scatterlist sg[MAX_SG]; - struct request *request; - int start_sg, end_sg; - sector_t rq_pos; }; #define DEFAULT_COW { \ @@ -182,10 +179,6 @@ struct ubd { .shared = 0, \ .cow = DEFAULT_COW, \ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ - .request = NULL, \ - .start_sg = 0, \ - .end_sg = 0, \ - .rq_pos = 0, \ } /* Protected by ubd_lock */ @@ -196,6 +189,9 @@ static int fake_ide = 0; static struct proc_dir_entry *proc_ide_root = NULL; static struct proc_dir_entry *proc_ide = NULL; +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); + static void make_proc_ide(void) { proc_ide_root = proc_mkdir("ide", NULL); @@ -436,11 +432,8 @@ __uml_help(udb_setup, " in the boot output.\n\n" ); -static void do_ubd_request(struct request_queue * q); - /* Only changed by ubd_init, which is an initcall. */ static int thread_fd = -1; -static LIST_HEAD(restart); /* Function to read several request pointers at a time * handling fractional reads if (and as) needed @@ -498,9 +491,6 @@ static int bulk_req_safe_read( /* Called without dev->lock held, and only in interrupt context. 
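The vdso_setup() fix in arch/sparc/vdso/vma.c above is the canonical kstrtoul() shape: return the parse error before the value is consumed, and only report success afterwards. The same idiom, stripped down (example_enabled and the "example=" parameter are stand-in names):

#include <linux/init.h>
#include <linux/kernel.h>

static unsigned long example_enabled;

static int __init example_setup(char *s)
{
        unsigned long val;
        int err;

        err = kstrtoul(s, 10, &val);
        if (err)
                return err;     /* never consume 'val' after a failed parse */

        example_enabled = val;
        return 0;
}
__setup("example=", example_setup);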
*/ static void ubd_handler(void) { - struct ubd *ubd; - struct list_head *list, *next_ele; - unsigned long flags; int n; int count; @@ -520,23 +510,17 @@ static void ubd_handler(void) return; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - blk_end_request( - (*irq_req_buffer)[count]->req, - BLK_STS_OK, - (*irq_req_buffer)[count]->length - ); - kfree((*irq_req_buffer)[count]); + struct io_thread_req *io_req = (*irq_req_buffer)[count]; + int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK; + + if (!blk_update_request(io_req->req, err, io_req->length)) + __blk_mq_end_request(io_req->req, err); + + kfree(io_req); } } - reactivate_fd(thread_fd, UBD_IRQ); - list_for_each_safe(list, next_ele, &restart){ - ubd = container_of(list, struct ubd, restart); - list_del_init(&ubd->restart); - spin_lock_irqsave(&ubd->lock, flags); - do_ubd_request(ubd->queue); - spin_unlock_irqrestore(&ubd->lock, flags); - } + reactivate_fd(thread_fd, UBD_IRQ); } static irqreturn_t ubd_intr(int irq, void *dev) @@ -857,6 +841,7 @@ static void ubd_device_release(struct device *dev) struct ubd *ubd_dev = dev_get_drvdata(dev); blk_cleanup_queue(ubd_dev->queue); + blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } @@ -891,7 +876,7 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk); + device_add_disk(parent, disk, NULL); *disk_out = disk; return 0; @@ -899,6 +884,10 @@ static int ubd_disk_register(int major, u64 size, int unit, #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) +static const struct blk_mq_ops ubd_mq_ops = { + .queue_rq = ubd_queue_rq, +}; + static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; @@ -915,15 +904,23 @@ static int ubd_add(int n, char **error_out) ubd_dev->size = ROUND_BLOCK(ubd_dev->size); - INIT_LIST_HEAD(&ubd_dev->restart); - sg_init_table(ubd_dev->sg, MAX_SG); + ubd_dev->tag_set.ops = &ubd_mq_ops; + ubd_dev->tag_set.queue_depth = 64; + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ubd_dev->tag_set.driver_data = ubd_dev; + ubd_dev->tag_set.nr_hw_queues = 1; - err = -ENOMEM; - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); - if (ubd_dev->queue == NULL) { - *error_out = "Failed to initialize device queue"; + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); + if (err) goto out; + + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); + if (IS_ERR(ubd_dev->queue)) { + err = PTR_ERR(ubd_dev->queue); + goto out_cleanup; } + ubd_dev->queue->queuedata = ubd_dev; blk_queue_write_cache(ubd_dev->queue, true, false); @@ -931,7 +928,7 @@ static int ubd_add(int n, char **error_out) err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; - goto out_cleanup; + goto out_cleanup_tags; } if (fake_major != UBD_MAJOR) @@ -949,6 +946,8 @@ static int ubd_add(int n, char **error_out) out: return err; +out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); out_cleanup: blk_cleanup_queue(ubd_dev->queue); goto out; @@ -1290,123 +1289,82 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, req->bitmap_words, bitmap_len); } -/* Called with dev->lock held */ -static void prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int ubd_queue_one_vec(struct blk_mq_hw_ctx 
*hctx, struct request *req, + u64 off, struct bio_vec *bvec) { - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; - - io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : - ubd_dev->fd; - io_req->fds[1] = ubd_dev->fd; - io_req->cow_offset = -1; - io_req->offset = offset; - io_req->length = len; - io_req->error = 0; - io_req->sector_mask = 0; - - io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; - io_req->offsets[0] = 0; - io_req->offsets[1] = ubd_dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; - io_req->sectorsize = 1 << 9; - - if(ubd_dev->cow.file != NULL) - cowify_req(io_req, ubd_dev->cow.bitmap, - ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); - -} + struct ubd *dev = hctx->queue->queuedata; + struct io_thread_req *io_req; + int ret; -/* Called with dev->lock held */ -static void prepare_flush_request(struct request *req, - struct io_thread_req *io_req) -{ - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + if (!io_req) + return -ENOMEM; io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : - ubd_dev->fd; - io_req->op = UBD_FLUSH; -} + if (dev->cow.file) + io_req->fds[0] = dev->cow.fd; + else + io_req->fds[0] = dev->fd; -static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) -{ - int n = os_write_file(thread_fd, &io_req, - sizeof(io_req)); - if (n != sizeof(io_req)) { - if (n != -EAGAIN) - printk("write to io thread failed, " - "errno = %d\n", -n); - else if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); + if (req_op(req) == REQ_OP_FLUSH) { + io_req->op = UBD_FLUSH; + } else { + io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; + io_req->offset = off; + io_req->length = bvec->bv_len; + io_req->error = 0; + io_req->sector_mask = 0; + + io_req->op = rq_data_dir(req) == READ ? 
UBD_READ : UBD_WRITE; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; + io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; + io_req->sectorsize = 1 << 9; + + if (dev->cow.file) { + cowify_req(io_req, dev->cow.bitmap, + dev->cow.bitmap_offset, dev->cow.bitmap_len); + } + } + ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); + if (ret != sizeof(io_req)) { + if (ret != -EAGAIN) + pr_err("write to io thread failed: %d\n", -ret); kfree(io_req); - return false; } - return true; + + return ret; } -/* Called with dev->lock held */ -static void do_ubd_request(struct request_queue *q) +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct io_thread_req *io_req; - struct request *req; - - while(1){ - struct ubd *dev = q->queuedata; - if(dev->request == NULL){ - struct request *req = blk_fetch_request(q); - if(req == NULL) - return; - - dev->request = req; - dev->rq_pos = blk_rq_pos(req); - dev->start_sg = 0; - dev->end_sg = blk_rq_map_sg(q, req, dev->sg); - } - - req = dev->request; + struct request *req = bd->rq; + int ret = 0; - if (req_op(req) == REQ_OP_FLUSH) { - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if (io_req == NULL) { - if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_flush_request(req, io_req); - if (submit_request(io_req, dev) == false) - return; - } + blk_mq_start_request(req); - while(dev->start_sg < dev->end_sg){ - struct scatterlist *sg = &dev->sg[dev->start_sg]; - - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if(io_req == NULL){ - if(list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_request(req, io_req, - (unsigned long long)dev->rq_pos << 9, - sg->offset, sg->length, sg_page(sg)); - - if (submit_request(io_req, dev) == false) - return; - - dev->rq_pos += sg->length >> 9; - dev->start_sg++; + if (req_op(req) == REQ_OP_FLUSH) { + ret = ubd_queue_one_vec(hctx, req, 0, NULL); + } else { + struct req_iterator iter; + struct bio_vec bvec; + u64 off = (u64)blk_rq_pos(req) << 9; + + rq_for_each_segment(bvec, req, iter) { + ret = ubd_queue_one_vec(hctx, req, off, &bvec); + if (ret < 0) + goto out; + off += bvec.bv_len; } - dev->end_sg = 0; - dev->request = NULL; } +out: + if (ret < 0) { + blk_mq_requeue_request(req, true); + } + return BLK_STS_OK; } static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 60eae744d8fd..3a3b40f79558 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -4,6 +4,7 @@ config UNICORE32 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select DMA_DIRECT_OPS select HAVE_MEMBLOCK select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP @@ -20,7 +21,6 @@ config UNICORE32 select GENERIC_IOMAP select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE - select SWIOTLB help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index bfc7abe77905..1372553dc0a9 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git 
a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h deleted file mode 100644 index 790bc2ef4af2..000000000000 --- a/arch/unicore32/include/asm/dma-mapping.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * linux/arch/unicore32/include/asm/dma-mapping.h - * - * Code specific to PKUnity SoC and UniCore ISA - * - * Copyright (C) 2001-2010 GUAN Xue-tao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef __UNICORE_DMA_MAPPING_H__ -#define __UNICORE_DMA_MAPPING_H__ - -#include <linux/swiotlb.h> - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &swiotlb_dma_ops; -} - -#endif diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index f4950fbfe574..5f72a8d1d953 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -234,9 +234,6 @@ void __init bootmem_init(void) uc32_bootmem_init(min, max_low); -#ifdef CONFIG_SWIOTLB - swiotlb_init(1); -#endif /* * Sparsemem tries to allocate bootmem in memory_present(), * so must be done after the fixed reservations diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1a0be022f91d..e8de5de1057f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2422,7 +2422,7 @@ menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER def_bool y - depends on X86_64 && HIBERNATION + depends on HIBERNATION source "kernel/power/Kconfig" diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 28764dacf018..466f66c8a7f8 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index eaa843a52907..a480356e0ed8 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -25,20 +25,6 @@ ENTRY(get_sev_encryption_bit) push %ebx push %ecx push %edx - push %edi - - /* - * RIP-relative addressing is needed to access the encryption bit - * variable. Since we are running in 32-bit mode we need this call/pop - * sequence to get the proper relative addressing. 
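The lines deleted from mem_encrypt.S relied on the classic 32-bit position-independence idiom the removed comment describes: with no EIP-relative addressing, a call to the next instruction pushes its own runtime address, which pop retrieves so the load offset can be computed against the link-time address. A bare illustration of the idiom as C inline asm (32-bit only; a sketch, not the removed code itself):

static unsigned long get_load_delta(void)
{
        unsigned long delta;

        /*
         * 'call 1f' pushes the runtime address of label 1, 'popl'
         * fetches it, and subtracting the link-time address of the
         * same label yields the relocation delta for data access.
         */
        asm ("call 1f\n\t"
             "1: popl %0\n\t"
             "subl $1b, %0"
             : "=r" (delta));

        return delta;
}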
- */ - call 1f -1: popl %edi - subl $1b, %edi - - movl enc_bit(%edi), %eax - cmpl $0, %eax - jge .Lsev_exit /* Check if running under a hypervisor */ movl $1, %eax @@ -69,15 +55,12 @@ ENTRY(get_sev_encryption_bit) movl %ebx, %eax andl $0x3f, %eax /* Return the encryption bit location */ - movl %eax, enc_bit(%edi) jmp .Lsev_exit .Lno_sev: xor %eax, %eax - movl %eax, enc_bit(%edi) .Lsev_exit: - pop %edi pop %edx pop %ecx pop %ebx @@ -113,8 +96,6 @@ ENTRY(set_sev_encryption_mask) ENDPROC(set_sev_encryption_mask) .data -enc_bit: - .int 0xffffffff #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index acd11b3bf639..2a356b948720 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -379,7 +379,6 @@ static int __init crypto_aegis128_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || !boot_cpu_has(X86_FEATURE_AES) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c index 2071c3d1ae07..dbe8bb980da1 100644 --- a/arch/x86/crypto/aegis128l-aesni-glue.c +++ b/arch/x86/crypto/aegis128l-aesni-glue.c @@ -379,7 +379,6 @@ static int __init crypto_aegis128l_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || !boot_cpu_has(X86_FEATURE_AES) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c index b5f2a8fd5a71..8bebda2de92f 100644 --- a/arch/x86/crypto/aegis256-aesni-glue.c +++ b/arch/x86/crypto/aegis256-aesni-glue.c @@ -379,7 +379,6 @@ static int __init crypto_aegis256_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || !boot_cpu_has(X86_FEATURE_AES) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c index 95cf857d2cbb..f40244eaf14d 100644 --- a/arch/x86/crypto/morus1280-sse2-glue.c +++ b/arch/x86/crypto/morus1280-sse2-glue.c @@ -40,7 +40,6 @@ MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); static int __init crypto_morus1280_sse2_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c index 615fb7bc9a32..9afaf8f8565a 100644 --- a/arch/x86/crypto/morus640-sse2-glue.c +++ b/arch/x86/crypto/morus640-sse2-glue.c @@ -40,7 +40,6 @@ MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); static int __init crypto_morus640_sse2_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2767c625a52c..fbbf1ba57ec6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -389,6 +389,13 @@ * that register for the time this macro runs */ + /* + * The high bits of the CS dword (__csh) are used for + * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case + * hardware didn't do this for us. + */ + andl $(0x0000ffff), PT_CS(%esp) + /* Are we on the entry stack? Bail out if not! 
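The entry_32.S hunk moves the masking of the CS slot's upper sixteen bits to the very top of the entry path: hardware only defines the low word of the pushed CS dword, and the entry code reuses the high bits as software flags (CS_FROM_ENTRY_STACK, CS_FROM_USER_CR3), so stale garbage must be cleared before any flag is tested or set. Conceptually (bit positions here are illustrative assumptions, not the kernel's definitions):

/* Layout sketch of the CS dword in pt_regs on 32-bit entry. */
#define CS_FROM_ENTRY_STACK     (1U << 31)      /* assumed position */
#define CS_FROM_USER_CR3        (1U << 30)      /* assumed position */

static inline unsigned int cs_sanitize(unsigned int cs_dword)
{
        /*
         * Only the low 16 bits hold the hardware selector; clear the
         * rest so the software flag bits start out well-defined.
         */
        return cs_dword & 0x0000ffffU;
}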
*/ movl PER_CPU_VAR(cpu_entry_area), %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx @@ -407,12 +414,6 @@ /* Load top of task-stack into %edi */ movl TSS_entry2task_stack(%edi), %edi - /* - * Clear unused upper bits of the dword containing the word-sized CS - * slot in pt_regs in case hardware didn't clear it for us. - */ - andl $(0x0000ffff), PT_CS(%esp) - /* Special case - entry from kernel mode via entry stack */ #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 957dfb693ecc..f95dcb209fdf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry) xorl %ebx, %ebx 1: + /* + * Always stash CR3 in %r14. This value will be restored, + * verbatim, at exit. Needed if paranoid_entry interrupted + * another entry that already switched to the user CR3 value + * but has not yet returned to userspace. + * + * This is also why CS (stashed in the "iret frame" by the + * hardware at entry) can not be used: this may be a return + * to kernel code, but with a user CR3 value. + */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ret @@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel @@ -1626,6 +1638,7 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 testl %ebx, %ebx /* swapgs needed? 
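Expressed as pseudo-C, the invariant the new paranoid_entry/paranoid_exit comments document: capture CR3 verbatim on entry, switch to the kernel CR3 if the captured value was a user one, and on exit write back exactly the captured value, since a paranoid entry may interrupt kernel code already running on the user CR3. A sketch (the single distinguishing bit is an assumption of the sketch; the real mask lives in calling.h):

#include <asm/special_insns.h>

#define EXAMPLE_USER_CR3_BIT    (1UL << 12)     /* assumed PTI user bit */

static unsigned long saved_cr3;                 /* really %r14 in the asm */

static void paranoid_enter_cr3(void)
{
        saved_cr3 = __read_cr3();               /* user or kernel CR3 */
        if (saved_cr3 & EXAMPLE_USER_CR3_BIT)
                native_write_cr3(saved_cr3 & ~EXAMPLE_USER_CR3_BIT);
}

static void paranoid_exit_cr3(void)
{
        /* Restore verbatim -- never recompute from CS or current. */
        native_write_cr3(saved_cr3);
}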
*/ diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fa3f439f0a92..141d415a8c80 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) @@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index f19856d95c60..e48ca3afa091 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -43,8 +43,9 @@ extern u8 hvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "=a" (ret), "=m" (*ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "=a" (ret), "=m" (*tv), "=m" (*tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 981ba5e8241b..8671de126eac 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -36,6 +36,7 @@ static int num_counters_llc; static int num_counters_nb; +static bool l3_mask; static HLIST_HEAD(uncore_unused_list); @@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; 
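The vclock_gettime.c hunks above fix undeclared side effects in the vDSO fallback asm: on x86-64, syscall clobbers %rcx and %r11, and the kernel writes through the ts/tv pointers, so without the "=m" outputs and extra clobbers the compiler was free to cache *ts across the call or keep live values in the destroyed registers. Condensed before/after (x86-64 only; a standalone sketch of the 64-bit fallback):

#include <time.h>

/* Broken: no memory output for *ts, no rcx/r11 clobbers. */
static long fallback_broken(long nr, long clock, struct timespec *ts)
{
        long ret;
        asm ("syscall" : "=a" (ret)
             : "0" (nr), "D" (clock), "S" (ts) : "memory");
        return ret;
}

/* Fixed: *ts is a declared output, and the clobber list is honest. */
static long fallback_fixed(long nr, long clock, struct timespec *ts)
{
        long ret;
        asm ("syscall" : "=a" (ret), "=m" (*ts)
             : "0" (nr), "D" (clock), "S" (ts)
             : "memory", "rcx", "r11");
        return ret;
}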
hwc->idx = -1; + /* + * SliceMask and ThreadMask need to be set for certain L3 events in + * Family 17h. For other events, the two fields do not affect the count. + */ + if (l3_mask) + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (event->cpu < 0) return -EINVAL; @@ -525,6 +533,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l3"; format_attr_event_df.show = &event_show_df; format_attr_event_l3.show = &event_show_l3; + l3_mask = true; } else { num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; @@ -532,6 +541,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l2"; format_attr_event_df = format_attr_event; format_attr_event_l3 = format_attr_event; + l3_mask = false; } amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 51d7c117e3c7..c07bee31abe8 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(0); + int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; @@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), }, { /* M3UPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), }, { /* M3UPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), }, { /* M3UPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), }, { /* end: all zeroes */ } }; diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5b0f613428c2..2c43e3055948 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -95,8 +95,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val) */ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) { - struct ipi_arg_ex **arg; - struct ipi_arg_ex *ipi_arg; + struct hv_send_ipi_ex **arg; + struct hv_send_ipi_ex *ipi_arg; unsigned long flags; int nr_bank = 0; int ret = 1; @@ -105,7 +105,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) return false; local_irq_save(flags); - arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); ipi_arg = *arg; if (unlikely(!ipi_arg)) @@ -135,7 +135,7 @@ ipi_mask_ex_done: static bool __send_ipi_mask(const struct cpumask *mask, int vector) { int cur_cpu, vcpu; - struct ipi_arg_non_ex ipi_arg; + struct hv_send_ipi ipi_arg; int ret = 1; trace_hyperv_send_ipi_mask(mask, vector); diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e203169931c7..6390bd8c141b 100644 --- a/arch/x86/include/asm/fixmap.h +++ 
b/arch/x86/include/asm/fixmap.h @@ -14,6 +14,16 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +/* + * Exposed to assembly code for setting up initial page tables. Cannot be + * calculated in assembly code (fixmap entries are an enum), but is sanity + * checked in the actual fixmap C code to make sure that the fixmap is + * covered fully. + */ +#define FIXMAP_PMD_NUM 2 +/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ +#define FIXMAP_PMD_TOP 507 + #ifndef __ASSEMBLY__ #include <linux/kernel.h> #include <asm/acpi.h> diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37a..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index e977b6b3a538..00e01d215f74 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -726,19 +726,21 @@ struct hv_enlightened_vmcs { #define HV_STIMER_AUTOENABLE (1ULL << 3) #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) -struct ipi_arg_non_ex { - u32 vector; - u32 reserved; - u64 cpu_mask; -}; - struct hv_vpset { u64 format; u64 valid_bank_mask; u64 bank_contents[]; }; -struct ipi_arg_ex { +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +/* HvCallSendSyntheticClusterIpiEx hypercall */ +struct hv_send_ipi_ex { u32 vector; u32 reserved; struct hv_vpset vp_set; diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 6de64840dd22..9a92a3ac2ac5 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -369,18 +369,6 @@ extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); -#ifdef CONFIG_XEN -#include <xen/xen.h> -struct bio_vec; - -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); - -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) -#endif /* CONFIG_XEN */ - #define IO_SPACE_LIMIT 0xffff #include <asm-generic/io.h> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8e90488c3d56..09b2e3e2cf1b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -869,6 +869,8 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + + bool guest_can_read_msr_platform_info; }; struct kvm_vm_stat { @@ -1022,6 +1024,7 @@ struct kvm_x86_ops { void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); + bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); @@ -1055,6 +1058,7 @@ struct kvm_x86_ops { bool (*umip_emulated)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + void 
(*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1482,6 +1486,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); +void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c0643831706e..616f8e637bc3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); +void __init mem_encrypt_free_decrypted_mem(void); bool sme_active(void); bool sev_active(void); +#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +#define __bss_decrypted + #endif /* CONFIG_AMD_MEM_ENCRYPT */ /* @@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; #define __sme_pa(x) (__pa(x) | sme_me_mask) #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) +extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e9202a0de8f0..1a19d11cfbbd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8..78241b736f2a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ce2b59047cb8..9c85b54bf03c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> +#include <asm/fixmap.h> extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; @@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512]; extern pmd_t 
level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; -extern pte_t level1_fixmap_pgt[512]; +extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b64acb08a62b..106b7d0e2dae 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h index ecffe81ff65c..a892494ca5e4 100644 --- a/arch/x86/include/asm/suspend.h +++ b/arch/x86/include/asm/suspend.h @@ -4,3 +4,11 @@ #else # include <asm/suspend_64.h> #endif +extern unsigned long restore_jump_address __visible; +extern unsigned long jump_address_phys; +extern unsigned long restore_cr3 __visible; +extern unsigned long temp_pgt __visible; +extern unsigned long relocated_restore_code __visible; +extern int relocate_restore_code(void); +/* Defined in hibernate_asm_32/64.S */ +extern asmlinkage __visible int restore_image(void); diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 8be6afb58471..fdbd9d7b7bca 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -32,4 +32,8 @@ struct saved_context { unsigned long return_address; } __attribute__((packed)); +/* routines for saving/restoring kernel state */ +extern char core_restore_code[]; +extern char restore_registers[]; + #endif /* _ASM_X86_SUSPEND_32_H */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index a80c0673798f..e60c45fd3679 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,8 +10,13 @@ struct cpumask; struct mm_struct; #ifdef CONFIG_X86_UV +#include <linux/efi.h> extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); +} extern int is_uv_system(void); extern int is_uv_hubless(void); extern void uv_cpu_init(void); @@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, #else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index d383140e1dc8..068d9b067c83 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_XEN_EVENTS_H #define _ASM_X86_XEN_EVENTS_H +#include <xen/xen.h> + enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..fd23d5778ea1 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -377,6 +377,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define 
KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index f299d8a479bb..3f9d1b4019bb 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -482,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, { void *vaddr; - vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); + vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs); if (!vaddr || !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) return vaddr; @@ -494,7 +494,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, goto out_free; return vaddr; out_free: - dma_direct_free(dev, size, vaddr, *dma_addr, attrs); + dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -504,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_direct_free(dev, size, vaddr, dma_addr, attrs); + dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22ab408177b2..eeea634bee0a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c) static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 4e588f36228f..3736f6dc9545 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e) e <= QOS_L3_MBM_LOCAL_EVENT_ID); } +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + /** * struct rdt_resource - attributes of an RDT resource * @rid: The index of the resource @@ -423,16 +428,19 @@ struct rdt_resource { struct rdt_cache cache; struct rdt_membw membw; const char *format_str; - int (*parse_ctrlval) (void *data, struct rdt_resource *r, - struct rdt_domain *d); + int (*parse_ctrlval)(struct rdt_parse_data *data, + struct rdt_resource *r, + struct rdt_domain *d); struct list_head evt_list; int num_rmid; unsigned int mon_scale; unsigned long fflags; }; -int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d); -int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d); +int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); extern struct mutex rdtgroup_mutex; @@ -521,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive); + unsigned long cbm, int closid, bool exclusive); unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, - u32 cbm); + unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool 
rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); @@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); int update_domains(struct rdt_resource *r, int closid); +int closids_supported(void); void closid_free(int closid); int alloc_rmid(void); void free_rmid(u32 rmid); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index af358ca05160..0f53049719cd 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return true; } -int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d) +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { - unsigned long data; - char *buf = _buf; + unsigned long bw_val; if (d->have_new_ctrl) { rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; } - if (!bw_validate(buf, &data, r)) + if (!bw_validate(data->buf, &bw_val, r)) return -EINVAL; - d->new_ctrl = data; + d->new_ctrl = bw_val; d->have_new_ctrl = true; return 0; @@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } -struct rdt_cbm_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - /* * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ -int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) +int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { - struct rdt_cbm_parse_data *data = _data; struct rdtgroup *rdtgrp = data->rdtgrp; u32 cbm_val; @@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) static int parse_line(char *line, struct rdt_resource *r, struct rdtgroup *rdtgrp) { - struct rdt_cbm_parse_data data; + struct rdt_parse_data data; char *dom = NULL, *id; struct rdt_domain *d; unsigned long dom_id; + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && + r->rid == RDT_RESOURCE_MBA) { + rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); + return -EINVAL; + } + next: if (!line || line[0] == '\0') return 0; diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c index 40f3903ae5d9..f8c260d522ca 100644 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c @@ -797,25 +797,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) /** * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked * @d: RDT domain - * @_cbm: CBM to test + * @cbm: CBM to test * - * @d represents a cache instance and @_cbm a capacity bitmask that is - * considered for it. Determine if @_cbm overlaps with any existing + * @d represents a cache instance and @cbm a capacity bitmask that is + * considered for it. Determine if @cbm overlaps with any existing * pseudo-locked region on @d. * - * Return: true if @_cbm overlaps with pseudo-locked region on @d, false + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. 
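The u32-to-unsigned-long conversions running through these RDT hunks (including the function continuing below) all close the same latent bug: bitmap_intersects() and bitmap_weight() take unsigned long *, so casting the address of a u32 makes them read a full 8-byte word on 64-bit kernels, half of it out of bounds. The safe idiom the patch adopts is to widen into a local first; a condensed sketch:

#include <linux/bitmap.h>

static bool cbm_overlaps(u32 a, u32 b, unsigned int cbm_len)
{
        /*
         * Widen into real unsigned longs so the bitmap helpers have a
         * full word to read; (unsigned long *)&a would read past the
         * u32 on 64-bit.
         */
        unsigned long la = a, lb = b;

        return bitmap_intersects(&la, &lb, cbm_len);
}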
+ * + * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *cbm_b; unsigned int cbm_len; + unsigned long cbm_b; if (d->plr) { cbm_len = d->plr->r->cache.cbm_len; - cbm_b = (unsigned long *)&d->plr->cbm; - if (bitmap_intersects(cbm, cbm_b, cbm_len)) + cbm_b = d->plr->cbm; + if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) return true; } return false; diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index b799c00bef09..b140c68bc14b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...) * limited as the number of resources grows. */ static int closid_free_map; +static int closid_free_map_len; + +int closids_supported(void) +{ + return closid_free_map_len; +} static void closid_init(void) { @@ -111,6 +117,7 @@ static void closid_init(void) /* CLOSID 0 is always reserved for the default group */ closid_free_map &= ~1; + closid_free_map_len = rdt_min_closid; } static int closid_alloc(void) @@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, sw_shareable = 0; exclusive = 0; seq_printf(seq, "%d=", dom->id); - for (i = 0; i < r->num_closid; i++, ctrl++) { + for (i = 0; i < closids_supported(); i++, ctrl++) { if (!closid_allocated(i)) continue; mode = rdtgroup_mode_by_closid(i); @@ -968,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, * is false then overlaps with any resource group or hardware entities * will be considered. * + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * * Return: false if CBM does not overlap, true if it does. 
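closids_supported() exists because the usable CLOSID count is the minimum across all alloc-enabled resources, fixed once when the free map is built; iterating up to an individual resource's num_closid could step past what other resources (and the int-sized free map) can represent. A sketch of the initialization it records, condensed from closid_init() and relying on the driver's internal intel_rdt.h:

static int closid_free_map;
static int closid_free_map_len;

int closids_supported(void)
{
        return closid_free_map_len;
}

static void closid_init(void)
{
        int rdt_min_closid = 32;
        struct rdt_resource *r;

        /* The effective CLOSID space is the smallest one offered by
         * any alloc-enabled resource. */
        for_each_alloc_enabled_rdt_resource(r)
                rdt_min_closid = min(rdt_min_closid, r->num_closid);

        closid_free_map = BIT_MASK(rdt_min_closid) - 1;

        /* CLOSID 0 is always reserved for the default group */
        closid_free_map &= ~1;
        closid_free_map_len = rdt_min_closid;
}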
*/ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive) + unsigned long cbm, int closid, bool exclusive) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *ctrl_b; enum rdtgrp_mode mode; + unsigned long ctrl_b; u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ if (!exclusive) { - if (bitmap_intersects(cbm, - (unsigned long *)&r->cache.shareable_bits, - r->cache.cbm_len)) + ctrl_b = r->cache.shareable_bits; + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) return true; } /* Check for overlap with other resource groups */ ctrl = d->ctrl_val; - for (i = 0; i < r->num_closid; i++, ctrl++) { - ctrl_b = (unsigned long *)ctrl; + for (i = 0; i < closids_supported(); i++, ctrl++) { + ctrl_b = *ctrl; mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { - if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) { + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { if (exclusive) { if (mode == RDT_MODE_EXCLUSIVE) return true; @@ -1024,16 +1032,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; struct rdt_resource *r; + bool has_cache = false; struct rdt_domain *d; for_each_alloc_enabled_rdt_resource(r) { + if (r->rid == RDT_RESOURCE_MBA) + continue; + has_cache = true; list_for_each_entry(d, &r->domains, list) { if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], - rdtgrp->closid, false)) + rdtgrp->closid, false)) { + rdt_last_cmd_puts("schemata overlaps\n"); return false; + } } } + if (!has_cache) { + rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n"); + return false; + } + return true; } @@ -1085,7 +1104,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, rdtgrp->mode = RDT_MODE_SHAREABLE; } else if (!strcmp(buf, "exclusive")) { if (!rdtgroup_mode_test_exclusive(rdtgrp)) { - rdt_last_cmd_printf("schemata overlaps\n"); ret = -EINVAL; goto out; } @@ -1121,15 +1139,18 @@ out: * computed by first dividing the total cache size by the CBM length to * determine how many bytes each bit in the bitmask represents. The result * is multiplied with the number of bits set in the bitmask. + * + * @cbm is unsigned long, even if only 32 bits are used to make the + * bitmap functions work correctly. 
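rdtgroup_cbm_to_size(), continuing below, implements exactly what this kerneldoc describes: total cache size divided by CBM length gives the bytes one mask bit represents, scaled by the number of set bits. As a worked example, a 24576 KiB L3 with a 20-bit CBM yields roughly 1228 KiB per bit, so cbm = 0xf maps to about 4915 KiB. Condensed (cache-size lookup elided):

#include <linux/bitmap.h>

static unsigned int cbm_to_size(unsigned int cache_bytes,
                                unsigned int cbm_len, unsigned long cbm)
{
        /* Bytes represented by one CBM bit, times the bits set. */
        return (cache_bytes / cbm_len) * bitmap_weight(&cbm, cbm_len);
}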
*/ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, u32 cbm) + struct rdt_domain *d, unsigned long cbm) { struct cpu_cacheinfo *ci; unsigned int size = 0; int num_b, i; - num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len); + num_b = bitmap_weight(&cbm, r->cache.cbm_len); ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == r->cache_level) { @@ -1155,8 +1176,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, struct rdt_resource *r; struct rdt_domain *d; unsigned int size; - bool sep = false; - u32 cbm; + bool sep; + u32 ctrl; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -1174,6 +1195,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, } for_each_alloc_enabled_rdt_resource(r) { + sep = false; seq_printf(s, "%*s:", max_name_width, r->name); list_for_each_entry(d, &r->domains, list) { if (sep) @@ -1181,8 +1203,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { size = 0; } else { - cbm = d->ctrl_val[rdtgrp->closid]; - size = rdtgroup_cbm_to_size(r, d, cbm); + ctrl = (!is_mba_sc(r) ? + d->ctrl_val[rdtgrp->closid] : + d->mbps_val[rdtgrp->closid]); + if (r->rid == RDT_RESOURCE_MBA) + size = ctrl; + else + size = rdtgroup_cbm_to_size(r, d, ctrl); } seq_printf(s, "%d=%u", d->id, size); sep = true; @@ -2330,18 +2357,25 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) u32 used_b = 0, unused_b = 0; u32 closid = rdtgrp->closid; struct rdt_resource *r; + unsigned long tmp_cbm; enum rdtgrp_mode mode; struct rdt_domain *d; int i, ret; u32 *ctrl; for_each_alloc_enabled_rdt_resource(r) { + /* + * Only initialize default allocations for CBM cache + * resources + */ + if (r->rid == RDT_RESOURCE_MBA) + continue; list_for_each_entry(d, &r->domains, list) { d->have_new_ctrl = false; d->new_ctrl = r->cache.shareable_bits; used_b = r->cache.shareable_bits; ctrl = d->ctrl_val; - for (i = 0; i < r->num_closid; i++, ctrl++) { + for (i = 0; i < closids_supported(); i++, ctrl++) { if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) @@ -2361,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) * modify the CBM based on system availability. */ cbm_ensure_valid(&d->new_ctrl, r); - if (bitmap_weight((unsigned long *) &d->new_ctrl, - r->cache.cbm_len) < - r->cache.min_cbm_bits) { + /* + * Assign the u32 CBM to an unsigned long to ensure + * that bitmap_weight() does not access out-of-bound + * memory. + */ + tmp_cbm = d->new_ctrl; + if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < + r->cache.min_cbm_bits) { rdt_last_cmd_printf("no space on %s:%d\n", r->name, d->id); return -ENOSPC; @@ -2373,6 +2412,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) } for_each_alloc_enabled_rdt_resource(r) { + /* + * Only initialize default allocations for CBM cache + * resources + */ + if (r->rid == RDT_RESOURCE_MBA) + continue; ret = update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("failed to initialize allocations\n"); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 23f1691670b6..61a949d84dfa 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Validate and sanitize the copied state. 
*/ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8047379e575a..ddee1f0870c4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -35,6 +35,7 @@ #include <asm/bootparam_utils.h> #include <asm/microcode.h> #include <asm/kasan.h> +#include <asm/fixmap.h> /* * Manage page tables very early on. @@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr) unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { + unsigned long vaddr, vaddr_end; unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; @@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pud[511] += load_delta; pmd = fixup_pointer(level2_fixmap_pgt, physaddr); - pmd[506] += load_delta; + for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--) + pmd[i] += load_delta; /* * Set up the identity mapping for the switchover. These @@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr, sme_encrypt_kernel(bp); /* + * Clear the memory encryption mask from the .bss..decrypted section. + * The bss section will be memset to zero later in the initialization so + * there is no need to zero it after changing the memory encryption + * attribute. + */ + if (mem_encrypt_active()) { + vaddr = (unsigned long)__start_bss_decrypted; + vaddr_end = (unsigned long)__end_bss_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + i = pmd_index(vaddr); + pmd[i] -= sme_get_me_mask(); + } + } + + /* * Return the SME encryption mask (if SME is active) to be used as a * modifier for the initial pgdir entry programmed into CR3. */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 15ebc2fc166e..a3618cf04cf6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -24,6 +24,7 @@ #include "../entry/calling.h" #include <asm/export.h> #include <asm/nospec-branch.h> +#include <asm/fixmap.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt) KERNEL_IMAGE_SIZE/PMD_SIZE) NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 + .fill (512 - 4 - FIXMAP_PMD_NUM),8,0 + pgtno = 0 + .rept (FIXMAP_PMD_NUM) + .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \ + + _PAGE_TABLE_NOENC; + pgtno = pgtno + 1 + .endr + /* 6 MB reserved space + a 2MB hole */ + .fill 4,8,0 NEXT_PAGE(level1_fixmap_pgt) + .rept (FIXMAP_PMD_NUM) .fill 512,8,0 + .endr #undef PMDS diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1e6764648af3..013fe3d21dbb 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -28,6 +28,7 @@ #include <linux/sched/clock.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/set_memory.h> #include <asm/hypervisor.h> #include <asm/mem_encrypt.h> @@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) static struct pvclock_vsyscall_time_info - hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE); -static struct pvclock_wall_clock wall_clock; + hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); +static struct pvclock_wall_clock wall_clock __bss_decrypted; static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, 
hv_clock_per_cpu); +static struct pvclock_vsyscall_time_info *hvclock_mem; static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) { @@ -236,6 +238,45 @@ static void kvm_shutdown(void) native_machine_shutdown(); } +static void __init kvmclock_init_mem(void) +{ + unsigned long ncpus; + unsigned int order; + struct page *p; + int r; + + if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus()) + return; + + ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE; + order = get_order(ncpus * sizeof(*hvclock_mem)); + + p = alloc_pages(GFP_KERNEL, order); + if (!p) { + pr_warn("%s: failed to alloc %d pages", __func__, (1U << order)); + return; + } + + hvclock_mem = page_address(p); + + /* + * hvclock is shared between the guest and the hypervisor, must + * be mapped decrypted. + */ + if (sev_active()) { + r = set_memory_decrypted((unsigned long) hvclock_mem, + 1UL << order); + if (r) { + __free_pages(p, order); + hvclock_mem = NULL; + pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n"); + return; + } + } + + memset(hvclock_mem, 0, PAGE_SIZE << order); +} + static int __init kvm_setup_vsyscall_timeinfo(void) { #ifdef CONFIG_X86_64 @@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void) kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif + + kvmclock_init_mem(); + return 0; } early_initcall(kvm_setup_vsyscall_timeinfo); @@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu) /* Use the static page for the first CPUs, allocate otherwise */ if (cpu < HVC_BOOT_ARRAY_SIZE) p = &hv_clock_boot[cpu]; + else if (hvclock_mem) + p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE; else - p = kzalloc(sizeof(*p), GFP_KERNEL); + return -ENOMEM; per_cpu(hv_clock_per_cpu, cpu) = p; return p ? 0 : -ENOMEM; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index afdb303285f8..8dc69d82567e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } @@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 661583662430..71c0b01d93b1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override, int __init pci_swiotlb_detect_4gb(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ -#ifdef CONFIG_X86_64 if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) swiotlb = 1; -#endif /* * If SME is active then swiotlb will be set to 1 so that bounce diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4866badb235..90ecc108bc8a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1251,7 +1251,7 @@ void __init setup_arch(char **cmdline_p) x86_init.hyper.guest_late_init(); e820__reserve_resources(); - e820__register_nosave_regions(max_low_pfn); + e820__register_nosave_regions(max_pfn); x86_init.resources.reserve_resources(); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 
be01328eb755..fddaefc51fb6 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,7 +25,7 @@ #include <asm/time.h> #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6490f618e096..6d5dc5dabfd7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -26,6 +26,7 @@ #include <asm/apic.h> #include <asm/intel-family.h> #include <asm/i8259.h> +#include <asm/uv/uv.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -57,7 +58,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -74,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -103,7 +104,7 @@ void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; @@ -1433,6 +1434,9 @@ void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; + /* Don't change UV TSC multi-chassis synchronization */ + if (is_early_uv_system()) + return; if (!determine_cpu_tsc_frequencies(true)) return; loops_per_jiffy = get_loops_per_jiffy(); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8bde0a419f86..5dd3317d761f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -65,6 +65,23 @@ jiffies_64 = jiffies; #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); #define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); +/* + * This section contains data which will be mapped as decrypted. Memory + * encryption operates on a page basis. Make this section PMD-aligned + * to avoid splitting the pages while mapping the section early. + * + * Note: We use a separate section so that only this section gets + * decrypted to avoid exposing more than we wish. + */ +#define BSS_DECRYPTED \ + . = ALIGN(PMD_SIZE); \ + __start_bss_decrypted = .; \ + *(.bss..decrypted); \ + . = ALIGN(PAGE_SIZE); \ + __start_bss_decrypted_unused = .; \ + . = ALIGN(PMD_SIZE); \ + __end_bss_decrypted = .; \ + #else #define X86_ALIGN_RODATA_BEGIN @@ -74,6 +91,7 @@ jiffies_64 = jiffies; #define ALIGN_ENTRY_TEXT_BEGIN #define ALIGN_ENTRY_TEXT_END +#define BSS_DECRYPTED #endif @@ -355,6 +373,7 @@ SECTIONS __bss_start = .; *(.bss..page_aligned) *(.bss) + BSS_DECRYPTED . 
= ALIGN(PAGE_SIZE); __bss_stop = .; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 17c0472c5b34..fbb0e6df121b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1344,9 +1344,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read); static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) { - return kvm_apic_hw_enabled(apic) && - addr >= apic->base_address && - addr < apic->base_address + LAPIC_MMIO_LENGTH; + return addr >= apic->base_address && + addr < apic->base_address + LAPIC_MMIO_LENGTH; } static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, @@ -1358,6 +1357,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!apic_mmio_in_range(apic, address)) return -EOPNOTSUPP; + if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { + if (!kvm_check_has_quirk(vcpu->kvm, + KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) + return -EOPNOTSUPP; + + memset(data, 0xff, len); + return 0; + } + kvm_lapic_reg_read(apic, offset, len, data); return 0; @@ -1917,6 +1925,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!apic_mmio_in_range(apic, address)) return -EOPNOTSUPP; + if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { + if (!kvm_check_has_quirk(vcpu->kvm, + KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) + return -EOPNOTSUPP; + + return 0; + } + /* * APIC register must be aligned on 128-bits boundary. * 32/64/128 bits registers must be accessed thru 32 bits. diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e24ea7067373..51b953ad9d4e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; */ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +/* + * In some cases, we need to preserve the GFN of a non-present or reserved + * SPTE when we usurp the upper five bits of the physical address space to + * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll + * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask + * left into the reserved bits, i.e. the GFN in the SPTE will be split into + * high and low parts. This mask covers the lower bits of the GFN. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + + static void mmu_spte_set(u64 *sptep, u64 spte); static union kvm_mmu_page_role kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); @@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | - shadow_nonpresent_or_rsvd_mask; - u64 gpa = spte & ~mask; + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) & shadow_nonpresent_or_rsvd_mask; @@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static void kvm_mmu_reset_all_pte_masks(void) { + u8 low_phys_bits; + shadow_user_mask = 0; shadow_accessed_mask = 0; shadow_dirty_mask = 0; @@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void) * appropriate mask to guard against L1TF attacks. Otherwise, it is * assumed that the CPU is not vulnerable to L1TF. 
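The decode in get_mmio_spte_gfn() above has an implied encode side. A sketch of it, using the mask variables from this hunk; the helper name is invented, and the real setter also mixes in generation and access bits:

/*
 * Sketch, not from the patch: every bit of the L1TF reserved-bit mask
 * is set unconditionally, and the GFN bits that would collide with
 * that mask are parked shadow_nonpresent_or_rsvd_mask_len positions
 * higher, where the decode above shifts them back down.
 */
static u64 encode_mmio_gpa(u64 gpa)
{
	u64 spte = gpa | shadow_nonpresent_or_rsvd_mask;

	spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
			<< shadow_nonpresent_or_rsvd_mask_len;
	return spte;
}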
*/ + low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_data.x86_phys_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) + 52 - shadow_nonpresent_or_rsvd_mask_len) { shadow_nonpresent_or_rsvd_mask = rsvd_bits(boot_cpu_data.x86_phys_bits - shadow_nonpresent_or_rsvd_mask_len, boot_cpu_data.x86_phys_bits - 1); + low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; + } + shadow_nonpresent_or_rsvd_lower_gfn_mask = + GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); } static int is_cpuid_PSE36(void) @@ -899,7 +915,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { /* * Make sure the write to vcpu->mode is not reordered in front of - * reads to sptes. If it does, kvm_commit_zap_page() can see us + * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. */ smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); @@ -5417,7 +5433,12 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) { MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); - kvm_init_mmu(vcpu, true); + /* + * kvm_mmu_setup() is called only on vCPU initialization. + * Therefore, no need to reset mmu roots as they are not yet + * initialized. + */ + kvm_init_mmu(vcpu, false); } static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 89c4c5aa15f1..61ccfb13899e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) static inline bool svm_sev_enabled(void) { - return max_sev_asid; + return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; } static inline bool sev_guest(struct kvm *kvm) { +#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; +#else + return false; +#endif } static inline int sev_get_asid(struct kvm *kvm) @@ -1226,8 +1230,7 @@ static __init int sev_hardware_setup(void) min_sev_asid = cpuid_edx(0x8000001F); /* Initialize SEV ASID bitmap */ - sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid), - sizeof(unsigned long), GFP_KERNEL); + sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_asid_bitmap) return 1; @@ -1405,7 +1408,7 @@ static __exit void svm_hardware_unsetup(void) int cpu; if (svm_sev_enabled()) - kfree(sev_asid_bitmap); + bitmap_free(sev_asid_bitmap); for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); @@ -7149,6 +7152,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .check_intercept = svm_check_intercept, .handle_external_intr = svm_handle_external_intr, + .request_immediate_exit = __kvm_request_immediate_exit, + .sched_in = svm_sched_in, .pmu_ops = &amd_pmu_ops, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 533a327372c8..e665aa7167cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO); #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 -#define MSR_BITMAP_MODE_LM 4 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL @@ -397,6 +396,7 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; + bool hv_timer_armed; /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; ktime_t entry_time; @@ -856,6 +856,7 @@ struct nested_vmx { /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ u64 vmcs01_debugctl; + u64 vmcs01_guest_bndcfgs; u16 vpid02; u16 last_vpid; @@ -1019,6 +1020,8 @@ struct vcpu_vmx { int ple_window; bool ple_window_dirty; + bool req_immediate_exit; + /* 
Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; @@ -1569,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm) goto out; } + /* + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the + * base of EPT PML4 table, strip off EPT configuration information. + */ ret = hyperv_flush_guest_mapping( - to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); out: spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); @@ -2864,6 +2871,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) u16 fs_sel, gs_sel; int i; + vmx->req_immediate_exit = false; + if (vmx->loaded_cpu_state) return; @@ -2894,8 +2903,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); } - if (is_long_mode(&vmx->vcpu)) - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #else savesegment(fs, fs_sel); savesegment(gs, gs_sel); @@ -2946,8 +2954,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) vmx->loaded_cpu_state = NULL; #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #endif if (host_state->ldt_sel || (host_state->gs_sel & 7)) { kvm_load_ldt(host_state->ldt_sel); @@ -2975,24 +2982,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - rdmsrl(MSR_KERNEL_GS_BASE, - vmx->msr_guest_kernel_gs_base); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + preempt_enable(); return vmx->msr_guest_kernel_gs_base; } static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); + preempt_enable(); vmx->msr_guest_kernel_gs_base = data; } #endif @@ -3528,9 +3530,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (kvm_mpx_supported()) - msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; @@ -3547,8 +3546,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_ENTRY_LOAD_IA32_PAT; msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (kvm_mpx_supported()) - msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. 
*/ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; @@ -3596,12 +3593,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high); msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING; + /* * We can emulate "VMCS shadowing," even if the hardware * doesn't support it. @@ -3658,6 +3655,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (flexpriority_enabled) + msrs->secondary_ctls_high |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, msrs->misc_low, @@ -5068,19 +5069,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!msr) return; - /* - * MSR_KERNEL_GS_BASE is not intercepted when the guest is in - * 64-bit mode as a 64-bit kernel may frequently access the - * MSR. This means we need to manually save/restore the MSR - * when switching between guest and host state, but only if - * the guest is in 64-bit mode. Sync our cached value if the - * guest is transitioning to 32-bit mode and the CPU contains - * guest state, i.e. the cache is stale. - */ -#ifdef CONFIG_X86_64 - if (!(efer & EFER_LMA)) - (void)vmx_read_guest_kernel_gs_base(vmx); -#endif vcpu->arch.efer = efer; if (efer & EFER_LMA) { vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); @@ -5393,9 +5381,10 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * To use VMXON (and later other VMX instructions), a guest * must first be able to turn on cr4.VMXE (see handle_vmon()). * So basically the check on whether to allow nested VMX - * is here. + * is here. We operate under the default treatment of SMM, + * so VMX cannot be enabled under SMM. 
*/ - if (!nested_vmx_allowed(vcpu)) + if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) return 1; } @@ -6072,9 +6061,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) mode |= MSR_BITMAP_MODE_X2APIC_APICV; } - if (is_long_mode(vcpu)) - mode |= MSR_BITMAP_MODE_LM; - return mode; } @@ -6115,9 +6101,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) if (!changed) return; - vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, - !(mode & MSR_BITMAP_MODE_LM)); - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); @@ -6183,6 +6166,32 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) nested_mark_vmcs12_pages_dirty(vcpu); } +static u8 vmx_get_rvi(void) +{ + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; +} + +static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + void *vapic_page; + u32 vppr; + int rvi; + + if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || + !nested_cpu_has_vid(get_vmcs12(vcpu)) || + WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) + return false; + + rvi = vmx_get_rvi(); + + vapic_page = kmap(vmx->nested.virtual_apic_page); + vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); + kunmap(vmx->nested.virtual_apic_page); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, bool nested) { @@ -7966,6 +7975,9 @@ static __init int hardware_setup(void) kvm_x86_ops->enable_log_dirty_pt_masked = NULL; } + if (!cpu_has_vmx_preemption_timer()) + kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; + if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { u64 vmx_msr; @@ -9208,7 +9220,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) static int handle_preemption_timer(struct kvm_vcpu *vcpu) { - kvm_lapic_expired_hv_timer(vcpu); + if (!to_vmx(vcpu)->req_immediate_exit) + kvm_lapic_expired_hv_timer(vcpu); return 1; } @@ -10214,15 +10227,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + if (!flexpriority_enabled && + !cpu_has_vmx_virtualize_x2apic_mode()) + return; + /* Postpone execution until vmcs01 is the current VMCS. 
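The (rvi & 0xf0) > (vppr & 0xf0) test above is the standard APIC priority-class comparison. As a standalone sketch, with an invented helper name:

/*
 * Sketch: an 8-bit APIC vector's priority class is its upper nibble,
 * giving 16 classes. A pending vector in RVI is deliverable only when
 * its class is strictly above the processor-priority class in VPPR.
 */
static bool apic_class_above_ppr(u8 rvi, u8 vppr)
{
	return (rvi & 0xf0) > (vppr & 0xf0);
}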
*/ if (is_guest_mode(vcpu)) { to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; return; } - if (!cpu_need_tpr_shadow(vcpu)) - return; - sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); @@ -10344,6 +10358,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) +{ + u8 rvi = vmx_get_rvi(); + u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -10595,24 +10617,43 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } -static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) +static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) +{ + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); + if (!vmx->loaded_vmcs->hv_timer_armed) + vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); + vmx->loaded_vmcs->hv_timer_armed = true; +} + +static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 tscl; u32 delta_tsc; - if (vmx->hv_deadline_tsc == -1) + if (vmx->req_immediate_exit) { + vmx_arm_hv_timer(vmx, 0); return; + } - tscl = rdtsc(); - if (vmx->hv_deadline_tsc > tscl) - /* sure to be 32 bit only because checked on set_hv_timer */ - delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> - cpu_preemption_timer_multi); - else - delta_tsc = 0; + if (vmx->hv_deadline_tsc != -1) { + tscl = rdtsc(); + if (vmx->hv_deadline_tsc > tscl) + /* set_hv_timer ensures the delta fits in 32-bits */ + delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> + cpu_preemption_timer_multi); + else + delta_tsc = 0; + + vmx_arm_hv_timer(vmx, delta_tsc); + return; + } - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); + if (vmx->loaded_vmcs->hv_timer_armed) + vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); + vmx->loaded_vmcs->hv_timer_armed = false; } static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) @@ -10672,7 +10713,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); - vmx_arm_hv_timer(vcpu); + vmx_update_hv_timer(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -11214,6 +11255,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) #undef cr4_fixed1_update } +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (kvm_mpx_supported()) { + bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); + + if (mpx_enabled) { + vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + } else { + vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; + } + } +} + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -11230,8 +11288,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (nested_vmx_allowed(vcpu)) + if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); + nested_vmx_entry_exit_ctls_update(vcpu); + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ 
-11427,16 +11487,18 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vcpu->arch.virtual_tsc_khz == 0) - return; - - /* Make sure short timeouts reliably trigger an immediate vmexit. - * hrtimer_start does not guarantee this. */ - if (preemption_timeout <= 1) { + /* + * A timer value of zero is architecturally guaranteed to cause + * a VMExit prior to executing any instructions in the guest. + */ + if (preemption_timeout == 0) { vmx_preemption_timer_fn(&vmx->nested.preemption_timer); return; } + if (vcpu->arch.virtual_tsc_khz == 0) + return; + preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; preemption_timeout *= 1000000; do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); @@ -11646,11 +11708,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, * bits 15:8 should be zero in posted_intr_nv, * the descriptor address has been already checked * in nested_get_vmcs12_pages. + * + * bits 5:0 of posted_intr_desc_addr should be zero. */ if (nested_cpu_has_posted_intr(vmcs12) && (!nested_cpu_has_vid(vmcs12) || !nested_exit_intr_ack_set(vcpu) || - vmcs12->posted_intr_nv & 0xff00)) + (vmcs12->posted_intr_nv & 0xff00) || + (vmcs12->posted_intr_desc_addr & 0x3f) || + (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr)))) return -EINVAL; /* tpr shadow is needed by all apicv features. */ @@ -11993,8 +12059,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) set_cr4_guest_host_mask(vmx); - if (vmx_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + if (kvm_mpx_supported()) { + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + else + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); + } if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) @@ -12076,11 +12147,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, exec_control = vmcs12->pin_based_vm_exec_control; - /* Preemption timer setting is only taken from vmcs01. */ - exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + /* Preemption timer setting is computed directly in vmx_vcpu_run. */ exec_control |= vmcs_config.pin_based_exec_ctrl; - if (vmx->hv_deadline_tsc == -1) - exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + vmx->loaded_vmcs->hv_timer_armed = false; /* Posted interrupts setting is only taken from vmcs12. 
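vmx_update_hv_timer() above resolves one of three cases on every VM entry. A compact sketch of that decision, with invented names:

/* Sketch, not from the patch: the per-entry preemption-timer choice. */
enum hv_timer_action { HV_ARM_ZERO, HV_ARM_DELTA, HV_DISARM };

static enum hv_timer_action pick_hv_timer_action(bool immediate_exit,
						 u64 deadline_tsc)
{
	if (immediate_exit)
		return HV_ARM_ZERO;	/* timer value 0: exit before any guest insn */
	if (deadline_tsc != -1ULL)
		return HV_ARM_DELTA;	/* count down to the emulated APIC deadline */
	return HV_DISARM;		/* clear PIN_BASED_VMX_PREEMPTION_TIMER */
}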
*/ if (nested_cpu_has_posted_intr(vmcs12)) { @@ -12318,6 +12388,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; @@ -12537,15 +12610,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); bool from_vmentry = !!exit_qual; u32 dummy_exit_qual; - u32 vmcs01_cpu_exec_ctrl; + bool evaluate_pending_interrupts; int r = 0; - vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); + if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) + evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + if (kvm_mpx_supported() && + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); @@ -12585,16 +12664,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) * to L1 or delivered directly to L2 (e.g. In case L1 don't * intercept EXTERNAL_INTERRUPT). * - * Usually this would be handled by L0 requesting a - * IRQ/NMI window by setting VMCS accordingly. However, - * this setting was done on VMCS01 and now VMCS02 is active - * instead. Thus, we force L0 to perform pending event - * evaluation by requesting a KVM_REQ_EVENT. - */ - if (vmcs01_cpu_exec_ctrl & - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) { + * Usually this would be handled by the processor noticing an + * IRQ/NMI window request, or checking RVI during evaluation of + * pending virtual interrupts. However, this setting was done + * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 + * to perform pending event evaluation by requesting a KVM_REQ_EVENT. + */ + if (unlikely(evaluate_pending_interrupts)) kvm_make_request(KVM_REQ_EVENT, vcpu); - } /* * Note no nested_vmx_succeed or nested_vmx_fail here. 
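The evaluate_pending_interrupts computation above can be read as a single predicate. A sketch with an invented name, built from the helpers this patch adds:

static bool nested_entry_needs_event_eval(struct kvm_vcpu *vcpu)
{
	u32 exec = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);

	/* vmcs01 had an IRQ/NMI window request open ... */
	if (exec & (CPU_BASED_VIRTUAL_INTR_PENDING |
		    CPU_BASED_VIRTUAL_NMI_PENDING))
		return true;

	/* ... or APICv reports an unmasked pending virtual interrupt. */
	return kvm_vcpu_apicv_active(vcpu) && vmx_has_apicv_interrupt(vcpu);
}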
At this point @@ -12863,6 +12940,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } +static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) +{ + to_vmx(vcpu)->req_immediate_exit = true; +} + static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) { ktime_t remaining = @@ -13253,12 +13335,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); - if (vmx->hv_deadline_tsc == -1) - vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); - else - vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); + if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -13462,18 +13539,12 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) return -ERANGE; vmx->hv_deadline_tsc = tscl + delta_tsc; - vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); - return delta_tsc == 0; } static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx->hv_deadline_tsc = -1; - vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); + to_vmx(vcpu)->hv_deadline_tsc = -1; } #endif @@ -13954,6 +14025,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; + /* + * SMM temporarily disables VMX, so we cannot be in guest mode, + * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags + * must be zero. + */ + if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags) + return -EINVAL; + if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; @@ -14097,6 +14176,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .apicv_post_state_restore = vmx_apicv_post_state_restore, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, + .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, .deliver_posted_interrupt = vmx_deliver_posted_interrupt, @@ -14130,6 +14210,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .umip_emulated = vmx_umip_emulated, .check_nested_events = vmx_check_nested_events, + .request_immediate_exit = vmx_request_immediate_exit, .sched_in = vmx_sched_in, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 542f6315444d..ca717737347e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -628,7 +628,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) gfn_t gfn; int r; - if (is_long_mode(vcpu) || !is_pae(vcpu)) + if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu)) return false; if (!test_bit(VCPU_EXREG_PDPTR, @@ -2537,7 +2537,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_PLATFORM_INFO: if (!msr_info->host_initiated || - data & ~MSR_PLATFORM_INFO_CPUID_FAULT || (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) && cpuid_fault_enabled(vcpu))) return 1; @@ -2780,6 +2779,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.osvw.status; break; case MSR_PLATFORM_INFO: + if (!msr_info->host_initiated && + !vcpu->kvm->arch.guest_can_read_msr_platform_info) + return 1; msr_info->data = vcpu->arch.msr_platform_info; break; case 
MSR_MISC_FEATURES_ENABLES: @@ -2927,6 +2929,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_GET_MSR_FEATURES: + case KVM_CAP_MSR_PLATFORM_INFO: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -4007,19 +4010,23 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); + r = -EFAULT; if (get_user(user_data_size, &user_kvm_nested_state->size)) - return -EFAULT; + break; r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state, user_data_size); if (r < 0) - return r; + break; if (r > user_data_size) { if (put_user(r, &user_kvm_nested_state->size)) - return -EFAULT; - return -E2BIG; + r = -EFAULT; + else + r = -E2BIG; + break; } + r = 0; break; } @@ -4031,19 +4038,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!kvm_x86_ops->set_nested_state) break; + r = -EFAULT; if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state))) - return -EFAULT; + break; + r = -EINVAL; if (kvm_state.size < sizeof(kvm_state)) - return -EINVAL; + break; if (kvm_state.flags & ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE)) - return -EINVAL; + break; /* nested_run_pending implies guest_mode. */ if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING) - return -EINVAL; + break; r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); break; @@ -4350,6 +4359,10 @@ split_irqchip_unlock: kvm->arch.pause_in_guest = true; r = 0; break; + case KVM_CAP_MSR_PLATFORM_INFO: + kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; + r = 0; + break; default: r = -EINVAL; break; @@ -4685,7 +4698,7 @@ static void kvm_init_msr_list(void) */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) continue; break; case MSR_TSC_AUX: @@ -7361,6 +7374,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); +void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) +{ + smp_send_reschedule(vcpu->cpu); +} +EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); + /* * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the @@ -7565,7 +7584,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) { kvm_make_request(KVM_REQ_EVENT, vcpu); - smp_send_reschedule(vcpu->cpu); + kvm_x86_ops->request_immediate_exit(vcpu); } trace_kvm_entry(vcpu->vcpu_id); @@ -7829,6 +7848,29 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } +/* Swap (qemu) user FPU context for the guest FPU context. */ +static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, + ~XFEATURE_MASK_PKRU); + preempt_enable(); + trace_kvm_fpu(1); +} + +/* When vcpu_run ends, restore user space FPU context. 
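From userspace, the new KVM_CAP_MSR_PLATFORM_INFO is a per-VM enable cap. A hedged sketch of opting a guest out of reads; vm_fd is assumed to be an open VM file descriptor, and the includes (<linux/kvm.h>, <sys/ioctl.h>) are the caller's responsibility:

/*
 * Sketch: with args[0] = 0, a guest RDMSR of MSR_PLATFORM_INFO fails
 * (kvm_get_msr_common() returns 1, which the emulator normally turns
 * into an injected #GP).
 */
struct kvm_enable_cap cap = {
	.cap = KVM_CAP_MSR_PLATFORM_INFO,
	.args = { 0 },
};

if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
	perror("KVM_ENABLE_CAP");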
*/ +static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); + ++vcpu->stat.fpu_reload; + trace_kvm_fpu(0); +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -8177,7 +8219,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); - if (!is_long_mode(vcpu) && is_pae(vcpu)) { + if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); mmu_reset_needed = 1; } @@ -8406,29 +8448,6 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } -/* Swap (qemu) user FPU context for the guest FPU context. */ -void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); - /* PKRU is separately restored in kvm_x86_ops->run. */ - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, - ~XFEATURE_MASK_PKRU); - preempt_enable(); - trace_kvm_fpu(1); -} - -/* When vcpu_run ends, restore user space FPU context. */ -void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); - preempt_enable(); - ++vcpu->stat.fpu_reload; - trace_kvm_fpu(0); -} - void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; @@ -8852,6 +8871,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); pvclock_update_vm_gtod_copy(kvm); + kvm->arch.guest_can_read_msr_platform_info = true; + INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); @@ -9200,6 +9221,13 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, kvm_page_track_flush_slot(kvm, slot); } +static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) +{ + return (is_guest_mode(vcpu) && + kvm_x86_ops->guest_apic_has_interrupt && + kvm_x86_ops->guest_apic_has_interrupt(vcpu)); +} + static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) { if (!list_empty_careful(&vcpu->async_pf.done)) @@ -9224,7 +9252,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return true; if (kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_has_interrupt(vcpu)) + (kvm_cpu_has_interrupt(vcpu) || + kvm_guest_apic_has_interrupt(vcpu))) return true; if (kvm_hv_has_stimer_pending(vcpu)) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7a8fc26c1115..faca978ebf9d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end) set_memory_np_noalias(begin_ul, len_pages); } +void __weak mem_encrypt_free_decrypted_mem(void) { } + void __ref free_initmem(void) { e820__reallocate_tables(); + mem_encrypt_free_decrypted_mem(); + free_kernel_image_pages(&__init_begin, &__init_end); } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index b2de398d1fd3..006f373f54ab 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -348,6 +348,30 @@ bool sev_active(void) EXPORT_SYMBOL(sev_active); /* Architecture __weak replacement functions */ +void __init mem_encrypt_free_decrypted_mem(void) +{ + unsigned long vaddr, vaddr_end, npages; + int r; + + vaddr = (unsigned 
long)__start_bss_decrypted_unused; + vaddr_end = (unsigned long)__end_bss_decrypted; + npages = (vaddr_end - vaddr) >> PAGE_SHIFT; + + /* + * The unused memory range was mapped decrypted, change the encryption + * attribute from decrypted to encrypted before freeing it. + */ + if (mem_encrypt_active()) { + r = set_memory_encrypted(vaddr, npages); + if (r) { + pr_warn("failed to free unused decrypted pages\n"); + return; + } + } + + free_init_pages("unused decrypted", vaddr, vaddr_end); +} + void __init mem_encrypt_init(void) { if (!sme_me_mask) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ae394552fb94..59274e2c1ac4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) +#define MAX_UNSHARED_PTRS_PER_PGD \ + max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) @@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd) * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table @@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd) */ #define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) +#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { @@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 +#define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 +#define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) @@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pmd_t *u_pmds[PREALLOCATED_USER_PMDS]; - pmd_t *pmds[PREALLOCATED_PMDS]; + pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; + pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(); @@ -637,6 +643,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) { unsigned long address = __fix_to_virt(idx); +#ifdef CONFIG_X86_64 + /* + * Ensure that the static initial page tables are covering the + * fixmap completely. + */ + BUILD_BUG_ON(__end_of_permanent_fixed_addresses > + (FIXMAP_PMD_NUM * PTRS_PER_PTE)); +#endif + if (idx >= __end_of_fixed_addresses) { BUG(); return; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c index 4392c15ed9e0..dbfc5cf2aa93 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c @@ -10,7 +10,7 @@ * of the License. */ -#include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/platform_device.h> #include <linux/regulator/machine.h> #include <linux/regulator/fixed.h> @@ -43,7 +43,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = { * real voltage and signaling are still 1.8V. 
*/ .microvolts = 2000000, /* 1.8V */ - .gpio = -EINVAL, .startup_delay = 250 * 1000, /* 250ms */ .enable_high = 1, /* active high */ .enabled_at_boot = 0, /* disabled at boot */ @@ -58,11 +57,23 @@ static struct platform_device bcm43xx_vmmc_regulator = { }, }; +static struct gpiod_lookup_table bcm43xx_vmmc_gpio_table = { + .dev_id = "reg-fixed-voltage.0", + .table = { + GPIO_LOOKUP("0000:00:0c.0", -1, NULL, GPIO_ACTIVE_LOW), + {} + }, +}; + static int __init bcm43xx_regulator_register(void) { + struct gpiod_lookup_table *table = &bcm43xx_vmmc_gpio_table; + struct gpiod_lookup *lookup = table->table; int ret; - bcm43xx_vmmc.gpio = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME); + lookup[0].chip_hwnum = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME); + gpiod_add_lookup_table(table); + ret = platform_device_register(&bcm43xx_vmmc_regulator); if (ret) { pr_err("%s: vmmc regulator register failed\n", __func__); diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c index fd39301f25ac..7e56fc74093c 100644 --- a/arch/x86/platform/ts5500/ts5500.c +++ b/arch/x86/platform/ts5500/ts5500.c @@ -24,7 +24,6 @@ #include <linux/kernel.h> #include <linux/leds.h> #include <linux/init.h> -#include <linux/platform_data/gpio-ts5500.h> #include <linux/platform_data/max197.h> #include <linux/platform_device.h> #include <linux/slab.h> diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index a4701389562c..37923d715741 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile @@ -7,4 +7,4 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_cpu.o := $(nostackp) obj-$(CONFIG_PM_SLEEP) += cpu.o -obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o +obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o hibernate.o diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c new file mode 100644 index 000000000000..bcddf09b5aa3 --- /dev/null +++ b/arch/x86/power/hibernate.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hibernation support for x86 + * + * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> + * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> + */ +#include <linux/gfp.h> +#include <linux/smp.h> +#include <linux/suspend.h> +#include <linux/scatterlist.h> +#include <linux/kdebug.h> + +#include <crypto/hash.h> + +#include <asm/e820/api.h> +#include <asm/init.h> +#include <asm/proto.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mtrr.h> +#include <asm/sections.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +/* + * Address to jump to in the last phase of restore in order to get to the image + * kernel's text (this value is passed in the image header). + */ +unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; + +/* + * Value of the cr3 register from before the hibernation (this value is passed + * in the image header). 
+ */ +unsigned long restore_cr3 __visible; +unsigned long temp_pgt __visible; +unsigned long relocated_restore_code __visible; + +/** + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn; + unsigned long nosave_end_pfn; + + nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; + nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; + + return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn; +} + + +#define MD5_DIGEST_SIZE 16 + +struct restore_data_record { + unsigned long jump_address; + unsigned long jump_address_phys; + unsigned long cr3; + unsigned long magic; + u8 e820_digest[MD5_DIGEST_SIZE]; +}; + +#if IS_BUILTIN(CONFIG_CRYPTO_MD5) +/** + * get_e820_md5 - calculate md5 according to given e820 table + * + * @table: the e820 table to be calculated + * @buf: the md5 result to be stored to + */ +static int get_e820_md5(struct e820_table *table, void *buf) +{ + struct crypto_shash *tfm; + struct shash_desc *desc; + int size; + int ret = 0; + + tfm = crypto_alloc_shash("md5", 0, 0); + if (IS_ERR(tfm)) + return -ENOMEM; + + desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), + GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto free_tfm; + } + + desc->tfm = tfm; + desc->flags = 0; + + size = offsetof(struct e820_table, entries) + + sizeof(struct e820_entry) * table->nr_entries; + + if (crypto_shash_digest(desc, (u8 *)table, size, buf)) + ret = -EINVAL; + + kzfree(desc); + +free_tfm: + crypto_free_shash(tfm); + return ret; +} + +static int hibernation_e820_save(void *buf) +{ + return get_e820_md5(e820_table_firmware, buf); +} + +static bool hibernation_e820_mismatch(void *buf) +{ + int ret; + u8 result[MD5_DIGEST_SIZE]; + + memset(result, 0, MD5_DIGEST_SIZE); + /* If there is no digest in suspend kernel, let it go. */ + if (!memcmp(result, buf, MD5_DIGEST_SIZE)) + return false; + + ret = get_e820_md5(e820_table_firmware, result); + if (ret) + return true; + + return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false; +} +#else +static int hibernation_e820_save(void *buf) +{ + return 0; +} + +static bool hibernation_e820_mismatch(void *buf) +{ + /* If md5 is not builtin for restore kernel, let it go. */ + return false; +} +#endif + +#ifdef CONFIG_X86_64 +#define RESTORE_MAGIC 0x23456789ABCDEF01UL +#else +#define RESTORE_MAGIC 0x12345678UL +#endif + +/** + * arch_hibernation_header_save - populate the architecture specific part + * of a hibernation image header + * @addr: address to save the data at + */ +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct restore_data_record *rdr = addr; + + if (max_size < sizeof(struct restore_data_record)) + return -EOVERFLOW; + rdr->magic = RESTORE_MAGIC; + rdr->jump_address = (unsigned long)restore_registers; + rdr->jump_address_phys = __pa_symbol(restore_registers); + + /* + * The restore code fixes up CR3 and CR4 in the following sequence: + * + * [in hibernation asm] + * 1. CR3 <= temporary page tables + * 2. CR4 <= mmu_cr4_features (from the kernel that restores us) + * 3. CR3 <= rdr->cr3 + * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel) + * [in restore_processor_state()] + * 5. CR4 <= saved CR4 + * 6. CR3 <= saved CR3 + * + * Our mmu_cr4_features has CR4.PCIDE=0, and toggling + * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so + * rdr->cr3 needs to point to valid page tables but must not + * have any of the PCID bits set. 
+ */ + rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK; + + return hibernation_e820_save(rdr->e820_digest); +} + +/** + * arch_hibernation_header_restore - read the architecture specific data + * from the hibernation image header + * @addr: address to read the data from + */ +int arch_hibernation_header_restore(void *addr) +{ + struct restore_data_record *rdr = addr; + + if (rdr->magic != RESTORE_MAGIC) { + pr_crit("Unrecognized hibernate image header format!\n"); + return -EINVAL; + } + + restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; + restore_cr3 = rdr->cr3; + + if (hibernation_e820_mismatch(rdr->e820_digest)) { + pr_crit("Hibernate inconsistent memory map detected!\n"); + return -ENODEV; + } + + return 0; +} + +int relocate_restore_code(void) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3_pa()) + + pgd_index(relocated_restore_code); + p4d = p4d_offset(pgd, relocated_restore_code); + if (p4d_large(*p4d)) { + set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX)); + goto out; + } + pud = pud_offset(p4d, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + goto out; + } + pmd = pmd_offset(pud, relocated_restore_code); + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + goto out; + } + pte = pte_offset_kernel(pmd, relocated_restore_code); + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); +out: + __flush_tlb_all(); + return 0; +} diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index afc4ed7b1578..15695e30f982 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -14,9 +14,7 @@ #include <asm/pgtable.h> #include <asm/mmzone.h> #include <asm/sections.h> - -/* Defined in hibernate_asm_32.S */ -extern int restore_image(void); +#include <asm/suspend.h> /* Pointer to the temporary resume page tables */ pgd_t *resume_pg_dir; @@ -145,6 +143,32 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir) #endif } +static int set_up_temporary_text_mapping(pgd_t *pgd_base) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_base + pgd_index(restore_jump_address); + + pmd = resume_one_md_table_init(pgd); + if (!pmd) + return -ENOMEM; + + if (boot_cpu_has(X86_FEATURE_PSE)) { + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | pgprot_val(PAGE_KERNEL_LARGE_EXEC))); + } else { + pte = resume_one_page_table_init(pmd); + if (!pte) + return -ENOMEM; + set_pte(pte + pte_index(restore_jump_address), + __pte((jump_address_phys & PAGE_MASK) | pgprot_val(PAGE_KERNEL_EXEC))); + } + + return 0; +} + asmlinkage int swsusp_arch_resume(void) { int error; @@ -154,22 +178,22 @@ asmlinkage int swsusp_arch_resume(void) return -ENOMEM; resume_init_first_level_page_table(resume_pg_dir); + + error = set_up_temporary_text_mapping(resume_pg_dir); + if (error) + return error; + error = resume_physical_mapping_init(resume_pg_dir); if (error) return error; + temp_pgt = __pa(resume_pg_dir); + + error = relocate_restore_code(); + if (error) + return error; + /* We have got enough memory and from now on we cannot recover */ restore_image(); return 0; } - -/* - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - 
-int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index f8e3b668d20b..239f424ccb29 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -26,26 +26,6 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> -/* Defined in hibernate_asm_64.S */ -extern asmlinkage __visible int restore_image(void); - -/* - * Address to jump to in the last phase of restore in order to get to the image - * kernel's text (this value is passed in the image header). - */ -unsigned long restore_jump_address __visible; -unsigned long jump_address_phys; - -/* - * Value of the cr3 register from before the hibernation (this value is passed - * in the image header). - */ -unsigned long restore_cr3 __visible; - -unsigned long temp_level4_pgt __visible; - -unsigned long relocated_restore_code __visible; - static int set_up_temporary_text_mapping(pgd_t *pgd) { pmd_t *pmd; @@ -141,46 +121,7 @@ static int set_up_temporary_mappings(void) return result; } - temp_level4_pgt = __pa(pgd); - return 0; -} - -static int relocate_restore_code(void) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - relocated_restore_code = get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - - memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); - - /* Make the page containing the relocated code executable */ - pgd = (pgd_t *)__va(read_cr3_pa()) + - pgd_index(relocated_restore_code); - p4d = p4d_offset(pgd, relocated_restore_code); - if (p4d_large(*p4d)) { - set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX)); - goto out; - } - pud = pud_offset(p4d, relocated_restore_code); - if (pud_large(*pud)) { - set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); - goto out; - } - pmd = pmd_offset(pud, relocated_restore_code); - if (pmd_large(*pmd)) { - set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); - goto out; - } - pte = pte_offset_kernel(pmd, relocated_restore_code); - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); -out: - __flush_tlb_all(); + temp_pgt = __pa(pgd); return 0; } @@ -200,166 +141,3 @@ asmlinkage int swsusp_arch_resume(void) restore_image(); return 0; } - -/* - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} - -#define MD5_DIGEST_SIZE 16 - -struct restore_data_record { - unsigned long jump_address; - unsigned long jump_address_phys; - unsigned long cr3; - unsigned long magic; - u8 e820_digest[MD5_DIGEST_SIZE]; -}; - -#define RESTORE_MAGIC 0x23456789ABCDEF01UL - -#if IS_BUILTIN(CONFIG_CRYPTO_MD5) -/** - * get_e820_md5 - calculate md5 according to given e820 table - * - * @table: the e820 table to be calculated - * @buf: the md5 result to be stored to - */ -static int get_e820_md5(struct e820_table *table, void *buf) -{ - struct crypto_shash *tfm; - struct shash_desc *desc; - int size; - int ret = 0; - - tfm = crypto_alloc_shash("md5", 0, 0); - if (IS_ERR(tfm)) - return -ENOMEM; - - desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!desc) { 
- ret = -ENOMEM; - goto free_tfm; - } - - desc->tfm = tfm; - desc->flags = 0; - - size = offsetof(struct e820_table, entries) + - sizeof(struct e820_entry) * table->nr_entries; - - if (crypto_shash_digest(desc, (u8 *)table, size, buf)) - ret = -EINVAL; - - kzfree(desc); - -free_tfm: - crypto_free_shash(tfm); - return ret; -} - -static void hibernation_e820_save(void *buf) -{ - get_e820_md5(e820_table_firmware, buf); -} - -static bool hibernation_e820_mismatch(void *buf) -{ - int ret; - u8 result[MD5_DIGEST_SIZE]; - - memset(result, 0, MD5_DIGEST_SIZE); - /* If there is no digest in suspend kernel, let it go. */ - if (!memcmp(result, buf, MD5_DIGEST_SIZE)) - return false; - - ret = get_e820_md5(e820_table_firmware, result); - if (ret) - return true; - - return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false; -} -#else -static void hibernation_e820_save(void *buf) -{ -} - -static bool hibernation_e820_mismatch(void *buf) -{ - /* If md5 is not builtin for restore kernel, let it go. */ - return false; -} -#endif - -/** - * arch_hibernation_header_save - populate the architecture specific part - * of a hibernation image header - * @addr: address to save the data at - */ -int arch_hibernation_header_save(void *addr, unsigned int max_size) -{ - struct restore_data_record *rdr = addr; - - if (max_size < sizeof(struct restore_data_record)) - return -EOVERFLOW; - rdr->jump_address = (unsigned long)restore_registers; - rdr->jump_address_phys = __pa_symbol(restore_registers); - - /* - * The restore code fixes up CR3 and CR4 in the following sequence: - * - * [in hibernation asm] - * 1. CR3 <= temporary page tables - * 2. CR4 <= mmu_cr4_features (from the kernel that restores us) - * 3. CR3 <= rdr->cr3 - * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel) - * [in restore_processor_state()] - * 5. CR4 <= saved CR4 - * 6. CR3 <= saved CR3 - * - * Our mmu_cr4_features has CR4.PCIDE=0, and toggling - * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so - * rdr->cr3 needs to point to valid page tables but must not - * have any of the PCID bits set. 
-	 */
-	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
-
-	rdr->magic = RESTORE_MAGIC;
-
-	hibernation_e820_save(rdr->e820_digest);
-
-	return 0;
-}
-
-/**
- * arch_hibernation_header_restore - read the architecture specific data
- *	from the hibernation image header
- * @addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
-	struct restore_data_record *rdr = addr;
-
-	restore_jump_address = rdr->jump_address;
-	jump_address_phys = rdr->jump_address_phys;
-	restore_cr3 = rdr->cr3;
-
-	if (rdr->magic != RESTORE_MAGIC) {
-		pr_crit("Unrecognized hibernate image header format!\n");
-		return -EINVAL;
-	}
-
-	if (hibernation_e820_mismatch(rdr->e820_digest)) {
-		pr_crit("Hibernate inconsistent memory map detected!\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 6e56815e13a0..6fe383002125 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -12,6 +12,7 @@
 #include <asm/page_types.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
+#include <asm/frame.h>
 
 .text
 
@@ -24,13 +25,30 @@ ENTRY(swsusp_arch_suspend)
 	pushfl
 	popl saved_context_eflags
 
+	/* save cr3 */
+	movl	%cr3, %eax
+	movl	%eax, restore_cr3
+
+	FRAME_BEGIN
 	call swsusp_save
+	FRAME_END
 	ret
+ENDPROC(swsusp_arch_suspend)
 
 ENTRY(restore_image)
+	/* prepare to jump to the image kernel */
+	movl	restore_jump_address, %ebx
+	movl	restore_cr3, %ebp
+
 	movl	mmu_cr4_features, %ecx
-	movl	resume_pg_dir, %eax
-	subl	$__PAGE_OFFSET, %eax
+
+	/* jump to relocated restore code */
+	movl	relocated_restore_code, %eax
+	jmpl	*%eax
+
+/* code below has been relocated to a safe page */
+ENTRY(core_restore_code)
+	movl	temp_pgt, %eax
 	movl	%eax, %cr3
 
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
@@ -49,7 +67,7 @@ copy_loop:
 	movl	pbe_address(%edx), %esi
 	movl	pbe_orig_address(%edx), %edi
 
-	movl	$1024, %ecx
+	movl	$(PAGE_SIZE >> 2), %ecx
 	rep
 	movsl
 
@@ -58,10 +76,13 @@ copy_loop:
 	.p2align 4,,7
 
 done:
+	jmpl	*%ebx
+
+	/* code below belongs to the image kernel */
+	.align PAGE_SIZE
+ENTRY(restore_registers)
 	/* go back to the original page tables */
-	movl	$swapper_pg_dir, %eax
-	subl	$__PAGE_OFFSET, %eax
-	movl	%eax, %cr3
+	movl	%ebp, %cr3
 	movl	mmu_cr4_features, %ecx
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
 	movl	%ecx, %cr4;	# turn PGE back on
@@ -82,4 +103,8 @@
 
 	xorl	%eax, %eax
 
+	/* tell the hibernation core that we've just restored the memory */
+	movl	%eax, in_suspend
+
 	ret
+ENDPROC(restore_registers)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index fd369a6e9ff8..3008baa2fa95 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -59,7 +59,7 @@ ENTRY(restore_image)
 	movq	restore_cr3(%rip), %r9
 
 	/* prepare to switch to temporary page tables */
-	movq	temp_level4_pgt(%rip), %rax
+	movq	temp_pgt(%rip), %rax
 	movq	mmu_cr4_features(%rip), %rbx
 
 	/* prepare to copy image data to their original locations */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2eeddd814653..0ca46e03b830 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -5,6 +5,7 @@
 #include <linux/kexec.h>
 #include <linux/slab.h>
 
+#include <xen/xen.h>
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/interface/memory.h>
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index c85d1a88f476..2a9025343534 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -11,6 +11,7 @@
 #include <asm/xen/interface.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/xen.h>
 #include <xen/interface/memory.h>
 #include <xen/interface/hvm/start_info.h>
 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2fe5c9b1816b..dd461c0167ef 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* L3_k[511] -> level2_fixmap_pgt */
 	convert_pfn_mfn(level3_kernel_pgt);
 
-	/* L3_k[511][506] -> level1_fixmap_pgt */
+	/* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
 	convert_pfn_mfn(level2_fixmap_pgt);
 
 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+
+	for (i = 0; i < FIXMAP_PMD_NUM; i++) {
+		set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
+			      PAGE_KERNEL_RO);
+	}
 
 	/* Pin down new L4 */
 	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 33a783c77d96..b99585034dd2 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -23,6 +23,7 @@
 #include <linux/io.h>
 #include <linux/export.h>
 
+#include <xen/xen.h>
 #include <xen/platform_pci.h>
 
 #include "xen-ops.h"
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 7d00d4ad44d4..0972184f3f19 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -3,6 +3,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/xen/hypercall.h>
+#include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/vcpu.h>
@@ -478,7 +479,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
 irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
 {
 	int err, ret = IRQ_NONE;
-	struct pt_regs regs;
+	struct pt_regs regs = {0};
 	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
 	uint8_t xenpmu_flags = get_xenpmu_flags();
 
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index b9ad83a0ee5d..ea5d8d03e53b 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -13,7 +13,7 @@ config XTENSA
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_SHOW
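
A note on the e820 digest code moved out of hibernate_64.c above: the policy it implements is deliberately permissive in one direction. An all-zero digest in the image header (written by a kernel without built-in MD5) is accepted, while a present-but-different digest, or a failure to recompute one, aborts the resume. A compact sketch of that three-way decision, with compute_digest() standing in for the crypto_shash MD5 call (a hypothetical helper, shown only to make the outcomes explicit; not the kernel's API):

#include <stdbool.h>
#include <string.h>

#define DIGEST_SIZE 16	/* MD5_DIGEST_SIZE in the original */

/* Hypothetical stand-in for get_e820_md5(); returns 0 on success. */
extern int compute_digest(unsigned char out[DIGEST_SIZE]);

/* Mirrors hibernation_e820_mismatch(): true means "refuse to resume". */
static bool e820_mismatch(const unsigned char *from_header)
{
	unsigned char zero[DIGEST_SIZE] = { 0 };
	unsigned char now[DIGEST_SIZE];

	/* No digest in the image header: older kernel, let it go. */
	if (!memcmp(from_header, zero, DIGEST_SIZE))
		return false;

	/* Can't recompute a digest? Fail safe, treat it as a mismatch. */
	if (compute_digest(now))
		return true;

	return memcmp(now, from_header, DIGEST_SIZE) != 0;
}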
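
On the 32-bit copy loop in hibernate_asm_32.S: with rep movsl moving 32-bit words, the count register must hold PAGE_SIZE / 4, which is exactly what the new $(PAGE_SIZE >> 2) operand encodes (the old hard-coded $1024 assumed 4 KiB pages). In C terms, the loop walks the page backup entry (pbe) list and copies each saved page back to its original location. A minimal sketch, assuming a simplified three-field pbe layout (illustrative, not the kernel's full definition):

#include <string.h>

#define PAGE_SIZE 4096UL	/* illustrative; matches the x86 default */

/* Simplified stand-in for the kernel's page backup entry. */
struct pbe {
	void *address;		/* address of the saved copy (safe page) */
	void *orig_address;	/* address the page originally occupied */
	struct pbe *next;	/* singly linked restore list */
};

/* C equivalent of copy_loop: for each entry, move PAGE_SIZE bytes
 * from the safe copy back to where the page originally lived. */
static void restore_pblist(struct pbe *head)
{
	for (struct pbe *p = head; p; p = p->next)
		memcpy(p->orig_address, p->address, PAGE_SIZE);
}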
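
Finally, the pfn_is_nosave() bodies deleted from both the 32-bit and 64-bit files are identical, which is what lets them collapse into a shared copy: the test is a half-open range check on the physical frames spanned by the nosave section. A sketch of the predicate, with the two physical bounds taken as plain variables rather than the kernel's __pa_symbol() on the linker-provided __nosave_begin/__nosave_end symbols:

#include <stdbool.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(a) (((a) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Physical bounds of the nosave region; stand-ins for
 * __pa_symbol(&__nosave_begin) / __pa_symbol(&__nosave_end). */
static unsigned long nosave_begin_pa, nosave_end_pa;

/* True iff pfn falls in [begin, end), with end rounded up to a page. */
static bool pfn_is_nosave(unsigned long pfn)
{
	unsigned long begin = nosave_begin_pa >> PAGE_SHIFT;
	unsigned long end = PAGE_ALIGN(nosave_end_pa) >> PAGE_SHIFT;

	return pfn >= begin && pfn < end;
}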