diff options
Diffstat (limited to 'arch')
547 files changed, 4883 insertions, 3599 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 6c00e5b00f8b..f76b214cf7ad 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -867,4 +867,13 @@ config STRICT_MODULE_RWX config ARCH_WANT_RELAX_ORDER bool +config REFCOUNT_FULL + bool "Perform full reference count validation at the expense of speed" + help + Enabling this switches the refcounting infrastructure from a fast + unchecked atomic_t implementation to a fully state checked + implementation, which can be (slightly) slower but provides protections + against various use-after-free conditions that can be used in + security flaw exploits. + source "kernel/gcov/Kconfig" diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 7bee4e4799fd..3e74ca5e6402 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -36,7 +36,6 @@ generic-y += preempt.h generic-y += resource.h generic-y += sembuf.h generic-y += shmbuf.h -generic-y += siginfo.h generic-y += socket.h generic-y += sockios.h generic-y += stat.h diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 6e1242da0159..4104a0839214 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -86,8 +86,6 @@ struct task_struct; #define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4) #define TSK_K_FP(tsk) TSK_K_REG(tsk, 0) -#define thread_saved_pc(tsk) TSK_K_BLINK(tsk) - extern void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp); diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild index b15bf6bc0e94..b55fc2ae1e8c 100644 --- a/arch/arc/include/uapi/asm/Kbuild +++ b/arch/arc/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index fc8211f338ad..666613fde91d 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -470,7 +470,7 @@ void __init setup_arch(char **cmdline_p) void __init time_init(void) { of_clk_init(NULL); - clocksource_probe(); + timer_probe(); } static int __init customize_machine(void) diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index 3e25e8d6486b..2e13683dfb24 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4c1a35f15838..6281c3e5a673 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -337,7 +337,7 @@ config ARCH_MULTIPLATFORM select ARM_HAS_SG_CHAIN select ARM_PATCH_PHYS_VIRT select AUTO_ZRELADDR - select CLKSRC_OF + select TIMER_OF select COMMON_CLK select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI @@ -351,7 +351,7 @@ config ARM_SINGLE_ARMV7M depends on !MMU select ARM_NVIC select AUTO_ZRELADDR - select CLKSRC_OF + select TIMER_OF select COMMON_CLK select CPU_V7M select GENERIC_CLOCKEVENTS @@ -532,7 +532,7 @@ config ARCH_PXA select CLKDEV_LOOKUP select CLKSRC_PXA select CLKSRC_MMIO - select CLKSRC_OF + select TIMER_OF select CPU_XSCALE if !CPU_XSC3 select GENERIC_CLOCKEVENTS select GPIO_PXA @@ -571,7 +571,7 @@ config ARCH_SA1100 select CLKDEV_LOOKUP select CLKSRC_MMIO select CLKSRC_PXA - select CLKSRC_OF if OF + select TIMER_OF if OF select CPU_FREQ select CPU_SA1100 select GENERIC_CLOCKEVENTS @@ -1357,7 +1357,7 @@ config HAVE_ARM_ARCH_TIMER config HAVE_ARM_TWD bool - select CLKSRC_OF if OF + select TIMER_OF if OF help This options enables support for the ARM timer and watchdog unit @@ -1416,6 +1416,7 @@ choice config VMSPLIT_3G bool "3G/1G user/kernel split" config VMSPLIT_3G_OPT + depends on !ARM_LPAE bool "3G/1G user/kernel split (for full 1G low memory)" config VMSPLIT_2G bool "2G/2G user/kernel split" @@ -1637,7 +1638,7 @@ config ARCH_SELECT_MEMORY_MODEL config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM -config HAVE_GENERIC_RCU_GUP +config HAVE_GENERIC_GUP def_bool y depends on ARM_LPAE @@ -2061,6 +2062,23 @@ config EFI is only useful for kernels that may run on systems that have UEFI firmware. +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + + NOTE: This does *NOT* enable or encourage the use of DMI quirks, + i.e., the the practice of identifying the platform via DMI to + decide whether certain workarounds for buggy hardware and/or + firmware need to be enabled. This would require the DMI subsystem + to be enabled much earlier than we do on ARM, which is non-trivial. + endmenu menu "CPU Power Management" diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index 9d5dc4fda3c1..a17ca8d78656 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -17,14 +17,13 @@ @ there. .inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000 #else - mov r0, r0 + AR_CLASS( mov r0, r0 ) + M_CLASS( nop.w ) #endif .endm .macro __EFI_HEADER #ifdef CONFIG_EFI_STUB - b __efi_start - .set start_offset, __efi_start - start .org start + 0x3c @ diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 7c711ba61417..8a756870c238 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -130,19 +130,22 @@ start: .rept 7 __nop .endr - ARM( mov r0, r0 ) - ARM( b 1f ) - THUMB( badr r12, 1f ) - THUMB( bx r12 ) +#ifndef CONFIG_THUMB2_KERNEL + mov r0, r0 +#else + AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode + M_CLASS( nop.w ) @ M: already in Thumb2 mode + .thumb +#endif + W(b) 1f .word _magic_sig @ Magic numbers to help the loader .word _magic_start @ absolute load/run zImage address .word _magic_end @ zImage end address .word 0x04030201 @ endianness flag - THUMB( .thumb ) -1: __EFI_HEADER - + __EFI_HEADER +1: ARM_BE8( setend be ) @ go BE8 if compiled for BE8 AR_CLASS( mrs r9, cpsr ) #ifdef CONFIG_ARM_VIRT_EXT diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts index c5d2589c55fc..fc864a855991 100644 --- a/arch/arm/boot/dts/am335x-sl50.dts +++ b/arch/arm/boot/dts/am335x-sl50.dts @@ -220,7 +220,7 @@ mmc1_pins: pinmux_mmc1_pins { pinctrl-single,pins = < - AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE7) /* spi0_cs1.gpio0_6 */ + AM33XX_IOPAD(0x96c, PIN_INPUT | MUX_MODE7) /* uart0_rtsn.gpio1_9 */ >; }; @@ -280,10 +280,6 @@ AM33XX_IOPAD(0x834, PIN_INPUT_PULLUP | MUX_MODE7) /* nKbdReset - gpmc_ad13.gpio1_13 */ AM33XX_IOPAD(0x838, PIN_INPUT_PULLUP | MUX_MODE7) /* nDispReset - gpmc_ad14.gpio1_14 */ AM33XX_IOPAD(0x844, PIN_INPUT_PULLUP | MUX_MODE7) /* USB1_enPower - gpmc_a1.gpio1_17 */ - /* AVR Programming - SPI Bus (bit bang) - Screen and Keyboard */ - AM33XX_IOPAD(0x954, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattMOSI spi0_d0.gpio0_3 */ - AM33XX_IOPAD(0x958, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattMISO spi0_d1.gpio0_4 */ - AM33XX_IOPAD(0x950, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattSCLK spi0_clk.gpio0_2 */ /* PDI Bus - Battery system */ AM33XX_IOPAD(0x840, PIN_INPUT_PULLUP | MUX_MODE7) /* nBattReset gpmc_a0.gpio1_16 */ AM33XX_IOPAD(0x83c, PIN_INPUT_PULLUP | MUX_MODE7) /* BattPDIData gpmc_ad15.gpio1_15 */ @@ -384,7 +380,7 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; bus-width = <4>; - cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>; + cd-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>; vmmc-supply = <&vmmcsd_fixed>; }; diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index 8c6bc29eb7f6..3e74929d3289 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -893,6 +893,7 @@ //interrupts = <16 17 18 35 36 37 38 39>; interrupts = <16>; clocks = <&clk_apb>; + clock-names = "PCLK"; }; wdt1: wdt@1e785000 { diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index a0bea4a6ec77..1e6c701da853 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -1000,6 +1000,7 @@ //interrupts = <16 17 18 35 36 37 38 39>; interrupts = <16>; clocks = <&clk_apb>; + clock-names = "PCLK"; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 561f27d8d922..9444a9a9ba10 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -3,6 +3,11 @@ #include <dt-bindings/clock/bcm2835-aux.h> #include <dt-bindings/gpio/gpio.h> +/* firmware-provided startup stubs live here, where the secondary CPUs are + * spinning. + */ +/memreserve/ 0x00000000 0x00001000; + /* This include file covers the common peripherals and configuration between * bcm2835 and bcm2836 implementations, leaving the CPU configuration to * bcm2835.dtsi and bcm2836.dtsi. diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dts b/arch/arm/boot/dts/imx6ul-14x14-evk.dts index f18e1f1d0ce2..d2be8aa3370b 100644 --- a/arch/arm/boot/dts/imx6ul-14x14-evk.dts +++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dts @@ -120,10 +120,16 @@ ethphy0: ethernet-phy@2 { reg = <2>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; }; ethphy1: ethernet-phy@1 { reg = <1>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET2_REF>; + clock-names = "rmii-ref"; }; }; }; diff --git a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi index b6f26824e83a..66f615a74118 100644 --- a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi +++ b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi @@ -137,8 +137,8 @@ netcp: netcp@26000000 { /* NetCP address range */ ranges = <0 0x26000000 0x1000000>; - clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>, <&clkosr>; - clock-names = "pa_clk", "ethss_clk", "cpts", "osr_clk"; + clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>; + clock-names = "pa_clk", "ethss_clk", "cpts"; dma-coherent; ti,navigator-dmas = <&dma_gbe 0>, diff --git a/arch/arm/boot/dts/keystone-k2l.dtsi b/arch/arm/boot/dts/keystone-k2l.dtsi index b58e7ebc0919..148650406cf7 100644 --- a/arch/arm/boot/dts/keystone-k2l.dtsi +++ b/arch/arm/boot/dts/keystone-k2l.dtsi @@ -232,6 +232,14 @@ }; }; + osr: sram@70000000 { + compatible = "mmio-sram"; + reg = <0x70000000 0x10000>; + #address-cells = <1>; + #size-cells = <1>; + clocks = <&clkosr>; + }; + dspgpio0: keystone_dsp_gpio@02620240 { compatible = "ti,keystone-dsp-gpio"; gpio-controller; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 1aeeacb3a884..d4f600dbb7eb 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -558,10 +558,11 @@ }; r_ccu: clock@1f01400 { - compatible = "allwinner,sun50i-a64-r-ccu"; + compatible = "allwinner,sun8i-h3-r-ccu"; reg = <0x01f01400 0x100>; - clocks = <&osc24M>, <&osc32k>, <&iosc>; - clock-names = "hosc", "losc", "iosc"; + clocks = <&osc24M>, <&osc32k>, <&iosc>, + <&ccu 9>; + clock-names = "hosc", "losc", "iosc", "pll-periph"; #clock-cells = <1>; #reset-cells = <1>; }; diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts index 33a8eb28374e..06e2331f666d 100644 --- a/arch/arm/boot/dts/versatile-pb.dts +++ b/arch/arm/boot/dts/versatile-pb.dts @@ -1,4 +1,4 @@ -#include <versatile-ab.dts> +#include "versatile-ab.dts" / { model = "ARM Versatile PB"; diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index cf062472e07b..2b913f17d50f 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -235,7 +235,7 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster) return ret; } -typedef void (*phys_reset_t)(unsigned long); +typedef typeof(cpu_reset) phys_reset_t; void mcpm_cpu_power_down(void) { @@ -300,7 +300,7 @@ void mcpm_cpu_power_down(void) * on the CPU. */ phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); - phys_reset(__pa_symbol(mcpm_entry_point)); + phys_reset(__pa_symbol(mcpm_entry_point), false); /* should never get here */ BUG(); @@ -389,7 +389,7 @@ static int __init nocache_trampoline(unsigned long _arg) __mcpm_cpu_down(cpu, cluster); phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); - phys_reset(__pa_symbol(mcpm_entry_point)); + phys_reset(__pa_symbol(mcpm_entry_point), false); BUG(); } diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 3a36d99ff836..d8360501c082 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -28,7 +28,6 @@ generic-y += segment.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h -generic-y += siginfo.h generic-y += simd.h generic-y += sizes.h generic-y += socket.h diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 36ec9c8f6e16..3234fe9bba6e 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -19,7 +19,8 @@ struct dev_archdata { #ifdef CONFIG_XEN const struct dma_map_ops *dev_dma_ops; #endif - bool dma_coherent; + unsigned int dma_coherent:1; + unsigned int dma_ops_setup:1; }; struct omap_device; diff --git a/arch/arm/include/asm/dmi.h b/arch/arm/include/asm/dmi.h new file mode 100644 index 000000000000..df2d2ff06f5b --- /dev/null +++ b/arch/arm/include/asm/dmi.h @@ -0,0 +1,19 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include <linux/io.h> +#include <linux/slab.h> + +#define dmi_early_remap(x, l) memremap(x, l, MEMREMAP_WB) +#define dmi_early_unmap(x, l) memunmap(x) +#define dmi_remap(x, l) memremap(x, l, MEMREMAP_WB) +#define dmi_unmap(x) memunmap(x) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index 302240c19a5a..a0d726a47c8a 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -66,6 +66,7 @@ typedef pte_t *pte_addr_t; #define pgprot_noncached(prot) (prot) #define pgprot_writecombine(prot) (prot) #define pgprot_dmacoherent(prot) (prot) +#define pgprot_device(prot) (prot) /* diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 607f702c2d62..e9b098d6b766 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -4,3 +4,5 @@ include include/uapi/asm-generic/Kbuild.asm genhdr-y += unistd-common.h genhdr-y += unistd-oabi.h genhdr-y += unistd-eabi.h + +generic-y += siginfo.h diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 32e1a9513dc7..4e80bf7420d4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -315,7 +315,7 @@ static void __init cacheid_init(void) if (arch >= CPU_ARCH_ARMv6) { unsigned int cachetype = read_cpuid_cachetype(); - if ((arch == CPU_ARCH_ARMv7M) && !cachetype) { + if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) { cacheid = 0; } else if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 572a8df1b766..c9a0a5299827 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -555,8 +555,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock); */ static void ipi_cpu_stop(unsigned int cpu) { - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) { + if (system_state <= SYSTEM_RUNNING) { raw_spin_lock(&stop_lock); pr_crit("CPU%u: stopping\n", cpu); dump_stack(); diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 895ae5197159..b30eafeef096 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -403,7 +403,7 @@ out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); return err; } -CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); +TIMER_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); +TIMER_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); +TIMER_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); #endif diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 97b22fa7cb3a..629f8e9981f1 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -120,6 +120,6 @@ void __init time_init(void) #ifdef CONFIG_COMMON_CLK of_clk_init(NULL); #endif - clocksource_probe(); + timer_probe(); } } diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 570ed4a9c261..5386528665b5 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -104,7 +104,6 @@ __do_hyp_init: @ - Write permission implies XN: disabled @ - Instruction cache: enabled @ - Data/Unified cache: enabled - @ - Memory alignment checks: enabled @ - MMU: enabled (this code must be run from an identity mapping) mrc p15, 4, r0, c1, c0, 0 @ HSCR ldr r2, =HSCTLR_MASK @@ -112,8 +111,8 @@ __do_hyp_init: mrc p15, 0, r1, c1, c0, 0 @ SCTLR ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) and r1, r1, r2 - ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) ) - THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) + ARM( ldr r2, =(HSCTLR_M) ) + THUMB( ldr r2, =(HSCTLR_M | HSCTLR_TE) ) orr r1, r1, r2 orr r0, r0, r1 mcr p15, 4, r0, c1, c0, 0 @ HSCR diff --git a/arch/arm/mach-aspeed/Kconfig b/arch/arm/mach-aspeed/Kconfig index f3f8c5c658db..2d5570e6e186 100644 --- a/arch/arm/mach-aspeed/Kconfig +++ b/arch/arm/mach-aspeed/Kconfig @@ -4,7 +4,7 @@ menuconfig ARCH_ASPEED select SRAM select WATCHDOG select ASPEED_WATCHDOG - select MOXART_TIMER + select FTTMR010_TIMER select MFD_SYSCON select PINCTRL help diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 841e924143f9..cbd959b73654 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -1,6 +1,7 @@ menuconfig ARCH_AT91 bool "Atmel SoCs" depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V7 + select ARM_CPU_SUSPEND if PM select COMMON_CLK_AT91 select GPIOLIB select PINCTRL diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index f9389c5910e7..f23a23934162 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -150,7 +150,7 @@ config ARCH_BCM2835 select ARM_ERRATA_411920 if ARCH_MULTI_V6 select ARM_TIMER_SP804 select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7 - select CLKSRC_OF + select TIMER_OF select BCM2835_TIMER select PINCTRL select PINCTRL_BCM2835 diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig index 61284b9389cf..f385b1fcafef 100644 --- a/arch/arm/mach-clps711x/Kconfig +++ b/arch/arm/mach-clps711x/Kconfig @@ -2,7 +2,7 @@ menuconfig ARCH_CLPS711X bool "Cirrus Logic EP721x/EP731x-based" depends on ARCH_MULTI_V4T select AUTO_ZRELADDR - select CLKSRC_OF + select TIMER_OF select CLPS711X_TIMER select COMMON_CLK select CPU_ARM720T diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c index efb80354f303..b5cc05dc2cb2 100644 --- a/arch/arm/mach-davinci/pm.c +++ b/arch/arm/mach-davinci/pm.c @@ -153,7 +153,8 @@ int __init davinci_pm_init(void) davinci_sram_suspend = sram_alloc(davinci_cpu_suspend_sz, NULL); if (!davinci_sram_suspend) { pr_err("PM: cannot allocate SRAM memory\n"); - return -ENOMEM; + ret = -ENOMEM; + goto no_sram_mem; } davinci_sram_push(davinci_sram_suspend, davinci_cpu_suspend, @@ -161,6 +162,10 @@ int __init davinci_pm_init(void) suspend_set_ops(&davinci_pm_ops); + return 0; + +no_sram_mem: + iounmap(pm_config.ddrpsc_reg_base); no_ddrpsc_mem: iounmap(pm_config.ddrpll_reg_base); no_ddrpll_mem: diff --git a/arch/arm/mach-mediatek/mediatek.c b/arch/arm/mach-mediatek/mediatek.c index a6e3c98b95ed..c3cf215773b2 100644 --- a/arch/arm/mach-mediatek/mediatek.c +++ b/arch/arm/mach-mediatek/mediatek.c @@ -41,7 +41,7 @@ static void __init mediatek_timer_init(void) } of_clk_init(NULL); - clocksource_probe(); + timer_probe(); }; static const char * const mediatek_board_dt_compat[] = { diff --git a/arch/arm/mach-moxart/Kconfig b/arch/arm/mach-moxart/Kconfig index 70db2abf6163..a4a91f9a3301 100644 --- a/arch/arm/mach-moxart/Kconfig +++ b/arch/arm/mach-moxart/Kconfig @@ -4,7 +4,7 @@ menuconfig ARCH_MOXART select CPU_FA526 select ARM_DMA_MEM_BUFFERABLE select FARADAY_FTINTC010 - select MOXART_TIMER + select FTTMR010_TIMER select GPIOLIB select PHYLIB if NETDEVICES help diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 07dd692c4737..ae4bb9fdc483 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -497,7 +497,7 @@ void __init omap_init_time(void) __omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon", 2, "timer_sys_ck", NULL, false); - clocksource_probe(); + timer_probe(); } #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM43XX) @@ -506,7 +506,7 @@ void __init omap3_secure_sync32k_timer_init(void) __omap_sync32k_timer_init(12, "secure_32k_fck", "ti,timer-secure", 2, "timer_sys_ck", NULL, false); - clocksource_probe(); + timer_probe(); } #endif /* CONFIG_ARCH_OMAP3 */ @@ -517,7 +517,7 @@ void __init omap3_gptimer_timer_init(void) __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, 1, "timer_sys_ck", "ti,timer-alwon", true); if (of_have_populated_dt()) - clocksource_probe(); + timer_probe(); } #endif @@ -532,7 +532,7 @@ static void __init omap4_sync32k_timer_init(void) void __init omap4_local_timer_init(void) { omap4_sync32k_timer_init(); - clocksource_probe(); + timer_probe(); } #endif @@ -656,7 +656,7 @@ void __init omap5_realtime_timer_init(void) omap4_sync32k_timer_init(); realtime_counter_init(); - clocksource_probe(); + timer_probe(); } #endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */ diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c index ef0500a4c8ad..5ab834ebcb49 100644 --- a/arch/arm/mach-rockchip/rockchip.c +++ b/arch/arm/mach-rockchip/rockchip.c @@ -55,7 +55,7 @@ static void __init rockchip_timer_init(void) } of_clk_init(NULL); - clocksource_probe(); + timer_probe(); } static void __init rockchip_dt_init(void) diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig index 4b1690acb6a5..f07da82ebfea 100644 --- a/arch/arm/mach-s3c24xx/Kconfig +++ b/arch/arm/mach-s3c24xx/Kconfig @@ -394,7 +394,7 @@ config MACH_SMDK2416 config MACH_S3C2416_DT bool "Samsung S3C2416 machine using devicetree" - select CLKSRC_OF + select TIMER_OF select USE_OF select PINCTRL select PINCTRL_S3C24XX diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 459214fa20b4..71a49343d711 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig @@ -336,7 +336,7 @@ config MACH_WLF_CRAGG_6410 config MACH_S3C64XX_DT bool "Samsung S3C6400/S3C6410 machine using Device Tree" - select CLKSRC_OF + select TIMER_OF select CPU_S3C6400 select CPU_S3C6410 select PINCTRL diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index 52d466b75973..a6e74f481dea 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -113,7 +113,7 @@ void __init rcar_gen2_timer_init(void) #endif /* CONFIG_ARM_ARCH_TIMER */ of_clk_init(NULL); - clocksource_probe(); + timer_probe(); } struct memory_reserve_config { diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c index ca2f6a82a414..31c43cabf362 100644 --- a/arch/arm/mach-spear/spear13xx.c +++ b/arch/arm/mach-spear/spear13xx.c @@ -124,5 +124,5 @@ void __init spear13xx_timer_init(void) clk_put(pclk); spear_setup_of_timer(); - clocksource_probe(); + timer_probe(); } diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index f44e3acb5c90..7ab353fb25f2 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -42,7 +42,7 @@ static void __init sun6i_timer_init(void) of_clk_init(NULL); if (IS_ENABLED(CONFIG_RESET_CONTROLLER)) sun6i_reset_init(); - clocksource_probe(); + timer_probe(); } DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family") diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index a4910ea6811a..048f15e8c669 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -407,7 +407,7 @@ static const char * u300_board_compat[] = { DT_MACHINE_START(U300_DT, "U300 S335/B335 (Device Tree)") .map_io = u300_map_io, .init_irq = u300_init_irq_dt, - .init_time = clocksource_probe, + .init_time = timer_probe, .init_machine = u300_init_machine_dt, .restart = u300_restart, .dt_compat = u300_board_compat, diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index ed118648313f..6aba9ebf8041 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -150,7 +150,7 @@ static void __init zynq_timer_init(void) { zynq_clock_init(); of_clk_init(NULL); - clocksource_probe(); + timer_probe(); } static struct map_desc zynq_cortex_a9_scu_map __initdata = { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c742dfd2967b..bd83c531828a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -2311,7 +2311,14 @@ int arm_iommu_attach_device(struct device *dev, } EXPORT_SYMBOL_GPL(arm_iommu_attach_device); -static void __arm_iommu_detach_device(struct device *dev) +/** + * arm_iommu_detach_device + * @dev: valid struct device pointer + * + * Detaches the provided device from a previously attached map. + * This voids the dma operations (dma_map_ops pointer) + */ +void arm_iommu_detach_device(struct device *dev) { struct dma_iommu_mapping *mapping; @@ -2324,22 +2331,10 @@ static void __arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); to_dma_iommu_mapping(dev) = NULL; + set_dma_ops(dev, NULL); pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); } - -/** - * arm_iommu_detach_device - * @dev: valid struct device pointer - * - * Detaches the provided device from a previously attached map. - * This voids the dma operations (dma_map_ops pointer) - */ -void arm_iommu_detach_device(struct device *dev) -{ - __arm_iommu_detach_device(dev); - set_dma_ops(dev, NULL); -} EXPORT_SYMBOL_GPL(arm_iommu_detach_device); static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) @@ -2379,7 +2374,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) if (!mapping) return; - __arm_iommu_detach_device(dev); + arm_iommu_detach_device(dev); arm_iommu_release_mapping(mapping); } @@ -2430,9 +2425,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->dma_ops = xen_dma_ops; } #endif + dev->archdata.dma_ops_setup = true; } void arch_teardown_dma_ops(struct device *dev) { + if (!dev->archdata.dma_ops_setup) + return; + arm_teardown_iommu_dma_ops(dev); } diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 2239fde10b80..f0701d8d24df 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 31af3cb59a60..e46a6a446cdd 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1218,15 +1218,15 @@ void __init adjust_lowmem_bounds(void) high_memory = __va(arm_lowmem_limit - 1) + 1; + if (!memblock_limit) + memblock_limit = arm_lowmem_limit; + /* * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the * last full pmd, which should be mapped. */ - if (memblock_limit) - memblock_limit = round_down(memblock_limit, PMD_SIZE); - if (!memblock_limit) - memblock_limit = arm_lowmem_limit; + memblock_limit = round_down(memblock_limit, PMD_SIZE); if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { if (memblock_end_of_DRAM() > arm_lowmem_limit) { diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..95c7ed392003 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -205,7 +205,7 @@ config GENERIC_CALIBRATE_DELAY config ZONE_DMA def_bool y -config HAVE_GENERIC_RCU_GUP +config HAVE_GENERIC_GUP def_bool y config ARCH_DMA_ADDR_T_64BIT @@ -1084,10 +1084,6 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC -config KEYS_COMPAT - def_bool y - depends on COMPAT && KEYS - endmenu menu "Power management options" diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 73272f43ca01..9ed0a659046b 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -18,7 +18,7 @@ config ARCH_ALPINE config ARCH_BCM2835 bool "Broadcom BCM2835 family" - select CLKSRC_OF + select TIMER_OF select GPIOLIB select PINCTRL select PINCTRL_BCM2835 @@ -178,7 +178,7 @@ config ARCH_TEGRA select ARCH_HAS_RESET_CONTROLLER select CLKDEV_LOOKUP select CLKSRC_MMIO - select CLKSRC_OF + select TIMER_OF select GENERIC_CLOCKEVENTS select GPIOLIB select PINCTRL diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index c7f669f5884f..166c9ef884dc 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -406,8 +406,9 @@ r_ccu: clock@1f01400 { compatible = "allwinner,sun50i-a64-r-ccu"; reg = <0x01f01400 0x100>; - clocks = <&osc24M>, <&osc32k>, <&iosc>; - clock-names = "hosc", "losc", "iosc"; + clocks = <&osc24M>, <&osc32k>, <&iosc>, + <&ccu 11>; + clock-names = "hosc", "losc", "iosc", "pll-periph"; #clock-cells = <1>; #reset-cells = <1>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi index 4d314a253fd9..732e2e06f503 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi @@ -40,7 +40,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "sunxi-h3-h5.dtsi" +#include <arm/sunxi-h3-h5.dtsi> / { cpus { diff --git a/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi b/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi deleted file mode 120000 index 036f01dc2b9b..000000000000 --- a/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi +++ /dev/null @@ -1 +0,0 @@ -../../../../arm/boot/dts/sunxi-h3-h5.dtsi
\ No newline at end of file diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 75bce2d0b1a8..49f6a6242cf9 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -81,6 +81,45 @@ }; }; + reg_sys_5v: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "SYS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + regulator-always-on; + }; + + reg_vdd_3v3: regulator@1 { + compatible = "regulator-fixed"; + regulator-name = "VDD_3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + regulator-always-on; + vin-supply = <®_sys_5v>; + }; + + reg_5v_hub: regulator@2 { + compatible = "regulator-fixed"; + regulator-name = "5V_HUB"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + gpio = <&gpio0 7 0>; + regulator-always-on; + vin-supply = <®_sys_5v>; + }; + + wl1835_pwrseq: wl1835-pwrseq { + compatible = "mmc-pwrseq-simple"; + /* WLAN_EN GPIO */ + reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; + clocks = <&pmic>; + clock-names = "ext_clock"; + power-off-delay-us = <10>; + }; + soc { spi0: spi@f7106000 { status = "ok"; @@ -256,11 +295,31 @@ /* GPIO blocks 16 thru 19 do not appear to be routed to pins */ + dwmmc_0: dwmmc0@f723d000 { + cap-mmc-highspeed; + non-removable; + bus-width = <0x8>; + vmmc-supply = <&ldo19>; + }; + + dwmmc_1: dwmmc1@f723e000 { + card-detect-delay = <200>; + cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + vqmmc-supply = <&ldo7>; + vmmc-supply = <&ldo10>; + bus-width = <0x4>; + disable-wp; + cd-gpios = <&gpio1 0 1>; + }; + dwmmc_2: dwmmc2@f723f000 { - ti,non-removable; + bus-width = <0x4>; non-removable; - /* WL_EN */ - vmmc-supply = <&wlan_en_reg>; + vmmc-supply = <®_vdd_3v3>; + mmc-pwrseq = <&wl1835_pwrseq>; #address-cells = <0x1>; #size-cells = <0x0>; @@ -272,18 +331,6 @@ interrupts = <3 IRQ_TYPE_EDGE_RISING>; }; }; - - wlan_en_reg: regulator@1 { - compatible = "regulator-fixed"; - regulator-name = "wlan-en-regulator"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - /* WLAN_EN GPIO */ - gpio = <&gpio0 5 0>; - /* WLAN card specific delay */ - startup-delay-us = <70000>; - enable-active-high; - }; }; leds { @@ -330,6 +377,7 @@ pmic: pmic@f8000000 { compatible = "hisilicon,hi655x-pmic"; reg = <0x0 0xf8000000 0x0 0x1000>; + #clock-cells = <0>; interrupt-controller; #interrupt-cells = <2>; pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 1e5129b19280..5013e4b2ea71 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -725,20 +725,10 @@ status = "disabled"; }; - fixed_5v_hub: regulator@0 { - compatible = "regulator-fixed"; - regulator-name = "fixed_5v_hub"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - regulator-boot-on; - gpio = <&gpio0 7 0>; - regulator-always-on; - }; - usb_phy: usbphy { compatible = "hisilicon,hi6220-usb-phy"; #phy-cells = <0>; - phy-supply = <&fixed_5v_hub>; + phy-supply = <®_5v_hub>; hisilicon,peripheral-syscon = <&sys_ctrl>; }; @@ -766,17 +756,12 @@ dwmmc_0: dwmmc0@f723d000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - cap-mmc-highspeed; - non-removable; reg = <0x0 0xf723d000 0x0 0x1000>; interrupts = <0x0 0x48 0x4>; clocks = <&sys_ctrl 2>, <&sys_ctrl 1>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>; reset-names = "reset"; - bus-width = <0x8>; - vmmc-supply = <&ldo19>; pinctrl-names = "default"; pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func &emmc_cfg_func &emmc_rst_cfg_func>; @@ -784,13 +769,7 @@ dwmmc_1: dwmmc1@f723e000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; - card-detect-delay = <200>; hisilicon,peripheral-syscon = <&ao_ctrl>; - cap-sd-highspeed; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; reg = <0x0 0xf723e000 0x0 0x1000>; interrupts = <0x0 0x49 0x4>; #address-cells = <0x1>; @@ -799,11 +778,6 @@ clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>; reset-names = "reset"; - vqmmc-supply = <&ldo7>; - vmmc-supply = <&ldo10>; - bus-width = <0x4>; - disable-wp; - cd-gpios = <&gpio1 0 1>; pinctrl-names = "default", "idle"; pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>; pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>; @@ -811,15 +785,12 @@ dwmmc_2: dwmmc2@f723f000 { compatible = "hisilicon,hi6220-dw-mshc"; - num-slots = <0x1>; reg = <0x0 0xf723f000 0x0 0x1000>; interrupts = <0x0 0x4a 0x4>; clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>; clock-names = "ciu", "biu"; resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>; reset-names = "reset"; - bus-width = <0x4>; - broken-cd; pinctrl-names = "default", "idle"; pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>; pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index ac8df5201cd6..b4bc42ece754 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -231,8 +231,7 @@ cpm_crypto: crypto@800000 { compatible = "inside-secure,safexcel-eip197"; reg = <0x800000 0x200000>; - interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING - | IRQ_TYPE_LEVEL_HIGH)>, + interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 7740a75a8230..6e2058847ddc 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -221,8 +221,7 @@ cps_crypto: crypto@800000 { compatible = "inside-secure,safexcel-eip197"; reg = <0x800000 0x200000>; - interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING - | IRQ_TYPE_LEVEL_HIGH)>, + interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 280 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 65cdd878cfbd..97c123e09e45 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -68,6 +68,7 @@ CONFIG_PCIE_QCOM=y CONFIG_PCIE_ARMADA_8K=y CONFIG_PCI_AARDVARK=y CONFIG_PCIE_RCAR=y +CONFIG_PCIE_ROCKCHIP=m CONFIG_PCI_HOST_GENERIC=y CONFIG_PCI_XGENE=y CONFIG_ARM64_VA_BITS_48=y @@ -208,6 +209,8 @@ CONFIG_BRCMFMAC=m CONFIG_WL18XX=m CONFIG_WLCORE_SDIO=m CONFIG_INPUT_EVDEV=y +CONFIG_KEYBOARD_ADC=m +CONFIG_KEYBOARD_CROS_EC=y CONFIG_KEYBOARD_GPIO=y CONFIG_INPUT_MISC=y CONFIG_INPUT_PM8941_PWRKEY=y @@ -263,6 +266,7 @@ CONFIG_SPI_MESON_SPIFC=m CONFIG_SPI_ORION=y CONFIG_SPI_PL022=y CONFIG_SPI_QUP=y +CONFIG_SPI_ROCKCHIP=y CONFIG_SPI_S3C64XX=y CONFIG_SPI_SPIDEV=m CONFIG_SPMI=y @@ -292,6 +296,7 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y CONFIG_CPU_THERMAL=y CONFIG_THERMAL_EMULATION=y CONFIG_EXYNOS_THERMAL=y +CONFIG_ROCKCHIP_THERMAL=m CONFIG_WATCHDOG=y CONFIG_S3C2410_WATCHDOG=y CONFIG_MESON_GXBB_WATCHDOG=m @@ -300,12 +305,14 @@ CONFIG_RENESAS_WDT=y CONFIG_BCM2835_WDT=y CONFIG_MFD_CROS_EC=y CONFIG_MFD_CROS_EC_I2C=y +CONFIG_MFD_CROS_EC_SPI=y CONFIG_MFD_EXYNOS_LPASS=m CONFIG_MFD_HI655X_PMIC=y CONFIG_MFD_MAX77620=y CONFIG_MFD_SPMI_PMIC=y CONFIG_MFD_RK808=y CONFIG_MFD_SEC_CORE=y +CONFIG_REGULATOR_FAN53555=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_GPIO=y CONFIG_REGULATOR_HI655X=y @@ -473,8 +480,10 @@ CONFIG_ARCH_TEGRA_186_SOC=y CONFIG_EXTCON_USB_GPIO=y CONFIG_IIO=y CONFIG_EXYNOS_ADC=y +CONFIG_ROCKCHIP_SARADC=m CONFIG_PWM=y CONFIG_PWM_BCM2835=m +CONFIG_PWM_CROS_EC=m CONFIG_PWM_MESON=m CONFIG_PWM_ROCKCHIP=y CONFIG_PWM_SAMSUNG=y @@ -484,6 +493,7 @@ CONFIG_PHY_HI6220_USB=y CONFIG_PHY_SUN4I_USB=y CONFIG_PHY_ROCKCHIP_INNO_USB2=y CONFIG_PHY_ROCKCHIP_EMMC=y +CONFIG_PHY_ROCKCHIP_PCIE=m CONFIG_PHY_XGENE=y CONFIG_PHY_TEGRA_XUSB=y CONFIG_ARM_SCPI_PROTOCOL=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 0e99978da3f0..59cca1d6ec54 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -23,9 +23,9 @@ #define ACPI_MADT_GICC_LENGTH \ (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) -#define BAD_MADT_GICC_ENTRY(entry, end) \ - (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ - (entry)->header.length != ACPI_MADT_GICC_LENGTH) +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \ + (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end)) /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 15c142ce991c..b4d13d9267ff 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -286,6 +286,10 @@ #define SCTLR_ELx_A (1 << 1) #define SCTLR_ELx_M 1 +#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ + (1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \ + (1 << 28) | (1 << 29)) + #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 5d17f377d905..82cd07592519 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,7 +11,6 @@ * */ -#include <linux/dmi.h> #include <linux/efi.h> #include <linux/init.h> @@ -117,20 +116,6 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm, set_permissions, md); } -static int __init arm64_dmi_init(void) -{ - /* - * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to - * be called early because dmi_id_init(), which is an arch_initcall - * itself, depends on dmi_scan_machine() having been called already. - */ - dmi_scan_machine(); - if (dmi_available) - dmi_set_dump_stack_arch_desc(); - return 0; -} -core_initcall(arm64_dmi_init); - /* * UpdateCapsule() depends on the system being shutdown via * ResetSystem(). diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..c7e3e6387a49 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); - if (!root_ops) + if (!root_ops) { + kfree(ri); return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); if (!ri->cfg) { diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6e0e16a3a7d4..321119881abf 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -961,8 +961,7 @@ void smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) + if (system_state <= SYSTEM_RUNNING) pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_STOP); } diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 59779699a1a4..da33c90248e9 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -70,7 +70,7 @@ void __init time_init(void) u32 arch_timer_rate; of_clk_init(NULL); - clocksource_probe(); + timer_probe(); tick_setup_hrtimer_broadcast(); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..7492d9009610 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -220,11 +220,10 @@ void update_vsyscall(struct timekeeper *tk) if (!use_syscall) { /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; - vdso_data->raw_time_sec = tk->raw_time.tv_sec; - vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; + vdso_data->raw_time_sec = tk->raw_sec; + vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - /* tkr_raw.xtime_nsec == 0 */ vdso_data->cs_mono_mult = tk->tkr_mono.mult; vdso_data->cs_raw_mult = tk->tkr_raw.mult; /* tkr_mono.shift == tkr_raw.shift */ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index e00b4671bd7c..76320e920965 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -256,7 +256,6 @@ monotonic_raw: seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ - lsl x14, x14, x12 get_nsec_per_sec res=x9 lsl x9, x9, x12 diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 839425c24b1c..3f9615582377 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -106,10 +106,13 @@ __do_hyp_init: tlbi alle2 dsb sy - mrs x4, sctlr_el2 - and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2 - ldr x5, =SCTLR_ELx_FLAGS - orr x4, x4, x5 + /* + * Preserve all the RES1 bits while setting the default flags, + * as well as the EE bit on BE. Drop the A flag since the compiler + * is allowed to generate unaligned accesses. + */ + ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) +CPU_BE( orr x4, x4, #SCTLR_ELx_EE) msr sctlr_el2, x4 isb diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 79f37e37d367..6260b69e5622 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -65,8 +65,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, * Here set VMCR.CTLR in ICC_CTLR_EL1 layout. * The vgic_set_vmcr() will convert to ICH_VMCR layout. */ - vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK; - vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK; + vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT; + vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT; vgic_set_vmcr(vcpu, &vmcr); } else { val = 0; @@ -83,8 +83,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, * The VMCR.CTLR value is in ICC_CTLR_EL1 layout. * Extract it directly using ICC_CTLR_EL1 reg definitions. */ - val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK; - val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK; + val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK; + val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; p->regval = val; } @@ -135,7 +135,7 @@ static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, p->regval = 0; vgic_get_vmcr(vcpu, &vmcr); - if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) { + if (!vmcr.cbpr) { if (p->is_write) { vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >> ICC_BPR1_EL1_SHIFT; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 71f930501ade..c870d6f01ac2 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -36,6 +36,7 @@ int bpf_jit_enable __read_mostly; #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) +#define TMP_REG_3 (MAX_BPF_JIT_REG + 3) /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -57,6 +58,7 @@ static const int bpf2a64[] = { /* temporary registers for internal BPF JIT */ [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), + [TMP_REG_3] = A64_R(12), /* tail_call_cnt */ [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ @@ -319,6 +321,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; + const u8 tmp3 = bpf2a64[TMP_REG_3]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; @@ -689,10 +692,10 @@ emit_cond_jmp: emit(A64_PRFM(tmp, PST, L1, STRM), ctx); emit(A64_LDXR(isdw, tmp2, tmp), ctx); emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); - emit(A64_STXR(isdw, tmp2, tmp, tmp2), ctx); + emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); jmp_offset = -3; check_imm19(jmp_offset); - emit(A64_CBNZ(0, tmp2, jmp_offset), ctx); + emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); break; /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 85d4af97c986..dbdbb8a558df 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Return saved PC of a blocked thread. - */ -#define thread_saved_pc(tsk) (tsk->thread.pc) - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index f0eaf0475e7e..a3c8d05c4cc7 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -45,7 +45,6 @@ generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h generic-y += shmparam.h -generic-y += siginfo.h generic-y += signal.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index b9eb3da7f278..7c87b5be53b5 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task) #define release_segments(mm) do { } while (0) /* - * saved PC of a blocked thread. - */ -#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc) - -/* * saved kernel SP and DP of a blocked thread. */ #ifdef _BIG_ENDIAN diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index 13a97aa2285f..1c44d3b3eba0 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -2,3 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h +generic-y += siginfo.h diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index e299d30105b5..a2cdb1521aca 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -69,14 +69,6 @@ void hard_reset_now (void) while(1) /* waiting for RETRIBUTION! */ ; } -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *t) -{ - return task_pt_regs(t)->irp; -} - /* setup the child's kernel stack with a pt_regs and switch_stack on it. * it will be un-nested during _resume and _ret_from_sys_call when the * new thread is scheduled. diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index c530a8fa87ce..fe87b383fbf3 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -85,14 +85,6 @@ hard_reset_now(void) } /* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *t) -{ - return task_pt_regs(t)->erp; -} - -/* * Setup the child's kernel stack with a pt_regs and call switch_stack() on it. * It will be unnested during _resume and _ret_from_sys_call when the new thread * is scheduled. diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 2890099992a9..acc5781100c2 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -36,7 +36,6 @@ generic-y += resource.h generic-y += sections.h generic-y += sembuf.h generic-y += shmbuf.h -generic-y += siginfo.h generic-y += socket.h generic-y += sockios.h generic-y += statfs.h diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 15b815df29c1..bc2729e4b2c9 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) -extern unsigned long thread_saved_pc(struct task_struct *tsk); - /* Free all resources held by a thread. */ static inline void release_thread(struct task_struct *dead_task) { diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index b15bf6bc0e94..b55fc2ae1e8c 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index ddaeb9cc9143..e4d08d74ed9f 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ... #define release_segments(mm) do { } while (0) #define forget_segments() do { } while (0) -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc) diff --git a/arch/frv/include/asm/timex.h b/arch/frv/include/asm/timex.h index a89bddefdacf..139093fab326 100644 --- a/arch/frv/include/asm/timex.h +++ b/arch/frv/include/asm/timex.h @@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void) #define vxtime_lock() do {} while (0) #define vxtime_unlock() do {} while (0) +/* This attribute is used in include/linux/jiffies.h alongside with + * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp + * for frv does not contain another section specification. + */ +#define __jiffy_arch_data __attribute__((__section__(".data"))) + #endif diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 5a4c92abc99e..a957b374e3a6 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(tsk->thread.pc)) - return ((unsigned long *)tsk->thread.fp)[2]; - else - return tsk->thread.pc; -} - int elf_check_arch(const struct elf32_hdr *hdr) { unsigned long hsr0 = __get_HSR(0); diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index da82c25301e7..46aa289c5102 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi addr = PAGE_ALIGN(addr); vma = find_vma(current->mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) goto success; } diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 3ae852507e57..6e3d36f37a02 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -15,7 +15,7 @@ config H8300 select OF_IRQ select OF_EARLY_FLATTREE select HAVE_MEMBLOCK - select CLKSRC_OF + select TIMER_OF select H8300_TMR8 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 757cdeb24e6e..99c824608a31 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -54,7 +54,6 @@ generic-y += serial.h generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h -generic-y += siginfo.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 65132d7ae9e5..afa53147e66a 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk); unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index b15bf6bc0e94..b55fc2ae1e8c 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 0f5db5bb561b..d1ddcabbbe83 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags, return 0; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((struct pt_regs *)tsk->thread.esp0)->pc; -} - unsigned long get_wchan(struct task_struct *p) { unsigned long fp, pc; diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index c8c25a4e9e48..6be15d634650 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -246,5 +246,5 @@ void __init calibrate_delay(void) void __init time_init(void) { of_clk_init(NULL); - clocksource_probe(); + timer_probe(); } diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 6b45ef79eb8f..0fc9cb04e6ad 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -41,7 +41,6 @@ generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h generic-y += shmparam.h -generic-y += siginfo.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 45a825402f63..ce67940860a5 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -33,9 +33,6 @@ /* task_struct, defined elsewhere, is the "process descriptor" */ struct task_struct; -/* this is defined in arch/process.c */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - extern void start_thread(struct pt_regs *, unsigned long, unsigned long); /* diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild index b15bf6bc0e94..b55fc2ae1e8c 100644 --- a/arch/hexagon/include/uapi/asm/Kbuild +++ b/arch/hexagon/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index de715bab7956..656050c2e6a0 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -61,14 +61,6 @@ void arch_cpu_idle(void) } /* - * Return saved PC of a blocked thread - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return 0; -} - -/* * Copy architecture-specific thread state */ int copy_thread(unsigned long clone_flags, unsigned long usp, diff --git a/arch/hexagon/mm/uaccess.c b/arch/hexagon/mm/uaccess.c index ec90afdb3ad0..c599eb126c9e 100644 --- a/arch/hexagon/mm/uaccess.c +++ b/arch/hexagon/mm/uaccess.c @@ -37,15 +37,14 @@ __kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count) long uncleared; while (count > PAGE_SIZE) { - uncleared = __copy_to_user_hexagon(dest, &empty_zero_page, - PAGE_SIZE); + uncleared = raw_copy_to_user(dest, &empty_zero_page, PAGE_SIZE); if (uncleared) return count - (PAGE_SIZE - uncleared); count -= PAGE_SIZE; dest += PAGE_SIZE; } if (count) - count = __copy_to_user_hexagon(dest, &empty_zero_page, count); + count = raw_copy_to_user(dest, &empty_zero_page, count); return count; } diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 26a63d69c599..ab982f07ea68 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat) } /* - * Return saved PC of a blocked thread. - * Note that the only way T can block is through a call to schedule() -> switch_to(). - */ -static inline unsigned long -thread_saved_pc (struct task_struct *t) -{ - struct unw_frame_info info; - unsigned long ip; - - unw_init_from_blocked_task(&info, t); - if (unw_unwind(&info) < 0) - return 0; - unw_get_ip(&info, &ip); - return ip; -} - -/* * Get the current instruction/program counter value. */ #define current_text_addr() \ diff --git a/arch/ia64/include/asm/siginfo.h b/arch/ia64/include/asm/siginfo.h deleted file mode 100644 index 6f2e2dd0f28f..000000000000 --- a/arch/ia64/include/asm/siginfo.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Based on <asm-i386/siginfo.h>. - * - * Modified 1998-2002 - * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co - */ -#ifndef _ASM_IA64_SIGINFO_H -#define _ASM_IA64_SIGINFO_H - -#include <linux/string.h> -#include <uapi/asm/siginfo.h> - -static inline void -copy_siginfo (siginfo_t *to, siginfo_t *from) -{ - if (from->si_code < 0) - memcpy(to, from, sizeof(siginfo_t)); - else - /* _sigchld is currently the largest know union member */ - memcpy(to, from, 4*sizeof(int) + sizeof(from->_sifields._sigchld)); -} - -#endif /* _ASM_IA64_SIGINFO_H */ diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h index f72bf0172bb2..4694c64252d6 100644 --- a/arch/ia64/include/uapi/asm/siginfo.h +++ b/arch/ia64/include/uapi/asm/siginfo.h @@ -11,7 +11,6 @@ #define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) #define HAVE_ARCH_SIGINFO_T -#define HAVE_ARCH_COPY_SIGINFO #define HAVE_ARCH_COPY_SIGINFO_TO_USER #include <asm-generic/siginfo.h> diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 5767367550c6..657874eeeccc 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *); extern void copy_segments(struct task_struct *p, struct mm_struct * mm); extern void release_segments(struct mm_struct * mm); -extern unsigned long thread_saved_pc(struct task_struct *); - /* Copy and release all segment info associated with a VM */ #define copy_segments(p, mm) do { } while (0) #define release_segments(mm) do { } while (0) diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild index b15bf6bc0e94..c94ee54210bc 100644 --- a/arch/m32r/include/uapi/asm/Kbuild +++ b/arch/m32r/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/m32r/include/uapi/asm/siginfo.h b/arch/m32r/include/uapi/asm/siginfo.h deleted file mode 100644 index 7d9cd9ebfd0e..000000000000 --- a/arch/m32r/include/uapi/asm/siginfo.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _M32R_SIGINFO_H -#define _M32R_SIGINFO_H - -#include <asm-generic/siginfo.h> - -#endif /* _M32R_SIGINFO_H */ diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index d8ffcfec599c..8cd7e03f4370 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -39,14 +39,6 @@ #include <linux/err.h> -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return tsk->thread.lr; -} - void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 531cb9eb3319..ddff1164aff0 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -349,7 +351,6 @@ CONFIG_SCSI_A4000T=y CONFIG_SCSI_ZORRO7XX=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -361,6 +362,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -414,6 +416,7 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -572,6 +575,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -590,6 +595,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index ca91d39555da..17384dc959a5 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -331,7 +333,6 @@ CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -343,6 +344,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -388,6 +390,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -531,6 +534,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -549,6 +554,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 23a3d8a691e2..53a641d62f85 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -340,7 +342,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_ATARI_SCSI=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -352,6 +353,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -399,6 +401,7 @@ CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -552,6 +555,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -570,6 +575,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 95deb95140fe..3925ae3a5eb3 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -330,7 +332,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_BVME6000_SCSI=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -342,6 +343,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -387,6 +389,7 @@ CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -523,6 +526,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index afae6958db2d..f4a134b390b4 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -331,7 +333,6 @@ CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -343,6 +344,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -389,6 +391,7 @@ CONFIG_HPLANCE=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -533,6 +536,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -551,6 +556,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index b010734729a7..9ed0cef632b7 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -340,7 +342,6 @@ CONFIG_MAC_SCSI=y CONFIG_SCSI_MAC_ESP=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -352,6 +353,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -408,6 +410,7 @@ CONFIG_MAC8390=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -555,6 +558,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -573,6 +578,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 0e414549b235..efed0d48fd53 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -22,6 +22,8 @@ CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -373,7 +375,6 @@ CONFIG_BVME6000_SCSI=y CONFIG_SUN3X_ESP=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -385,6 +386,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -454,6 +456,7 @@ CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PLIP=m CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m @@ -635,6 +638,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -653,6 +658,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index b2e687a0ec3d..9040457c7f9c 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68030=y @@ -329,7 +331,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_MVME147_SCSI=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -341,6 +342,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -387,6 +389,7 @@ CONFIG_MVME147_NET=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -523,6 +526,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index cbd8ee24d1bc..8b17f00e0484 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -330,7 +332,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_MVME16x_SCSI=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -342,6 +343,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -387,6 +389,7 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -523,6 +526,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 1e82cc944339..5f3718c62c85 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -336,7 +338,6 @@ CONFIG_ISCSI_TCP=m CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -348,6 +349,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -398,6 +400,7 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PLIP=m CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m @@ -546,6 +549,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -564,6 +569,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index f9e77f57a972..8c979a68fca5 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -26,6 +26,8 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_SUN3=y @@ -327,7 +329,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_SUN3_SCSI=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -339,6 +340,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -385,6 +387,7 @@ CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_SUN is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -525,6 +528,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -542,6 +547,7 @@ CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 3c394fcfb368..a1e79530e806 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -26,6 +26,8 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m CONFIG_MQ_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_KYBER=m +CONFIG_IOSCHED_BFQ=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_SUN3X=y @@ -327,7 +329,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m CONFIG_SUN3X_ESP=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -339,6 +340,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_UEVENT=y CONFIG_DM_LOG_WRITES=m +CONFIG_DM_INTEGRITY=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -385,6 +387,7 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -525,6 +528,8 @@ CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_SORT=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m @@ -543,6 +548,7 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_HARDENED_USERCOPY=y CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 82005d2ff717..5ecf4e47b2e2 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -25,7 +25,6 @@ generic-y += preempt.h generic-y += resource.h generic-y += sections.h generic-y += shmparam.h -generic-y += siginfo.h generic-y += spinlock.h generic-y += statfs.h generic-y += termios.h diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index 77239e81379b..94c36030440c 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h index 8c8ce5e1ee0e..3bc64d02ba5f 100644 --- a/arch/m68k/include/asm/signal.h +++ b/arch/m68k/include/asm/signal.h @@ -62,9 +62,4 @@ static inline int __gen_sigismember(sigset_t *set, int _sig) #endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */ -#ifndef __uClinux__ -extern void ptrace_signal_deliver(void); -#define ptrace_signal_deliver ptrace_signal_deliver -#endif /* __uClinux__ */ - #endif /* _M68K_SIGNAL_H */ diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index 64368077235a..68b45cc87e2c 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += auxvec.h generic-y += msgbuf.h generic-y += sembuf.h generic-y += shmbuf.h +generic-y += siginfo.h generic-y += socket.h generic-y += sockios.h generic-y += termbits.h diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index e475c945c8b2..7df92f8b0781 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -40,20 +40,6 @@ asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); - -/* - * Return saved PC from a blocked thread - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp; - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(sw->retpc)) - return ((unsigned long *)sw->a6)[1]; - else - return sw->retpc; -} - void arch_cpu_idle(void) { #if defined(MACH_ATARI_ONLY) diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 6f945bb5ffbd..e79421f5b9cd 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -109,22 +109,6 @@ int fixup_exception(struct pt_regs *regs) return 1; } -void ptrace_signal_deliver(void) -{ - struct pt_regs *regs = signal_pt_regs(); - if (regs->orig_d0 < 0) - return; - switch (regs->d0) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->d0 = regs->orig_d0; - regs->orig_d0 = -1; - regs->pc -= 2; - break; - } -} - static inline void push_cache (unsigned long vaddr) { /* diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index 232a12bf3f99..2dbbb7c66043 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -567,8 +567,7 @@ static void stop_this_cpu(void *data) { unsigned int cpu = smp_processor_id(); - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) { + if (system_state <= SYSTEM_RUNNING) { spin_lock(&stop_lock); pr_crit("CPU%u: stopping\n", cpu); dump_stack(); diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 85885a501dce..8e47121b8b8b 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -4,7 +4,7 @@ config MICROBLAZE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT - select CLKSRC_OF + select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK select GENERIC_ATOMIC64 diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index dc5dd5b69fde..92fd4e95b488 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -1,8 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_AUDIT=y -CONFIG_AUDIT_LOGINUID_IMMUTABLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_SYSFS_DEPRECATED=y @@ -33,10 +31,12 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set +CONFIG_BRIDGE=m CONFIG_MTD=y -CONFIG_PROC_DEVICETREE=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_NETDEVICES=y @@ -47,9 +47,9 @@ CONFIG_XILINX_LL_TEMAC=y # CONFIG_VT is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_UARTLITE=y CONFIG_SERIAL_UARTLITE_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set CONFIG_XILINX_HWICAP=y CONFIG_I2C=y @@ -66,7 +66,6 @@ CONFIG_FB=y CONFIG_FB_XILINX=y # CONFIG_USB_SUPPORT is not set CONFIG_UIO=y -CONFIG_UIO_PDRV=y CONFIG_UIO_PDRV_GENIRQ=y CONFIG_UIO_DMEM_GENIRQ=y CONFIG_EXT2_FS=y @@ -77,14 +76,13 @@ CONFIG_NFS_FS=y CONFIG_CIFS=y CONFIG_CIFS_STATS=y CONFIG_CIFS_STATS2=y -CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_SLAB=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_INFO=y CONFIG_KGDB=y CONFIG_KGDB_TESTS=y CONFIG_KGDB_KDB=y CONFIG_EARLY_PRINTK=y CONFIG_KEYS=y CONFIG_ENCRYPTED_KEYS=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig index 4cdaf565e638..06d69a6e192d 100644 --- a/arch/microblaze/configs/nommu_defconfig +++ b/arch/microblaze/configs/nommu_defconfig @@ -1,9 +1,6 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_AUDIT=y -CONFIG_AUDIT_LOGINUID_IMMUTABLE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y @@ -34,18 +31,15 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_NETDEVICES=y @@ -56,9 +50,9 @@ CONFIG_XILINX_LL_TEMAC=y # CONFIG_VT is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_UARTLITE=y CONFIG_SERIAL_UARTLITE_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set CONFIG_XILINX_HWICAP=y CONFIG_I2C=y @@ -74,10 +68,6 @@ CONFIG_XILINX_WATCHDOG=y CONFIG_FB=y CONFIG_FB_XILINX=y # CONFIG_USB_SUPPORT is not set -CONFIG_UIO=y -CONFIG_UIO_PDRV=y -CONFIG_UIO_PDRV_GENIRQ=y -CONFIG_UIO_DMEM_GENIRQ=y CONFIG_EXT2_FS=y # CONFIG_DNOTIFY is not set CONFIG_CRAMFS=y @@ -85,10 +75,10 @@ CONFIG_ROMFS_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NLS=y -CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_SLAB=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_INFO=y CONFIG_EARLY_PRINTK=y CONFIG_KEYS=y CONFIG_ENCRYPTED_KEYS=y @@ -97,4 +87,3 @@ CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_ARC4=y CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 56830ff65333..83a4ef3a2495 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -1,14 +1,57 @@ generic-y += barrier.h +generic-y += bitops.h +generic-y += bitsperlong.h +generic-y += bug.h +generic-y += bugs.h generic-y += clkdev.h generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h generic-y += exec.h generic-y += extable.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += hardirq.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipcbuf.h +generic-y += irq_regs.h generic-y += irq_work.h +generic-y += kdebug.h +generic-y += kmap_types.h +generic-y += kprobes.h +generic-y += linkage.h +generic-y += local.h +generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mman.h +generic-y += msgbuf.h +generic-y += param.h +generic-y += parport.h +generic-y += percpu.h +generic-y += poll.h generic-y += preempt.h +generic-y += resource.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += siginfo.h +generic-y += signal.h +generic-y += socket.h +generic-y += sockios.h +generic-y += stat.h +generic-y += statfs.h +generic-y += swab.h generic-y += syscalls.h +generic-y += termbits.h +generic-y += termios.h +generic-y += topology.h generic-y += trace_clock.h +generic-y += ucontext.h +generic-y += vga.h generic-y += word-at-a-time.h -generic-y += kprobes.h +generic-y += xor.h diff --git a/arch/microblaze/include/asm/bitops.h b/arch/microblaze/include/asm/bitops.h deleted file mode 100644 index a72468f15c8b..000000000000 --- a/arch/microblaze/include/asm/bitops.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bitops.h> diff --git a/arch/microblaze/include/asm/bug.h b/arch/microblaze/include/asm/bug.h deleted file mode 100644 index b12fd89e42e9..000000000000 --- a/arch/microblaze/include/asm/bug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bug.h> diff --git a/arch/microblaze/include/asm/bugs.h b/arch/microblaze/include/asm/bugs.h deleted file mode 100644 index 61791e1ad9f5..000000000000 --- a/arch/microblaze/include/asm/bugs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bugs.h> diff --git a/arch/microblaze/include/asm/div64.h b/arch/microblaze/include/asm/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/arch/microblaze/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/microblaze/include/asm/emergency-restart.h b/arch/microblaze/include/asm/emergency-restart.h deleted file mode 100644 index 3711bd9d50bd..000000000000 --- a/arch/microblaze/include/asm/emergency-restart.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/emergency-restart.h> diff --git a/arch/microblaze/include/asm/fb.h b/arch/microblaze/include/asm/fb.h deleted file mode 100644 index 3a4988e8df45..000000000000 --- a/arch/microblaze/include/asm/fb.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fb.h> diff --git a/arch/microblaze/include/asm/hardirq.h b/arch/microblaze/include/asm/hardirq.h deleted file mode 100644 index fb3c05a0cbbf..000000000000 --- a/arch/microblaze/include/asm/hardirq.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/hardirq.h> diff --git a/arch/microblaze/include/asm/irq_regs.h b/arch/microblaze/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/microblaze/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/microblaze/include/asm/kdebug.h b/arch/microblaze/include/asm/kdebug.h deleted file mode 100644 index 6ece1b037665..000000000000 --- a/arch/microblaze/include/asm/kdebug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/kdebug.h> diff --git a/arch/microblaze/include/asm/kmap_types.h b/arch/microblaze/include/asm/kmap_types.h deleted file mode 100644 index 25975252d83d..000000000000 --- a/arch/microblaze/include/asm/kmap_types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_MICROBLAZE_KMAP_TYPES_H -#define _ASM_MICROBLAZE_KMAP_TYPES_H - -#include <asm-generic/kmap_types.h> - -#endif /* _ASM_MICROBLAZE_KMAP_TYPES_H */ diff --git a/arch/microblaze/include/asm/linkage.h b/arch/microblaze/include/asm/linkage.h deleted file mode 100644 index 0540bbaad897..000000000000 --- a/arch/microblaze/include/asm/linkage.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/linkage.h> diff --git a/arch/microblaze/include/asm/local.h b/arch/microblaze/include/asm/local.h deleted file mode 100644 index c11c530f74d0..000000000000 --- a/arch/microblaze/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local.h> diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/microblaze/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/microblaze/include/asm/parport.h b/arch/microblaze/include/asm/parport.h deleted file mode 100644 index cf252af64590..000000000000 --- a/arch/microblaze/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/parport.h> diff --git a/arch/microblaze/include/asm/percpu.h b/arch/microblaze/include/asm/percpu.h deleted file mode 100644 index 06a959d67234..000000000000 --- a/arch/microblaze/include/asm/percpu.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/percpu.h> diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 37ef196e4519..330d556860ba 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -extern unsigned long thread_saved_pc(struct task_struct *t); - extern unsigned long get_wchan(struct task_struct *p); # define KSTK_EIP(tsk) (0) @@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Return saved (kernel) PC of a blocked thread. */ -# define thread_saved_pc(tsk) \ - ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0) - unsigned long get_wchan(struct task_struct *p); /* The size allocated for kernel stacks. This _must_ be a power of two! */ diff --git a/arch/microblaze/include/asm/serial.h b/arch/microblaze/include/asm/serial.h deleted file mode 100644 index a0cb0caff152..000000000000 --- a/arch/microblaze/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/serial.h> diff --git a/arch/microblaze/include/asm/shmparam.h b/arch/microblaze/include/asm/shmparam.h deleted file mode 100644 index 93f30deb95d0..000000000000 --- a/arch/microblaze/include/asm/shmparam.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmparam.h> diff --git a/arch/microblaze/include/asm/topology.h b/arch/microblaze/include/asm/topology.h deleted file mode 100644 index 5428f333a02c..000000000000 --- a/arch/microblaze/include/asm/topology.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/topology.h> diff --git a/arch/microblaze/include/asm/ucontext.h b/arch/microblaze/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9f30fb..000000000000 --- a/arch/microblaze/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ucontext.h> diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 032fed71223f..9774e1d9507b 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -38,6 +38,6 @@ #endif /* __ASSEMBLY__ */ -#define __NR_syscalls 398 +#define __NR_syscalls 399 #endif /* _ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/include/asm/vga.h b/arch/microblaze/include/asm/vga.h deleted file mode 100644 index 89d82fd8fcf1..000000000000 --- a/arch/microblaze/include/asm/vga.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/vga.h> diff --git a/arch/microblaze/include/asm/xor.h b/arch/microblaze/include/asm/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/arch/microblaze/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild index 2178c78c7c1a..cb6784f4a427 100644 --- a/arch/microblaze/include/uapi/asm/Kbuild +++ b/arch/microblaze/include/uapi/asm/Kbuild @@ -2,3 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += types.h +generic-y += siginfo.h diff --git a/arch/microblaze/include/uapi/asm/bitsperlong.h b/arch/microblaze/include/uapi/asm/bitsperlong.h deleted file mode 100644 index 6dc0bb0c13b2..000000000000 --- a/arch/microblaze/include/uapi/asm/bitsperlong.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bitsperlong.h> diff --git a/arch/microblaze/include/uapi/asm/errno.h b/arch/microblaze/include/uapi/asm/errno.h deleted file mode 100644 index 4c82b503d92f..000000000000 --- a/arch/microblaze/include/uapi/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/microblaze/include/uapi/asm/fcntl.h b/arch/microblaze/include/uapi/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/microblaze/include/uapi/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/microblaze/include/uapi/asm/ioctl.h b/arch/microblaze/include/uapi/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/microblaze/include/uapi/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/microblaze/include/uapi/asm/ioctls.h b/arch/microblaze/include/uapi/asm/ioctls.h deleted file mode 100644 index ec34c760665e..000000000000 --- a/arch/microblaze/include/uapi/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/microblaze/include/uapi/asm/ipcbuf.h b/arch/microblaze/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/microblaze/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/microblaze/include/uapi/asm/kvm_para.h b/arch/microblaze/include/uapi/asm/kvm_para.h deleted file mode 100644 index 14fab8f0b957..000000000000 --- a/arch/microblaze/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/kvm_para.h> diff --git a/arch/microblaze/include/uapi/asm/mman.h b/arch/microblaze/include/uapi/asm/mman.h deleted file mode 100644 index 8eebf89f5ab1..000000000000 --- a/arch/microblaze/include/uapi/asm/mman.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/mman.h> diff --git a/arch/microblaze/include/uapi/asm/msgbuf.h b/arch/microblaze/include/uapi/asm/msgbuf.h deleted file mode 100644 index 809134c644a6..000000000000 --- a/arch/microblaze/include/uapi/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/msgbuf.h> diff --git a/arch/microblaze/include/uapi/asm/param.h b/arch/microblaze/include/uapi/asm/param.h deleted file mode 100644 index 965d45427975..000000000000 --- a/arch/microblaze/include/uapi/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/microblaze/include/uapi/asm/poll.h b/arch/microblaze/include/uapi/asm/poll.h deleted file mode 100644 index c98509d3149e..000000000000 --- a/arch/microblaze/include/uapi/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/microblaze/include/uapi/asm/resource.h b/arch/microblaze/include/uapi/asm/resource.h deleted file mode 100644 index 04bc4db8921b..000000000000 --- a/arch/microblaze/include/uapi/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/microblaze/include/uapi/asm/sembuf.h b/arch/microblaze/include/uapi/asm/sembuf.h deleted file mode 100644 index 7673b83cfef7..000000000000 --- a/arch/microblaze/include/uapi/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sembuf.h> diff --git a/arch/microblaze/include/uapi/asm/shmbuf.h b/arch/microblaze/include/uapi/asm/shmbuf.h deleted file mode 100644 index 83c05fc2de38..000000000000 --- a/arch/microblaze/include/uapi/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmbuf.h> diff --git a/arch/microblaze/include/uapi/asm/siginfo.h b/arch/microblaze/include/uapi/asm/siginfo.h deleted file mode 100644 index 0815d29d82e5..000000000000 --- a/arch/microblaze/include/uapi/asm/siginfo.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/siginfo.h> diff --git a/arch/microblaze/include/uapi/asm/signal.h b/arch/microblaze/include/uapi/asm/signal.h deleted file mode 100644 index 7b1573ce19de..000000000000 --- a/arch/microblaze/include/uapi/asm/signal.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/signal.h> diff --git a/arch/microblaze/include/uapi/asm/socket.h b/arch/microblaze/include/uapi/asm/socket.h deleted file mode 100644 index 6b71384b9d8b..000000000000 --- a/arch/microblaze/include/uapi/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/microblaze/include/uapi/asm/sockios.h b/arch/microblaze/include/uapi/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/microblaze/include/uapi/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/microblaze/include/uapi/asm/stat.h b/arch/microblaze/include/uapi/asm/stat.h deleted file mode 100644 index 3dc90fa92c70..000000000000 --- a/arch/microblaze/include/uapi/asm/stat.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/stat.h> diff --git a/arch/microblaze/include/uapi/asm/statfs.h b/arch/microblaze/include/uapi/asm/statfs.h deleted file mode 100644 index 0b91fe198c20..000000000000 --- a/arch/microblaze/include/uapi/asm/statfs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/statfs.h> diff --git a/arch/microblaze/include/uapi/asm/swab.h b/arch/microblaze/include/uapi/asm/swab.h deleted file mode 100644 index 7847e563ab66..000000000000 --- a/arch/microblaze/include/uapi/asm/swab.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/swab.h> diff --git a/arch/microblaze/include/uapi/asm/termbits.h b/arch/microblaze/include/uapi/asm/termbits.h deleted file mode 100644 index 3935b106de79..000000000000 --- a/arch/microblaze/include/uapi/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/microblaze/include/uapi/asm/termios.h b/arch/microblaze/include/uapi/asm/termios.h deleted file mode 100644 index 280d78a9d966..000000000000 --- a/arch/microblaze/include/uapi/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h index d8086159d996..a88b3c11cc20 100644 --- a/arch/microblaze/include/uapi/asm/unistd.h +++ b/arch/microblaze/include/uapi/asm/unistd.h @@ -413,5 +413,6 @@ #define __NR_pkey_mprotect 395 #define __NR_pkey_alloc 396 #define __NR_pkey_free 397 +#define __NR_statx 398 #endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 12e093a03e60..e45ada8fb006 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -65,8 +65,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, if (attrs & DMA_ATTR_SKIP_CPU_SYNC) continue; - __dma_sync(page_to_phys(sg_page(sg)) + sg->offset, - sg->length, direction); + __dma_sync(sg_phys(sg), sg->length, direction); } return nents; diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index ef548510b951..4e1b567becd6 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -208,9 +208,7 @@ syscall_debug_table: mfs r11, rmsr; /* save MSR */ \ swi r11, r1, PT_MSR; -#define RESTORE_REGS \ - lwi r11, r1, PT_MSR; \ - mts rmsr , r11; \ +#define RESTORE_REGS_GP \ lwi r2, r1, PT_R2; /* restore SDA */ \ lwi r3, r1, PT_R3; \ lwi r4, r1, PT_R4; \ @@ -242,6 +240,18 @@ syscall_debug_table: lwi r30, r1, PT_R30; \ lwi r31, r1, PT_R31; /* Restore cur task reg */ +#define RESTORE_REGS \ + lwi r11, r1, PT_MSR; \ + mts rmsr , r11; \ + RESTORE_REGS_GP + +#define RESTORE_REGS_RTBD \ + lwi r11, r1, PT_MSR; \ + andni r11, r11, MSR_EIP; /* clear EIP */ \ + ori r11, r11, MSR_EE | MSR_BIP; /* set EE and BIP */ \ + mts rmsr , r11; \ + RESTORE_REGS_GP + #define SAVE_STATE \ swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* save stack */ \ /* See if already in kernel mode.*/ \ @@ -427,7 +437,7 @@ C_ENTRY(ret_from_trap): swi CURRENT_TASK, r0, PER_CPU(CURRENT_SAVE); /* save current */ VM_OFF; tophys(r1,r1); - RESTORE_REGS; + RESTORE_REGS_RTBD; addik r1, r1, PT_SIZE /* Clean up stack space. */ lwi r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */ bri 6f; @@ -436,7 +446,7 @@ C_ENTRY(ret_from_trap): 2: set_bip; /* Ints masked for state restore */ VM_OFF; tophys(r1,r1); - RESTORE_REGS; + RESTORE_REGS_RTBD; addik r1, r1, PT_SIZE /* Clean up stack space. */ tovirt(r1,r1); 6: @@ -612,7 +622,7 @@ C_ENTRY(ret_from_exc): VM_OFF; tophys(r1,r1); - RESTORE_REGS; + RESTORE_REGS_RTBD; addik r1, r1, PT_SIZE /* Clean up stack space. */ lwi r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer. */ @@ -621,7 +631,7 @@ C_ENTRY(ret_from_exc): 2: set_bip; /* Ints masked for state restore */ VM_OFF; tophys(r1,r1); - RESTORE_REGS; + RESTORE_REGS_RTBD; addik r1, r1, PT_SIZE /* Clean up stack space. */ tovirt(r1,r1); @@ -847,7 +857,7 @@ dbtrap_call: /* Return point for kernel/user entry + 8 because of rtsd r15, 8 */ VM_OFF; tophys(r1,r1); /* MS: Restore all regs */ - RESTORE_REGS + RESTORE_REGS_RTBD addik r1, r1, PT_SIZE /* Clean up stack space */ lwi r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer */ DBTRAP_return_user: /* MS: Make global symbol for debugging */ @@ -858,7 +868,7 @@ DBTRAP_return_user: /* MS: Make global symbol for debugging */ 2: VM_OFF; tophys(r1,r1); /* MS: Restore all regs */ - RESTORE_REGS + RESTORE_REGS_RTBD lwi r14, r1, PT_R14; lwi r16, r1, PT_PC; addik r1, r1, PT_SIZE; /* MS: Clean up stack space */ diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index e92a817e645f..6527ec22f158 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } -#ifndef CONFIG_MMU -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct cpu_context *ctx = - &(((struct thread_info *)(tsk->stack))->cpu_context); - - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(ctx->r15)) - return (unsigned long)ctx->r15; - else - return ctx->r14; -} -#endif - unsigned long get_wchan(struct task_struct *p) { /* TBD (used by procfs) */ diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index f31ebb5dc26c..be98ffe28ca8 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -192,7 +192,7 @@ void __init time_init(void) { of_clk_init(NULL); setup_cpuinfo_clk(); - clocksource_probe(); + timer_probe(); } #ifdef CONFIG_DEBUG_FS diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 6841c2df14d9..c48ff4ad2070 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -398,3 +398,4 @@ ENTRY(sys_call_table) .long sys_pkey_mprotect /* 395 */ .long sys_pkey_alloc .long sys_pkey_free + .long sys_statx diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 999066192715..ea2d83f1f4bb 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -178,8 +178,10 @@ static __init int xilinx_clockevent_init(void) clockevent_xilinx_timer.shift); clockevent_xilinx_timer.max_delta_ns = clockevent_delta2ns((u32)~0, &clockevent_xilinx_timer); + clockevent_xilinx_timer.max_delta_ticks = (u32)~0; clockevent_xilinx_timer.min_delta_ns = clockevent_delta2ns(1, &clockevent_xilinx_timer); + clockevent_xilinx_timer.min_delta_ticks = 1; clockevent_xilinx_timer.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_xilinx_timer); @@ -333,5 +335,5 @@ static int __init xilinx_timer_init(struct device_node *timer) return 0; } -CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a", +TIMER_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a", xilinx_timer_init); diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c index 2fcc5a52d84d..ed4454c5ce35 100644 --- a/arch/microblaze/mm/highmem.c +++ b/arch/microblaze/mm/highmem.c @@ -60,6 +60,7 @@ void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; int type; + unsigned int idx; if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); @@ -68,21 +69,18 @@ void __kunmap_atomic(void *kvaddr) } type = kmap_atomic_idx(); -#ifdef CONFIG_DEBUG_HIGHMEM - { - unsigned int idx; - - idx = type + KM_TYPE_NR * smp_processor_id(); - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_page(NULL, vaddr); - } + idx = type + KM_TYPE_NR * smp_processor_id(); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); #endif + /* + * force other mappings to Oops if they'll try to access + * this pte without first remap it + */ + pte_clear(&init_mm, vaddr, kmap_pte-idx); + local_flush_tlb_page(NULL, vaddr); + kmap_atomic_idx_pop(); pagefault_enable(); preempt_enable(); diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 2728a9a9c7c5..145b5ce8eb7e 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -128,19 +128,19 @@ quiet_cmd_cpp_its_S = ITS $@ -DADDR_BITS=$(ADDR_BITS) \ -DADDR_CELLS=$(itb_addr_cells) -$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE +$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE $(call if_changed_dep,cpp_its_S,none,vmlinux.bin) -$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE +$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE $(call if_changed_dep,cpp_its_S,gzip,vmlinux.bin.gz) -$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE +$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE $(call if_changed_dep,cpp_its_S,bzip2,vmlinux.bin.bz2) -$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE +$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE $(call if_changed_dep,cpp_its_S,lzma,vmlinux.bin.lzma) -$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE +$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE $(call if_changed_dep,cpp_its_S,lzo,vmlinux.bin.lzo) quiet_cmd_itb-image = ITB $@ diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c index 4af619215410..1231b5a17b37 100644 --- a/arch/mips/generic/init.c +++ b/arch/mips/generic/init.c @@ -161,7 +161,7 @@ void __init plat_time_init(void) } } - clocksource_probe(); + timer_probe(); } void __init arch_init_irq(void) diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h index d34536e7653f..279b6d14ffeb 100644 --- a/arch/mips/include/asm/highmem.h +++ b/arch/mips/include/asm/highmem.h @@ -35,7 +35,12 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ +#ifdef CONFIG_PHYS_ADDR_T_64BIT +#define LAST_PKMAP 512 +#else #define LAST_PKMAP 1024 +#endif + #define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index 291846d9ba83..ad1a99948f27 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -43,7 +43,8 @@ typedef union mips_instruction kprobe_opcode_t; #define flush_insn_slot(p) \ do { \ - flush_icache_range((unsigned long)p->addr, \ + if (p->addr) \ + flush_icache_range((unsigned long)p->addr, \ (unsigned long)p->addr + \ (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \ } while (0) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 6f94bed571c4..74afe8c76bdd 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -19,6 +19,10 @@ #define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> +#ifdef CONFIG_HIGHMEM +#include <asm/highmem.h> +#endif + extern int temp_tlb_entry; /* @@ -62,7 +66,8 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, #define VMALLOC_START MAP_BASE -#define PKMAP_BASE (0xfe000000UL) +#define PKMAP_END ((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1)) +#define PKMAP_BASE (PKMAP_END - PAGE_SIZE * LAST_PKMAP) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index b11facd11c9d..f702a459a830 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -804,8 +804,10 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; } /* Compact branch: BNEZC || JIALC */ - if (insn.i_format.rs) + if (!insn.i_format.rs) { + /* JIALC: set $31/ra */ regs->regs[31] = epc + 4; + } regs->cp0_epc += 8; break; #endif diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 8d83fc2a96b7..38a302919e6b 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -11,6 +11,7 @@ #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/compiler.h> +#include <asm/irqflags.h> #include <asm/regdef.h> #include <asm/mipsregs.h> #include <asm/stackframe.h> @@ -119,6 +120,7 @@ work_pending: andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS beqz t0, work_notifysig work_resched: + TRACE_IRQS_OFF jal schedule local_irq_disable # make sure need_resched and @@ -155,6 +157,7 @@ syscall_exit_work: beqz t0, work_pending # trace bit set? local_irq_enable # could let syscall_trace_leave() # call schedule() instead + TRACE_IRQS_ON move a0, sp jal syscall_trace_leave b resume_userspace diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 30a3b75e88eb..9d9b8fbae202 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -38,20 +38,6 @@ void arch_ftrace_update_code(int command) #endif -/* - * Check if the address is in kernel space - * - * Clone core_kernel_text() from kernel/extable.c, but doesn't call - * init_kernel_text() for Ftrace doesn't trace functions in init sections. - */ -static inline int in_kernel_space(unsigned long ip) -{ - if (ip >= (unsigned long)_stext && - ip <= (unsigned long)_etext) - return 1; - return 0; -} - #ifdef CONFIG_DYNAMIC_FTRACE #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ @@ -198,7 +184,7 @@ int ftrace_make_nop(struct module *mod, * If ip is in kernel space, no long call, otherwise, long call is * needed. */ - new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F; + new = core_kernel_text(ip) ? INSN_NOP : INSN_B_1F; #ifdef CONFIG_64BIT return ftrace_modify_code(ip, new); #else @@ -218,12 +204,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned int new; unsigned long ip = rec->ip; - new = in_kernel_space(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0]; + new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0]; #ifdef CONFIG_64BIT return ftrace_modify_code(ip, new); #else - return ftrace_modify_code_2r(ip, new, in_kernel_space(ip) ? + return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ? INSN_NOP : insn_la_mcount[1]); #endif } @@ -289,7 +275,7 @@ unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for * kernel, move after the instruction "move ra, at"(offset is 16) */ - ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24); + ip = self_ra - (core_kernel_text(self_ra) ? 16 : 24); /* * search the text until finding the non-store instruction or "s{d,w} @@ -394,7 +380,7 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, * entries configured through the tracing/set_graph_function interface. */ - insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1; + insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1; trace.func = self_ra - (MCOUNT_INSN_SIZE * insns); /* Only trace if the calling function expects to */ diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index cf052204eb0a..d1bb506adc10 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -106,8 +106,8 @@ NESTED(kernel_entry, 16, sp) # kernel entry point beq t0, t1, dtb_found #endif li t1, -2 - beq a0, t1, dtb_found move t2, a1 + beq a0, t1, dtb_found li t2, 0 dtb_found: diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 313a88b2973f..f3e301f95aef 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1597,7 +1597,6 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) break; case CPU_P5600: case CPU_P6600: - case CPU_I6400: /* 8-bit event numbers */ raw_id = config & 0x1ff; base_id = raw_id & 0xff; @@ -1610,6 +1609,11 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) raw_event.range = P; #endif break; + case CPU_I6400: + /* 8-bit event numbers */ + base_id = config & 0xff; + raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; + break; case CPU_1004K: if (IS_BOTH_COUNTERS_1004K_EVENT(base_id)) raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD; diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 5f928c34c148..d99416094ba9 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -56,7 +56,6 @@ DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT); * state. Actually per-core rather than per-CPU. */ static DEFINE_PER_CPU_ALIGNED(u32*, ready_count); -static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc); /* Indicates online CPUs coupled with the current CPU */ static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled); @@ -642,7 +641,6 @@ static int cps_pm_online_cpu(unsigned int cpu) { enum cps_pm_state state; unsigned core = cpu_data[cpu].core; - unsigned dlinesz = cpu_data[cpu].dcache.linesz; void *entry_fn, *core_rc; for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) { @@ -662,16 +660,11 @@ static int cps_pm_online_cpu(unsigned int cpu) } if (!per_cpu(ready_count, core)) { - core_rc = kmalloc(dlinesz * 2, GFP_KERNEL); + core_rc = kmalloc(sizeof(u32), GFP_KERNEL); if (!core_rc) { pr_err("Failed allocate core %u ready_count\n", core); return -ENOMEM; } - per_cpu(ready_count_alloc, core) = core_rc; - - /* Ensure ready_count is aligned to a cacheline boundary */ - core_rc += dlinesz - 1; - core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1)); per_cpu(ready_count, core) = core_rc; } diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 918d4c73e951..5351e1f3950d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -120,7 +120,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); unsigned long childksp; - p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 9681b5877140..38dfa27730ff 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -201,6 +201,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; mm_segment_t old_fs = get_fs(); + + regs.cp0_status = KSU_KERNEL; if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 7c6336dd2638..7cd92166a0b9 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi) int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, bool user, bool kernel) { - int idx_user, idx_kernel; + /* + * Initialize idx_user and idx_kernel to workaround bogus + * maybe-initialized warning when using GCC 6. + */ + int idx_user = 0, idx_kernel = 0; unsigned long flags, old_entryhi; local_irq_save(flags); diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 4a2d03c72959..caa62f20a888 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -54,7 +54,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, return ieee754dp_nanxcpt(z); case IEEE754_CLASS_DNORM: DPDNORMZ; - /* QNAN is handled separately below */ + /* QNAN and ZERO cases are handled separately below */ } switch (CLPAIR(xc, yc)) { @@ -210,6 +210,9 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, } assert(rm & (DP_HIDDEN_BIT << 3)); + if (zc == IEEE754_CLASS_ZERO) + return ieee754dp_format(rs, re, rm); + /* And now the addition */ assert(zm & DP_HIDDEN_BIT); diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index a8cd8b4f235e..c91d5e5d9b5f 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -54,7 +54,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, return ieee754sp_nanxcpt(z); case IEEE754_CLASS_DNORM: SPDNORMZ; - /* QNAN is handled separately below */ + /* QNAN and ZERO cases are handled separately below */ } switch (CLPAIR(xc, yc)) { @@ -203,6 +203,9 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, } assert(rm & (SP_HIDDEN_BIT << 3)); + if (zc == IEEE754_CLASS_ZERO) + return ieee754sp_format(rs, re, rm); + /* And now the addition */ assert(zm & SP_HIDDEN_BIT); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index fe8df14b6169..e08598c70b3e 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -68,12 +68,25 @@ static inline struct page *dma_addr_to_page(struct device *dev, * systems and only the R10000 and R12000 are used in such systems, the * SGI IP28 Indigo² rsp. SGI IP32 aka O2. */ -static inline int cpu_needs_post_dma_flush(struct device *dev) +static inline bool cpu_needs_post_dma_flush(struct device *dev) { - return !plat_device_is_coherent(dev) && - (boot_cpu_type() == CPU_R10000 || - boot_cpu_type() == CPU_R12000 || - boot_cpu_type() == CPU_BMIPS5000); + if (plat_device_is_coherent(dev)) + return false; + + switch (boot_cpu_type()) { + case CPU_R10000: + case CPU_R12000: + case CPU_BMIPS5000: + return true; + + default: + /* + * Presence of MAARs suggests that the CPU supports + * speculatively prefetching data, and therefore requires + * the post-DMA flush/invalidate. + */ + return cpu_has_maar; + } } static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 64dd8bdd92c3..28adeabe851f 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index adc6911ba748..b19a3c506b1e 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -51,15 +51,15 @@ void __init pagetable_init(void) /* * Fixed mappings: */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1); + fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base); #ifdef CONFIG_HIGHMEM /* * Permanent kmaps: */ vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); pgd = swapper_pg_dir + __pgd_offset(vaddr); pud = pud_offset(pgd, vaddr); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 289edcfadd7c..cea4ec909806 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -265,7 +265,7 @@ void __init plat_time_init(void) (freq%1000000)*100/1000000); #ifdef CONFIG_CLKSRC_MIPS_GIC update_gic_frequency_dt(); - clocksource_probe(); + timer_probe(); #endif } #endif diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c index 62a0a78b6c64..1894e50939b5 100644 --- a/arch/mips/pic32/pic32mzda/time.c +++ b/arch/mips/pic32/pic32mzda/time.c @@ -64,5 +64,5 @@ void __init plat_time_init(void) pr_info("CPU Clock: %ldMHz\n", rate / 1000000); mips_hpt_frequency = rate / 2; - clocksource_probe(); + timer_probe(); } diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 1022201b2beb..17a0f1dec05b 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -39,7 +39,7 @@ void __init plat_time_init(void) struct clk *clk; of_clk_init(NULL); - clocksource_probe(); + timer_probe(); np = of_get_cpu_node(0, NULL); if (!np) { diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig index 9825dee10bc1..710b04cf4851 100644 --- a/arch/mips/ralink/Kconfig +++ b/arch/mips/ralink/Kconfig @@ -4,7 +4,7 @@ config CLKEVT_RT3352 bool depends on SOC_RT305X || SOC_MT7620 default y - select CLKSRC_OF + select TIMER_OF select CLKSRC_MMIO config RALINK_ILL_ACC diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c index b8a1376165b0..92f284d2b802 100644 --- a/arch/mips/ralink/cevt-rt3352.c +++ b/arch/mips/ralink/cevt-rt3352.c @@ -152,4 +152,4 @@ static int __init ralink_systick_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); +TIMER_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c index df795885eace..eb1c61917eb7 100644 --- a/arch/mips/ralink/clk.c +++ b/arch/mips/ralink/clk.c @@ -82,5 +82,5 @@ void __init plat_time_init(void) pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000); mips_hpt_frequency = clk_get_rate(clk) / 2; clk_put(clk); - clocksource_probe(); + timer_probe(); } diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c index 069771dbec42..b5f07d21fcf2 100644 --- a/arch/mips/ralink/timer-gic.c +++ b/arch/mips/ralink/timer-gic.c @@ -20,5 +20,5 @@ void __init plat_time_init(void) ralink_of_remap(); of_clk_init(NULL); - clocksource_probe(); + timer_probe(); } diff --git a/arch/mips/xilfpga/time.c b/arch/mips/xilfpga/time.c index cbb3fca7b6fa..36f3f1870ee2 100644 --- a/arch/mips/xilfpga/time.c +++ b/arch/mips/xilfpga/time.c @@ -22,7 +22,7 @@ void __init plat_time_init(void) struct clk *clk; of_clk_init(NULL); - clocksource_probe(); + timer_probe(); np = of_get_cpu_node(0, NULL); if (!np) { diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 18e17abf7664..3ae479117b42 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs, /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(task) ((task)->thread.uregs) diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild index b15bf6bc0e94..c94ee54210bc 100644 --- a/arch/mn10300/include/uapi/asm/Kbuild +++ b/arch/mn10300/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/mn10300/include/uapi/asm/siginfo.h b/arch/mn10300/include/uapi/asm/siginfo.h deleted file mode 100644 index 0815d29d82e5..000000000000 --- a/arch/mn10300/include/uapi/asm/siginfo.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/siginfo.h> diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index c9fa42619c6a..89e8027e07fb 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -40,14 +40,6 @@ #include "internal.h" /* - * return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *) tsk->thread.sp)[3]; -} - -/* * power off function, if any */ void (*pm_power_off)(void); diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index a72d5f0de692..c587764b9c5a 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -1,6 +1,6 @@ config NIOS2 def_bool y - select CLKSRC_OF + select TIMER_OF select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 727dbb333f60..e1a843def56f 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -47,7 +47,6 @@ generic-y += segment.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h -generic-y += siginfo.h generic-y += signal.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index 3bbbc3d798e5..4944e2e1d8b0 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Return saved PC of a blocked thread. */ -#define thread_saved_pc(tsk) ((tsk)->thread.kregs->ea) - extern unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(p) \ diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild index 374bd123329f..51eff5bc2eb4 100644 --- a/arch/nios2/include/uapi/asm/Kbuild +++ b/arch/nios2/include/uapi/asm/Kbuild @@ -2,4 +2,5 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += setup.h +generic-y += siginfo.h generic-y += ucontext.h diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index 6e2bdc9b8530..645129aaa9a0 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -350,7 +350,7 @@ void __init time_init(void) if (count < 2) panic("%d timer is found, it needs 2 timers in system\n", count); - clocksource_probe(); + timer_probe(); } -CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); +TIMER_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index fdbcf0bf44a4..091585addb91 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -46,7 +46,6 @@ generic-y += sembuf.h generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h -generic-y += siginfo.h generic-y += signal.h generic-y += socket.h generic-y += sockios.h diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index a908e6c30a00..396d8f306c21 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); -/* - * Return saved PC of a blocked thread. For now, this is the "user" PC - */ -extern unsigned long thread_saved_pc(struct task_struct *t); - #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild index b15bf6bc0e94..b55fc2ae1e8c 100644 --- a/arch/openrisc/include/uapi/asm/Kbuild +++ b/arch/openrisc/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index f8da545854f9..f9b77003f113 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs) show_registers(regs); } -unsigned long thread_saved_pc(struct task_struct *t) -{ - return (unsigned long)user_regs(t->stack)->pc; -} - void release_thread(struct task_struct *dead_task) { } @@ -167,8 +162,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, top_of_kernel_stack = sp; - p->set_child_tid = p->clear_child_tid = NULL; - /* Locate userspace context on stack... */ sp -= STACK_FRAME_OVERHEAD; /* redzone */ sp -= sizeof(struct pt_regs); diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h index 5404c6a726b2..9a2a8956a695 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -20,6 +20,8 @@ ** flush/purge and allocate "regular" cacheable pages for everything. */ +#define DMA_ERROR_CODE (~(dma_addr_t)0) + #ifdef CONFIG_PA11 extern const struct dma_map_ops pcxl_dma_ops; extern const struct dma_map_ops pcx_dma_ops; @@ -54,12 +56,13 @@ parisc_walk_tree(struct device *dev) break; } } - BUG_ON(!dev->platform_data); return dev->platform_data; } - -#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu) - + +#define GET_IOC(dev) ({ \ + void *__pdata = parisc_walk_tree(dev); \ + __pdata ? HBA_DATA(__pdata)->iommu : NULL; \ +}) #ifdef CONFIG_IOMMU_CCIO struct parisc_device; diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 1a16f1d1075f..af98254f7257 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -34,10 +34,10 @@ static inline unsigned char gsc_readb(unsigned long addr) unsigned char ret; __asm__ __volatile__( - " rsm 2,%0\n" + " rsm %3,%0\n" " ldbx 0(%2),%1\n" " mtsm %0\n" - : "=&r" (flags), "=r" (ret) : "r" (addr) ); + : "=&r" (flags), "=r" (ret) : "r" (addr), "i" (PSW_SM_D) ); return ret; } @@ -48,10 +48,10 @@ static inline unsigned short gsc_readw(unsigned long addr) unsigned short ret; __asm__ __volatile__( - " rsm 2,%0\n" + " rsm %3,%0\n" " ldhx 0(%2),%1\n" " mtsm %0\n" - : "=&r" (flags), "=r" (ret) : "r" (addr) ); + : "=&r" (flags), "=r" (ret) : "r" (addr), "i" (PSW_SM_D) ); return ret; } @@ -87,20 +87,20 @@ static inline void gsc_writeb(unsigned char val, unsigned long addr) { long flags; __asm__ __volatile__( - " rsm 2,%0\n" + " rsm %3,%0\n" " stbs %1,0(%2)\n" " mtsm %0\n" - : "=&r" (flags) : "r" (val), "r" (addr) ); + : "=&r" (flags) : "r" (val), "r" (addr), "i" (PSW_SM_D) ); } static inline void gsc_writew(unsigned short val, unsigned long addr) { long flags; __asm__ __volatile__( - " rsm 2,%0\n" + " rsm %3,%0\n" " sths %1,0(%2)\n" " mtsm %0\n" - : "=&r" (flags) : "r" (val), "r" (addr) ); + : "=&r" (flags) : "r" (val), "r" (addr), "i" (PSW_SM_D) ); } static inline void gsc_writel(unsigned int val, unsigned long addr) diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h index 59be25764433..a81226257878 100644 --- a/arch/parisc/include/asm/mmu_context.h +++ b/arch/parisc/include/asm/mmu_context.h @@ -49,15 +49,26 @@ static inline void load_context(mm_context_t context) mtctl(__space_to_prot(context), 8); } -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +static inline void switch_mm_irqs_off(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) { - if (prev != next) { mtctl(__pa(next->pgd), 25); load_context(next->context); } } +static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} +#define switch_mm_irqs_off switch_mm_irqs_off + #define deactivate_mm(tsk,mm) do { } while (0) static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index 451906d78136..7569627a032b 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -6,6 +6,8 @@ #if !defined(__ASSEMBLY__) extern int pdc_type; +extern unsigned long parisc_cell_num; /* cell number the CPU runs on (PAT) */ +extern unsigned long parisc_cell_loc; /* cell location of CPU (PAT) */ /* Values for pdc_type */ #define PDC_TYPE_ILLEGAL -1 @@ -143,6 +145,18 @@ struct pdc_btlb_info { /* PDC_BLOCK_TLB, return of PDC_BTLB_INFO */ #endif /* !CONFIG_PA20 */ +struct pdc_mem_retinfo { /* PDC_MEM/PDC_MEM_MEMINFO (return info) */ + unsigned long pdt_size; + unsigned long pdt_entries; + unsigned long pdt_status; + unsigned long first_dbe_loc; + unsigned long good_mem; +}; + +struct pdc_mem_read_pdt { /* PDC_MEM/PDC_MEM_READ_PDT (return info) */ + unsigned long pdt_entries; +}; + #ifdef CONFIG_64BIT struct pdc_memory_table_raddr { /* PDC_MEM/PDC_MEM_TABLE (return info) */ unsigned long entries_returned; @@ -301,6 +315,10 @@ int pdc_get_initiator(struct hardware_path *, struct pdc_initiator *); int pdc_tod_read(struct pdc_tod *tod); int pdc_tod_set(unsigned long sec, unsigned long usec); +void pdc_pdt_init(void); /* in pdt.c */ +int pdc_mem_pdt_info(struct pdc_mem_retinfo *rinfo); +int pdc_mem_pdt_read_entries(struct pdc_mem_read_pdt *rpdt_read, + unsigned long *pdt_entries_ptr); #ifdef CONFIG_64BIT int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr, struct pdc_memory_table *tbl, unsigned long entries); diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h index e1d289092705..32e105fb8adb 100644 --- a/arch/parisc/include/asm/pdcpat.h +++ b/arch/parisc/include/asm/pdcpat.h @@ -147,9 +147,9 @@ #define PDC_PAT_MEM_CELL_CLEAR 6L /* Clear PDT For Cell */ #define PDC_PAT_MEM_CELL_READ 7L /* Read PDT entries For Cell */ #define PDC_PAT_MEM_CELL_RESET 8L /* Reset clear bit For Cell */ -#define PDC_PAT_MEM_SETGM 9L /* Set Golden Memory value */ -#define PDC_PAT_MEM_ADD_PAGE 10L /* ADDs a page to the cell */ -#define PDC_PAT_MEM_ADDRESS 11L /* Get Physical Location From */ +#define PDC_PAT_MEM_SETGM 9L /* Set Good Memory value */ +#define PDC_PAT_MEM_ADD_PAGE 10L /* ADDs a page to the cell */ +#define PDC_PAT_MEM_ADDRESS 11L /* Get Physical Location From */ /* Memory Address */ #define PDC_PAT_MEM_GET_TXT_SIZE 12L /* Get Formatted Text Size */ #define PDC_PAT_MEM_GET_PD_TXT 13L /* Get PD Formatted Text */ @@ -212,6 +212,23 @@ struct pdc_pat_cpu_num { unsigned long cpu_loc; }; +struct pdc_pat_mem_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_INFO (return info) */ + unsigned int ke; /* bit 0: memory inside good memory? */ + unsigned int current_pdt_entries:16; + unsigned int max_pdt_entries:16; + unsigned long Cs_bitmap; + unsigned long Ic_bitmap; + unsigned long good_mem; + unsigned long first_dbe_loc; /* first location of double bit error */ + unsigned long clear_time; /* last PDT clear time (since Jan 1970) */ +}; + +struct pdc_pat_mem_read_pd_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_READ */ + unsigned long actual_count_bytes; + unsigned long pdt_entries; +}; + + struct pdc_pat_pd_addr_map_entry { unsigned char entry_type; /* 1 = Memory Descriptor Entry Type */ unsigned char reserve1[5]; @@ -293,15 +310,15 @@ extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, unsigned lon extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset); - extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *val); extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val); - -/* Flag to indicate this is a PAT box...don't use this unless you -** really have to...it might go away some day. -*/ -extern int pdc_pat; /* arch/parisc/kernel/inventory.c */ +extern int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo); +extern int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, + unsigned long *pdt_entries_ptr, unsigned long max_entries); +extern int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, + unsigned long *pdt_entries_ptr, unsigned long count, + unsigned long offset); #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 3a4ed9f91d57..71ca86cb0f16 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -511,6 +511,9 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, #define pte_same(A,B) (pte_val(A) == pte_val(B)) +struct seq_file; +extern void arch_report_meminfo(struct seq_file *m); + #endif /* !__ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index a3661ee6b060..b3b66c3d6f3c 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -103,6 +103,8 @@ struct cpuinfo_parisc { unsigned long bh_count; /* number of times bh was invoked */ unsigned long fp_rev; unsigned long fp_model; + unsigned long cpu_num; /* CPU number from PAT firmware */ + unsigned long cpu_loc; /* CPU location from PAT firmware */ unsigned int state; struct parisc_device *dev; unsigned long loops_per_jiffy; @@ -163,12 +165,7 @@ struct thread_struct { .flags = 0 \ } -/* - * Return saved PC of a blocked thread. This is used by ps mostly. - */ - struct task_struct; -unsigned long thread_saved_pc(struct task_struct *t); void show_trace(struct task_struct *task, unsigned long *stack); /* diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 6b113f39f30c..c3e114f67485 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -69,17 +69,6 @@ struct exception_table_entry { ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) /* - * The page fault handler stores, in a per-cpu area, the following information - * if a fixup routine is available. - */ -struct exception_data { - unsigned long fault_ip; - unsigned long fault_gp; - unsigned long fault_space; - unsigned long fault_addr; -}; - -/* * load_sr2() preloads the space register %%sr2 - based on the value of * get_fs() - with either a value of 0 to access kernel space (KERNEL_DS which * is 0), or with the current value of %%sr3 to access user space (USER_DS) diff --git a/arch/parisc/include/uapi/asm/pdc.h b/arch/parisc/include/uapi/asm/pdc.h index 0609ff117f67..1f30b49772aa 100644 --- a/arch/parisc/include/uapi/asm/pdc.h +++ b/arch/parisc/include/uapi/asm/pdc.h @@ -131,12 +131,12 @@ #define PDC_TLB_SETUP 1 /* set up miss handling */ #define PDC_MEM 20 /* Manage memory */ -#define PDC_MEM_MEMINFO 0 -#define PDC_MEM_ADD_PAGE 1 -#define PDC_MEM_CLEAR_PDT 2 -#define PDC_MEM_READ_PDT 3 -#define PDC_MEM_RESET_CLEAR 4 -#define PDC_MEM_GOODMEM 5 +#define PDC_MEM_MEMINFO 0 /* Return PDT info */ +#define PDC_MEM_ADD_PAGE 1 /* Add page to PDT */ +#define PDC_MEM_CLEAR_PDT 2 /* Clear PDT */ +#define PDC_MEM_READ_PDT 3 /* Read PDT entry */ +#define PDC_MEM_RESET_CLEAR 4 /* Reset PDT clear flag */ +#define PDC_MEM_GOODMEM 5 /* Set good_mem value */ #define PDC_MEM_TABLE 128 /* Non contig mem map (sprockets) */ #define PDC_MEM_RETURN_ADDRESS_TABLE PDC_MEM_TABLE #define PDC_MEM_GET_MEMORY_SYSTEM_TABLES_SIZE 131 diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 69a11183d48d..c4294df69fb6 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -4,7 +4,7 @@ extra-y := head.o vmlinux.lds -obj-y := cache.o pacache.o setup.o traps.o time.o irq.o \ +obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \ ptrace.o hardware.o inventory.o drivers.o \ signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \ diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 1c4fe61a592b..dfff8a0d6fd1 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -298,11 +298,6 @@ int main(void) DEFINE(HUGEPAGE_SIZE, PAGE_SIZE); #endif BLANK(); - DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip)); - DEFINE(EXCDATA_GP, offsetof(struct exception_data, fault_gp)); - DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space)); - DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr)); - BLANK(); DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long)); BLANK(); return 0; diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 9d797ae4fa22..98190252c12f 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -957,6 +957,41 @@ int pdc_tod_read(struct pdc_tod *tod) } EXPORT_SYMBOL(pdc_tod_read); +int pdc_mem_pdt_info(struct pdc_mem_retinfo *rinfo) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_MEM, PDC_MEM_MEMINFO, __pa(pdc_result), 0); + convert_to_wide(pdc_result); + memcpy(rinfo, pdc_result, sizeof(*rinfo)); + spin_unlock_irqrestore(&pdc_lock, flags); + + return retval; +} + +int pdc_mem_pdt_read_entries(struct pdc_mem_read_pdt *pret, + unsigned long *pdt_entries_ptr) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_MEM, PDC_MEM_READ_PDT, __pa(pdc_result), + __pa(pdc_result2)); + if (retval == PDC_OK) { + convert_to_wide(pdc_result); + memcpy(pret, pdc_result, sizeof(*pret)); + convert_to_wide(pdc_result2); + memcpy(pdt_entries_ptr, pdc_result2, + pret->pdt_entries * sizeof(*pdt_entries_ptr)); + } + spin_unlock_irqrestore(&pdc_lock, flags); + + return retval; +} + /** * pdc_tod_set - Set the Time-Of-Day clock. * @sec: The number of seconds since epoch. @@ -1383,6 +1418,79 @@ int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val) return retval; } + +/** + * pdc_pat_mem_pdc_info - Retrieve information about page deallocation table + * @rinfo: memory pdt information + * + */ +int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_PD_INFO, + __pa(&pdc_result)); + if (retval == PDC_OK) + memcpy(rinfo, &pdc_result, sizeof(*rinfo)); + spin_unlock_irqrestore(&pdc_lock, flags); + + return retval; +} + +/** + * pdc_pat_mem_read_cell_pdt - Read PDT entries from (old) PAT firmware + * @pret: array of PDT entries + * @pdt_entries_ptr: ptr to hold number of PDT entries + * @max_entries: maximum number of entries to be read + * + */ +int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, + unsigned long *pdt_entries_ptr, unsigned long max_entries) +{ + int retval; + unsigned long flags, entries; + + spin_lock_irqsave(&pdc_lock, flags); + /* PDC_PAT_MEM_CELL_READ is available on early PAT machines only */ + retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_CELL_READ, + __pa(&pdc_result), parisc_cell_num, __pa(&pdc_result2)); + + if (retval == PDC_OK) { + /* build up return value as for PDC_PAT_MEM_PD_READ */ + entries = min(pdc_result[0], max_entries); + pret->pdt_entries = entries; + pret->actual_count_bytes = entries * sizeof(unsigned long); + memcpy(pdt_entries_ptr, &pdc_result2, pret->actual_count_bytes); + } + + spin_unlock_irqrestore(&pdc_lock, flags); + WARN_ON(retval == PDC_OK && pdc_result[0] > max_entries); + + return retval; +} +/** + * pdc_pat_mem_read_pd_pdt - Read PDT entries from (newer) PAT firmware + * @pret: array of PDT entries + * @pdt_entries_ptr: ptr to hold number of PDT entries + * + */ +int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret, + unsigned long *pdt_entries_ptr, unsigned long count, + unsigned long offset) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_PD_READ, + __pa(&pret), __pa(pdt_entries_ptr), + count, offset); + spin_unlock_irqrestore(&pdc_lock, flags); + + return retval; +} #endif /* CONFIG_64BIT */ diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S index 0fbd0a0e1cda..e3a8e5e4d5de 100644 --- a/arch/parisc/kernel/hpmc.S +++ b/arch/parisc/kernel/hpmc.S @@ -44,6 +44,7 @@ #include <asm/assembly.h> #include <asm/pdc.h> +#include <asm/psw.h> #include <linux/linkage.h> #include <linux/init.h> @@ -135,7 +136,7 @@ ENTRY_CFI(os_hpmc) * So turn on the Q bit and turn off the M bit. */ - ldo 8(%r0),%r4 /* PSW Q on, PSW M off */ + ldi PSW_SM_Q,%r4 /* PSW Q on, PSW M off */ mtctl %r4,ipsw mtctl %r0,pcsq mtctl %r0,pcsq @@ -257,7 +258,7 @@ os_hpmc_5: tovirt_r1 %r30 /* make sp virtual */ - rsm 8,%r0 /* Clear Q bit */ + rsm PSW_SM_Q,%r0 /* Clear Q bit */ ldi 1,%r8 /* Set trap code to "1" for HPMC */ load32 PA(intr_save),%r1 be 0(%sr7,%r1) diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c index c9789d9c73b4..b0fe19ac4d78 100644 --- a/arch/parisc/kernel/inventory.c +++ b/arch/parisc/kernel/inventory.c @@ -40,6 +40,11 @@ int pdc_type __read_mostly = PDC_TYPE_ILLEGAL; +/* cell number and location (PAT firmware only) */ +unsigned long parisc_cell_num __read_mostly; +unsigned long parisc_cell_loc __read_mostly; + + void __init setup_pdc(void) { long status; @@ -78,6 +83,10 @@ void __init setup_pdc(void) if (status == PDC_OK) { pdc_type = PDC_TYPE_PAT; pr_cont("64 bit PAT.\n"); + parisc_cell_num = cell_info.cell_num; + parisc_cell_loc = cell_info.cell_loc; + pr_info("PAT: Running on cell %lu and location %lu.\n", + parisc_cell_num, parisc_cell_loc); return; } #endif diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c new file mode 100644 index 000000000000..f3a797e670b0 --- /dev/null +++ b/arch/parisc/kernel/pdt.c @@ -0,0 +1,143 @@ +/* + * Page Deallocation Table (PDT) support + * + * The Page Deallocation Table (PDT) holds a table with pointers to bad + * memory (broken RAM modules) which is maintained by firmware. + * + * Copyright 2017 by Helge Deller <deller@gmx.de> + * + * TODO: + * - check regularily for new bad memory + * - add userspace interface with procfs or sysfs + * - increase number of PDT entries dynamically + */ + +#include <linux/memblock.h> +#include <linux/seq_file.h> + +#include <asm/pdc.h> +#include <asm/pdcpat.h> +#include <asm/sections.h> +#include <asm/pgtable.h> + +enum pdt_access_type { + PDT_NONE, + PDT_PDC, + PDT_PAT_NEW, + PDT_PAT_OLD +}; + +static enum pdt_access_type pdt_type; + +/* global PDT status information */ +static struct pdc_mem_retinfo pdt_status; + +#define MAX_PDT_TABLE_SIZE PAGE_SIZE +#define MAX_PDT_ENTRIES (MAX_PDT_TABLE_SIZE / sizeof(unsigned long)) +static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss; + + +/* report PDT entries via /proc/meminfo */ +void arch_report_meminfo(struct seq_file *m) +{ + if (pdt_type == PDT_NONE) + return; + + seq_printf(m, "PDT_max_entries: %7lu\n", + pdt_status.pdt_size); + seq_printf(m, "PDT_cur_entries: %7lu\n", + pdt_status.pdt_entries); +} + +/* + * pdc_pdt_init() + * + * Initialize kernel PDT structures, read initial PDT table from firmware, + * report all current PDT entries and mark bad memory with memblock_reserve() + * to avoid that the kernel will use broken memory areas. + * + */ +void __init pdc_pdt_init(void) +{ + int ret, i; + unsigned long entries; + struct pdc_mem_read_pdt pdt_read_ret; + + if (is_pdc_pat()) { + struct pdc_pat_mem_retinfo pat_rinfo; + + pdt_type = PDT_PAT_NEW; + ret = pdc_pat_mem_pdt_info(&pat_rinfo); + pdt_status.pdt_size = pat_rinfo.max_pdt_entries; + pdt_status.pdt_entries = pat_rinfo.current_pdt_entries; + pdt_status.pdt_status = 0; + pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc; + pdt_status.good_mem = pat_rinfo.good_mem; + } else { + pdt_type = PDT_PDC; + ret = pdc_mem_pdt_info(&pdt_status); + } + + if (ret != PDC_OK) { + pdt_type = PDT_NONE; + pr_info("PDT: Firmware does not provide any page deallocation" + " information.\n"); + return; + } + + entries = pdt_status.pdt_entries; + WARN_ON(entries > MAX_PDT_ENTRIES); + + pr_info("PDT: size %lu, entries %lu, status %lu, dbe_loc 0x%lx," + " good_mem %lu\n", + pdt_status.pdt_size, pdt_status.pdt_entries, + pdt_status.pdt_status, pdt_status.first_dbe_loc, + pdt_status.good_mem); + + if (entries == 0) { + pr_info("PDT: Firmware reports all memory OK.\n"); + return; + } + + if (pdt_status.first_dbe_loc && + pdt_status.first_dbe_loc <= __pa((unsigned long)&_end)) + pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n"); + + pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n", + entries); + + if (pdt_type == PDT_PDC) + ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry); + else { +#ifdef CONFIG_64BIT + struct pdc_pat_mem_read_pd_retinfo pat_pret; + + ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry, + MAX_PDT_ENTRIES); + if (ret != PDC_OK) { + pdt_type = PDT_PAT_OLD; + ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry, + MAX_PDT_TABLE_SIZE, 0); + } +#else + ret = PDC_BAD_PROC; +#endif + } + + if (ret != PDC_OK) { + pdt_type = PDT_NONE; + pr_debug("PDT type %d, retval = %d\n", pdt_type, ret); + return; + } + + for (i = 0; i < pdt_status.pdt_entries; i++) { + if (i < 20) + pr_warn("PDT: BAD PAGE #%d at 0x%08lx (error_type = %lu)\n", + i, + pdt_entry[i] & PAGE_MASK, + pdt_entry[i] & 1); + + /* mark memory page bad */ + memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE); + } +} diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 4516a5b53f38..b64d7d21646e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } -unsigned long thread_saved_pc(struct task_struct *t) -{ - return t->thread.regs.kpc; -} - unsigned long get_wchan(struct task_struct *p) { diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 85de47f4eb59..0ab32779dfa7 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -94,7 +94,7 @@ static int processor_probe(struct parisc_device *dev) unsigned long txn_addr; unsigned long cpuid; struct cpuinfo_parisc *p; - struct pdc_pat_cpu_num cpu_info __maybe_unused; + struct pdc_pat_cpu_num cpu_info = { }; #ifdef CONFIG_SMP if (num_online_cpus() >= nr_cpu_ids) { @@ -113,6 +113,7 @@ static int processor_probe(struct parisc_device *dev) */ cpuid = boot_cpu_data.cpu_count; txn_addr = dev->hpa.start; /* for legacy PDC */ + cpu_info.cpu_num = cpu_info.cpu_loc = cpuid; #ifdef CONFIG_64BIT if (is_pdc_pat()) { @@ -180,6 +181,8 @@ static int processor_probe(struct parisc_device *dev) p->hpa = dev->hpa.start; /* save CPU hpa */ p->cpuid = cpuid; /* save CPU id */ p->txn_addr = txn_addr; /* save CPU IRQ address */ + p->cpu_num = cpu_info.cpu_num; + p->cpu_loc = cpu_info.cpu_loc; #ifdef CONFIG_SMP /* ** FIXME: review if any other initialization is clobbered diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index e5288638a1d9..378a754ca186 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; unsigned long task_size = TASK_SIZE; int do_color_align, last_mmap; struct vm_unmapped_area_info info; @@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, else addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + vma = find_vma_prev(mm, addr, &prev); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) goto found_addr; } @@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct mm_struct *mm = current->mm; unsigned long addr = addr0; int do_color_align, last_mmap; @@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = COLOR_ALIGN(addr, last_mmap, pgoff); else addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + + vma = find_vma_prev(mm, addr, &prev); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) goto found_addr; } diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 44aeaa9c039f..6308749359e4 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -361,7 +361,7 @@ ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ - ENTRY_SAME(keyctl) + ENTRY_COMP(keyctl) ENTRY_SAME(ioprio_set) ENTRY_SAME(ioprio_get) ENTRY_SAME(inotify_init) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 89421df70160..2d956aa0a38a 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -243,14 +243,30 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs, so mark - * them unstable and lower rating on SMP systems. + * The cr16 interval timers are not syncronized across CPUs on + * different sockets, so mark them unstable and lower rating on + * multi-socket SMP systems. */ if (num_online_cpus() > 1) { - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; + int cpu; + unsigned long cpu0_loc; + cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; + + for_each_online_cpu(cpu) { + if (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc) + continue; + + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; + break; + } } + /* XXX: We may want to mark sched_clock stable here if cr16 clocks are + * in sync: + * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */ + /* register at clocksource framework */ clocksource_register_hz(&clocksource_cr16, 100 * PAGE0->mem_10msec); diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 85c28bb80fb7..d4fe19806d57 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -56,12 +56,6 @@ mtsp %r1,%sr1 .endm - .macro fixup_branch lbl - ldil L%\lbl, %r1 - ldo R%\lbl(%r1), %r1 - bv %r0(%r1) - .endm - /* * unsigned long lclear_user(void *to, unsigned long n) * @@ -82,16 +76,16 @@ $lclu_loop: $lclu_done: bv %r0(%r2) copy %r25,%r28 - .exit -ENDPROC_CFI(lclear_user) - .section .fixup,"ax" -2: fixup_branch $lclu_done - ldo 1(%r25),%r25 - .previous +2: b $lclu_done + ldo 1(%r25),%r25 ASM_EXCEPTIONTABLE_ENTRY(1b,2b) + .exit +ENDPROC_CFI(lclear_user) + + .procend /* @@ -122,16 +116,15 @@ $lslen_done: $lslen_nzero: b $lslen_done ldo 1(%r26),%r26 /* special case for N == 0 */ -ENDPROC_CFI(lstrnlen_user) - .section .fixup,"ax" -3: fixup_branch $lslen_done +3: b $lslen_done copy %r24,%r26 /* reset r26 so 0 is returned on fault */ - .previous ASM_EXCEPTIONTABLE_ENTRY(1b,3b) ASM_EXCEPTIONTABLE_ENTRY(2b,3b) +ENDPROC_CFI(lstrnlen_user) + .procend diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 32ec22146141..5b101f6a5607 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -29,8 +29,6 @@ #define BITSSET 0x1c0 /* for identifying LDCW */ -DEFINE_PER_CPU(struct exception_data, exception_data); - int show_unhandled_signals = 1; /* @@ -143,13 +141,6 @@ int fixup_exception(struct pt_regs *regs) fix = search_exception_tables(regs->iaoq[0]); if (fix) { - struct exception_data *d; - d = this_cpu_ptr(&exception_data); - d->fault_ip = regs->iaoq[0]; - d->fault_gp = regs->gr[27]; - d->fault_space = regs->isr; - d->fault_addr = regs->ior; - /* * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant @@ -163,6 +154,7 @@ int fixup_exception(struct pt_regs *regs) /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { int treg = regs->iir & 0x1f; + BUG_ON(treg == 0); regs->gr[treg] = 0; } } @@ -367,7 +359,7 @@ bad_area: case 15: /* Data TLB miss fault/Data page fault */ /* send SIGSEGV when outside of vma */ if (!vma || - address < vma->vm_start || address > vma->vm_end) { + address < vma->vm_start || address >= vma->vm_end) { si.si_signo = SIGSEGV; si.si_code = SEGV_MAPERR; break; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 66f3a6345105..1ca9a2b4239f 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -381,6 +381,9 @@ static void __init setup_bootmem(void) request_resource(res, &data_resource); } request_resource(&sysram_resources[0], &pdcdata_resource); + + /* Initialize Page Deallocation Table (PDT) and check for bad memory. */ + pdc_pdt_init(); } static int __init parisc_text_address(unsigned long vaddr) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f7c8f9972f61..6189238e69f8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -184,7 +184,7 @@ config PPC select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS - select HAVE_GENERIC_RCU_GUP + select HAVE_GENERIC_GUP select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT @@ -380,22 +380,6 @@ source "arch/powerpc/platforms/Kconfig" menu "Kernel options" -config PPC_DT_CPU_FTRS - bool "Device-tree based CPU feature discovery & setup" - depends on PPC_BOOK3S_64 - default n - help - This enables code to use a new device tree binding for describing CPU - compatibility and features. Saying Y here will attempt to use the new - binding if the firmware provides it. Currently only the skiboot - firmware provides this binding. - If you're not sure say Y. - -config PPC_CPUFEATURES_ENABLE_UNKNOWN - bool "cpufeatures pass through unknown features to guest/userspace" - depends on PPC_DT_CPU_FTRS - default y - config HIGHMEM bool "High memory support" depends on PPC32 @@ -1215,11 +1199,6 @@ source "arch/powerpc/Kconfig.debug" source "security/Kconfig" -config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y - source "crypto/Kconfig" config PPC_LIB_RHEAP diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index b4b5e6b671ca..0c4e470571ca 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -8,7 +8,7 @@ #define H_PTE_INDEX_SIZE 9 #define H_PMD_INDEX_SIZE 7 #define H_PUD_INDEX_SIZE 9 -#define H_PGD_INDEX_SIZE 12 +#define H_PGD_INDEX_SIZE 9 #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index f2c562a0a427..0151af6c2a50 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -104,7 +104,7 @@ "1: "PPC_TLNEI" %4,0\n" \ _EMIT_BUG_ENTRY \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (BUGFLAG_TAINT(TAINT_WARN)), \ + "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\ "i" (sizeof(struct bug_entry)), \ "r" (__ret_warn_on)); \ } \ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index c2d509584a98..d02ad93bf708 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -214,7 +214,6 @@ enum { #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) -#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #ifndef __ASSEMBLY__ @@ -463,7 +462,7 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index a83821f33ea3..8814a7249ceb 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); extern int kprobe_post_handler(struct pt_regs *regs); +extern int is_current_kprobe_addr(unsigned long addr); #ifdef CONFIG_KPROBES_ON_FTRACE extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index a2123f291ab0..1189d04f3bd1 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -110,13 +110,18 @@ void release_thread(struct task_struct *); #define TASK_SIZE_128TB (0x0000800000000000UL) #define TASK_SIZE_512TB (0x0002000000000000UL) -#ifdef CONFIG_PPC_BOOK3S_64 +/* + * For now 512TB is only supported with book3s and 64K linux page size. + */ +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES) /* * Max value currently used: */ -#define TASK_SIZE_USER64 TASK_SIZE_512TB +#define TASK_SIZE_USER64 TASK_SIZE_512TB +#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB #else -#define TASK_SIZE_USER64 TASK_SIZE_64TB +#define TASK_SIZE_USER64 TASK_SIZE_64TB +#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB #endif /* @@ -132,7 +137,7 @@ void release_thread(struct task_struct *); * space during mmap's. */ #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) -#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4)) +#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4)) #define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \ TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) @@ -143,21 +148,15 @@ void release_thread(struct task_struct *); * with 128TB and conditionally enable upto 512TB */ #ifdef CONFIG_PPC_BOOK3S_64 -#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \ - TASK_SIZE_USER32 : TASK_SIZE_128TB) +#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \ + TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64) #else #define DEFAULT_MAP_WINDOW TASK_SIZE #endif #ifdef __powerpc64__ -#ifdef CONFIG_PPC_BOOK3S_64 -/* Limit stack to 128TB */ -#define STACK_TOP_USER64 TASK_SIZE_128TB -#else -#define STACK_TOP_USER64 TASK_SIZE_USER64 -#endif - +#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64 #define STACK_TOP_USER32 TASK_SIZE_USER32 #define STACK_TOP (is_32bit_task() ? \ @@ -379,12 +378,6 @@ struct thread_struct { } #endif -/* - * Return saved PC of a blocked thread. For now, this is the "user" PC - */ -#define thread_saved_pc(tsk) \ - ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0) - #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs) unsigned long get_wchan(struct task_struct *p); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index a2d36b7703ae..dc4e15937ccf 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -45,8 +45,22 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); extern int numa_update_cpu_topology(bool cpus_locked); +static inline int early_cpu_to_node(int cpu) +{ + int nid; + + nid = numa_cpu_lookup_table[cpu]; + + /* + * Fall back to node 0 if nid is unset (it should be, except bugs). + * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)). + */ + return (nid < 0) ? 0 : nid; +} #else +static inline int early_cpu_to_node(int cpu) { return 0; } + static inline void dump_numa_cpu_topology(void) {} static inline int sysfs_add_device_to_node(struct device *dev, int nid) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 5c0d8a8cdae5..41e88d3ce36b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -267,13 +267,7 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifndef __powerpc64__ - -#define INLINE_COPY_FROM_USER -#define INLINE_COPY_TO_USER - -#else /* __powerpc64__ */ - +#ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index c8a822acf962..c23ff4389ca2 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -94,11 +94,13 @@ struct xive_q { * store at 0 and some ESBs support doing a trigger via a * separate trigger page. */ -#define XIVE_ESB_GET 0x800 -#define XIVE_ESB_SET_PQ_00 0xc00 -#define XIVE_ESB_SET_PQ_01 0xd00 -#define XIVE_ESB_SET_PQ_10 0xe00 -#define XIVE_ESB_SET_PQ_11 0xf00 +#define XIVE_ESB_STORE_EOI 0x400 /* Store */ +#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ +#define XIVE_ESB_GET 0x800 /* Load */ +#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ +#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ +#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ +#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ #define XIVE_ESB_VAL_P 0x2 #define XIVE_ESB_VAL_Q 0x1 diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 3e7ce86d5c13..4d877144f377 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -46,6 +46,8 @@ #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */ #define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */ +#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */ +#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ /* * IMPORTANT! diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9b3e88b1a9c8..6f849832a669 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -124,7 +124,8 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER_POWER9 COMMON_USER_POWER8 #define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ PPC_FEATURE2_ARCH_3_00 | \ - PPC_FEATURE2_HAS_IEEE128) + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN ) #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index fcc7588a96d6..4c7656dc4e04 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/jump_label.h> +#include <linux/libfdt.h> #include <linux/memblock.h> #include <linux/printk.h> #include <linux/sched.h> @@ -642,7 +643,6 @@ static struct dt_cpu_feature_match __initdata {"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL}, {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL}, {"processor-utilization-of-resources-register", feat_enable_purr, 0}, - {"subcore", feat_enable, CPU_FTR_SUBCORE}, {"no-execute", feat_enable, 0}, {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, {"cache-inhibited-large-page", feat_enable_large_ci, 0}, @@ -671,12 +671,24 @@ static struct dt_cpu_feature_match __initdata {"wait-v3", feat_enable, 0}, }; -/* XXX: how to configure this? Default + boot time? */ -#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN -#define CPU_FEATURE_ENABLE_UNKNOWN 1 -#else -#define CPU_FEATURE_ENABLE_UNKNOWN 0 -#endif +static bool __initdata using_dt_cpu_ftrs; +static bool __initdata enable_unknown = true; + +static int __init dt_cpu_ftrs_parse(char *str) +{ + if (!str) + return 0; + + if (!strcmp(str, "off")) + using_dt_cpu_ftrs = false; + else if (!strcmp(str, "known")) + enable_unknown = false; + else + return 1; + + return 0; +} +early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse); static void __init cpufeatures_setup_start(u32 isa) { @@ -707,7 +719,7 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) } } - if (!known && CPU_FEATURE_ENABLE_UNKNOWN) { + if (!known && enable_unknown) { if (!feat_try_enable_unknown(f)) { pr_info("not enabling: %s (unknown and unsupported by kernel)\n", f->name); @@ -756,6 +768,26 @@ static void __init cpufeatures_setup_finished(void) cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features); } +static int __init disabled_on_cmdline(void) +{ + unsigned long root, chosen; + const char *p; + + root = of_get_flat_dt_root(); + chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); + if (chosen == -FDT_ERR_NOTFOUND) + return false; + + p = of_get_flat_dt_prop(chosen, "bootargs", NULL); + if (!p) + return false; + + if (strstr(p, "dt_cpu_ftrs=off")) + return true; + + return false; +} + static int __init fdt_find_cpu_features(unsigned long node, const char *uname, int depth, void *data) { @@ -766,8 +798,6 @@ static int __init fdt_find_cpu_features(unsigned long node, const char *uname, return 0; } -static bool __initdata using_dt_cpu_ftrs = false; - bool __init dt_cpu_ftrs_in_use(void) { return using_dt_cpu_ftrs; @@ -775,6 +805,8 @@ bool __init dt_cpu_ftrs_in_use(void) bool __init dt_cpu_ftrs_init(void *fdt) { + using_dt_cpu_ftrs = false; + /* Setup and verify the FDT, if it fails we just bail */ if (!early_init_dt_verify(fdt)) return false; @@ -782,6 +814,9 @@ bool __init dt_cpu_ftrs_init(void *fdt) if (!of_scan_flat_dt(fdt_find_cpu_features, NULL)) return false; + if (disabled_on_cmdline()) + return false; + cpufeatures_setup_cpu(); using_dt_cpu_ftrs = true; @@ -1027,5 +1062,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char void __init dt_cpu_ftrs_scan(void) { + if (!using_dt_cpu_ftrs) + return; + of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL); } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae418b85c17c..b886795060fd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1411,10 +1411,8 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 - andis. r0,r4,0xa410 /* weird error? */ + andis. r0,r4,0xa450 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ - andis. r0,r4,DSISR_DABRMATCH@h - bne- handle_dabr_fault CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ @@ -1438,11 +1436,16 @@ do_hash_page: /* Error */ blt- 13f + + /* Reload DSISR into r4 for the DABR check below */ + ld r4,_DSISR(r1) #endif /* CONFIG_PPC_STD_MMU_64 */ /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: -11: ld r4,_DAR(r1) +11: andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index fc4343514bed..01addfb0ed0a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; +int is_current_kprobe_addr(unsigned long addr) +{ + struct kprobe *p = kprobe_running(); + return (p && (unsigned long)p->addr == addr) ? 1 : 0; +} + bool arch_within_kprobe_blacklist(unsigned long addr) { return (addr >= (unsigned long)__kprobes_text_start && @@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); + return 1; } NOKPROBE_SYMBOL(setjmp_pre_handler); @@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) * saved regs... */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); preempt_enable_no_resched(); return 1; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index baae104b16c7..2ad725ef4368 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1666,6 +1666,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #ifdef CONFIG_VSX current->thread.used_vsr = 0; #endif + current->thread.load_fp = 0; memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; #ifdef CONFIG_ALTIVEC @@ -1674,6 +1675,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.vr_save_area = NULL; current->thread.vrsave = 0; current->thread.used_vr = 0; + current->thread.load_vec = 0; #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_SPE memset(current->thread.evr, 0, sizeof(current->thread.evr)); @@ -1685,6 +1687,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfhar = 0; current->thread.tm_texasr = 0; current->thread.tm_tfiar = 0; + current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ } EXPORT_SYMBOL(start_thread); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 40c4887c27b6..f83056297441 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -161,7 +161,9 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, +#ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, +#endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 71dcda91755d..857129acf960 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -928,7 +928,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PPC_MM_SLICES #ifdef CONFIG_PPC64 - init_mm.context.addr_limit = TASK_SIZE_128TB; + init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64; #else #error "context.addr_limit not initialized." #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f35ff9dea4fb..4640f6d64f8b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void) #endif /* + * Emergency stacks are used for a range of things, from asynchronous + * NMIs (system reset, machine check) to synchronous, process context. + * We set preempt_count to zero, even though that isn't necessarily correct. To + * get the right value we'd need to copy it from the previous thread_info, but + * doing that might fault causing more problems. + * TODO: what to do with accounting? + */ +static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu) +{ + ti->task = NULL; + ti->cpu = cpu; + ti->preempt_count = 0; + ti->local_flags = 0; + ti->flags = 0; + klp_init_thread_info(ti); +} + +/* * Stack space used when we detect a bad kernel stack pointer, and * early in SMP boots before relocation is enabled. Exclusive emergency * stack for machine checks. @@ -633,24 +651,31 @@ void __init emergency_stack_init(void) * Since we use these as temporary stacks during secondary CPU * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. + * + * The IRQ stacks allocated elsewhere in this file are zeroed and + * initialized in kernel/irq.c. These are initialized here in order + * to have emergency stacks available as early as possible. */ limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { struct thread_info *ti; ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } @@ -661,7 +686,7 @@ void __init emergency_stack_init(void) static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, + return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align, __pa(MAX_DMA_ADDRESS)); } @@ -672,7 +697,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size) static int pcpu_cpu_distance(unsigned int from, unsigned int to) { - if (cpu_to_node(from) == cpu_to_node(to)) + if (early_cpu_to_node(from) == early_cpu_to_node(to)) return LOCAL_DISTANCE; else return REMOTE_DISTANCE; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index df2a41647d8e..1069f74fca47 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -97,7 +97,7 @@ int smp_generic_cpu_bootable(unsigned int nr) /* Special case - we inhibit secondary thread startup * during boot if the user requests it. */ - if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { + if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) return 0; if (smt_enabled_at_boot diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 7c933a99f5d5..c98e90b4ea7b 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller) stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(0,r1) - SAVE_8GPRS(8,r1) - SAVE_8GPRS(16,r1) - SAVE_8GPRS(24,r1) + SAVE_GPR(0, r1) + SAVE_10GPRS(2, r1) + SAVE_10GPRS(12, r1) + SAVE_10GPRS(22, r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE + std r8, GPR1(r1) /* Load special regs for save below */ mfmsr r8 @@ -95,18 +99,44 @@ ftrace_call: bl ftrace_stub nop - /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) - mtctr r3 + /* Load the possibly modified NIP */ + ld r15, _NIP(r1) + #ifdef CONFIG_LIVEPATCH - cmpd r14,r3 /* has NIP been altered? */ + cmpd r14, r15 /* has NIP been altered? */ +#endif + +#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE) + /* NIP has not been altered, skip over further checks */ + beq 1f + + /* Check if there is an active kprobe on us */ + subi r3, r14, 4 + bl is_current_kprobe_addr + nop + + /* + * If r3 == 1, then this is a kprobe/jprobe. + * else, this is livepatched function. + * + * The conditional branch for livepatch_handler below will use the + * result of this comparison. For kprobe/jprobe, we just need to branch to + * the new NIP, not call livepatch_handler. The branch below is bne, so we + * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want + * CR0[EQ] = (r3 == 1). + */ + cmpdi r3, 1 +1: #endif + /* Load CTR with the possibly modified NIP */ + mtctr r15 + /* Restore gprs */ - REST_8GPRS(0,r1) - REST_8GPRS(8,r1) - REST_8GPRS(16,r1) - REST_8GPRS(24,r1) + REST_GPR(0,r1) + REST_10GPRS(2,r1) + REST_10GPRS(12,r1) + REST_10GPRS(22,r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) @@ -119,7 +149,10 @@ ftrace_call: addi r1, r1, SWITCH_FRAME_SIZE #ifdef CONFIG_LIVEPATCH - /* Based on the cmpd above, if the NIP was altered handle livepatch */ + /* + * Based on the cmpd or cmpdi above, if the NIP was altered and we're + * not on a kprobe/jprobe, then handle livepatch. + */ bne- livepatch_handler #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48a6bd160011..773b35d16a0b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); break; case KVM_REG_PPC_TB_OFFSET: + /* + * POWER9 DD1 has an erratum where writing TBU40 causes + * the timebase to lose ticks. So we don't let the + * timebase offset be changed on P9 DD1. (It is + * initialized to zero.) + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + break; /* round up to multiple of 2^24 */ vcpu->arch.vcore->tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24); @@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; + unsigned long ebb_regs[3] = {}; /* shut up GCC */ + unsigned long user_tar = 0; + unsigned int user_vrsave; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } + /* + * Don't allow entry with a suspended transaction, because + * the guest entry/exit code will lose it. + * If the guest has TM enabled, save away their TM-related SPRs + * (they will get restored by the TM unavailable interrupt). + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && + (current->thread.regs->msr & MSR_TM)) { + if (MSR_TM_ACTIVE(current->thread.regs->msr)) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason = 0; + return -EINVAL; + } + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); + current->thread.tm_texasr = mfspr(SPRN_TEXASR); + current->thread.regs->msr &= ~MSR_TM; + } +#endif + kvmppc_core_prepare_to_enter(vcpu); /* No need to go into the guest when all we'll do is come back out */ @@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_all_to_thread(current); + /* Save userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + ebb_regs[0] = mfspr(SPRN_EBBHR); + ebb_regs[1] = mfspr(SPRN_EBBRR); + ebb_regs[2] = mfspr(SPRN_BESCR); + user_tar = mfspr(SPRN_TAR); + } + user_vrsave = mfspr(SPRN_VRSAVE); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); + /* Restore userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_EBBHR, ebb_regs[0]); + mtspr(SPRN_EBBRR, ebb_regs[1]); + mtspr(SPRN_BESCR, ebb_regs[2]); + mtspr(SPRN_TAR, user_tar); + mtspr(SPRN_FSCR, current->thread.fscr); + } + mtspr(SPRN_VRSAVE, user_vrsave); + out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 0fdc4a28970b..404deb512844 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * Put whatever is in the decrementer into the * hypervisor decrementer. */ +BEGIN_FTR_SECTION + ld r5, HSTATE_KVM_VCORE(r13) + ld r6, VCORE_KVM(r5) + ld r9, KVM_HOST_LPCR(r6) + andis. r9, r9, LPCR_LD@h +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mfspr r8,SPRN_DEC mftb r7 - mtspr SPRN_HDEC,r8 +BEGIN_FTR_SECTION + /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */ + bne 32f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r8,r8 +32: mtspr SPRN_HDEC,r8 add r8,r8,r7 std r8,HSTATE_DECEXP(r13) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bdb3f76ceb6b..4888dd494604 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,12 +32,29 @@ #include <asm/opal.h> #include <asm/xive-regs.h> +/* Sign-extend HDEC if not on POWER9 */ +#define EXTEND_HDEC(reg) \ +BEGIN_FTR_SECTION; \ + extsw reg, reg; \ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +/* Stack frame offsets for kvmppc_hv_entry */ +#define SFS 144 +#define STACK_SLOT_TRAP (SFS-4) +#define STACK_SLOT_TID (SFS-16) +#define STACK_SLOT_PSSCR (SFS-24) +#define STACK_SLOT_PID (SFS-32) +#define STACK_SLOT_IAMR (SFS-40) +#define STACK_SLOT_CIABR (SFS-48) +#define STACK_SLOT_DAWR (SFS-56) +#define STACK_SLOT_DAWRX (SFS-64) + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ + /* HDEC may be larger than DEC for arch >= v3.00, but since the */ + /* HDEC value came from DEC in the first place, it will fit */ mfspr r3, SPRN_HDEC mtspr SPRN_DEC, r3 /* @@ -295,8 +314,9 @@ kvm_novcpu_wakeup: /* See if our timeslice has expired (HDEC is negative) */ mfspr r0, SPRN_HDEC + EXTEND_HDEC(r0) li r12, BOOK3S_INTERRUPT_HV_DECREMENTER - cmpwi r0, 0 + cmpdi r0, 0 blt kvm_novcpu_exit /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ @@ -319,10 +339,10 @@ kvm_novcpu_exit: bl kvmhv_accumulate_time #endif 13: mr r3, r12 - stw r12, 112-4(r1) + stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, 112-4(r1) + lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -390,8 +410,8 @@ kvm_secondary_got_guest: lbz r4, HSTATE_PTID(r13) cmpwi r4, 0 bne 63f - lis r6, 0x7fff - ori r6, r6, 0xffff + LOAD_REG_ADDR(r6, decrementer_max) + ld r6, 0(r6) mtspr SPRN_HDEC, r6 /* and set per-LPAR registers, if doing dynamic micro-threading */ ld r6, HSTATE_SPLIT_MODE(r13) @@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * * *****************************************************************************/ -/* Stack frame offsets */ -#define STACK_SLOT_TID (112-16) -#define STACK_SLOT_PSSCR (112-24) -#define STACK_SLOT_PID (112-32) - .global kvmppc_hv_entry kvmppc_hv_entry: @@ -565,7 +580,7 @@ kvmppc_hv_entry: */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -112(r1) + stdu r1, -SFS(r1) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) @@ -749,10 +764,20 @@ BEGIN_FTR_SECTION mfspr r5, SPRN_TIDR mfspr r6, SPRN_PSSCR mfspr r7, SPRN_PID + mfspr r8, SPRN_IAMR std r5, STACK_SLOT_TID(r1) std r6, STACK_SLOT_PSSCR(r1) std r7, STACK_SLOT_PID(r1) + std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + mfspr r5, SPRN_CIABR + mfspr r6, SPRN_DAWR + mfspr r7, SPRN_DAWRX + std r5, STACK_SLOT_CIABR(r1) + std r6, STACK_SLOT_DAWR(r1) + std r7, STACK_SLOT_DAWRX(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC - cmpwi r3, 512 /* 1 microsecond */ + EXTEND_HDEC(r3) + cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon #ifdef CONFIG_KVM_XICS @@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) * set by the guest could disrupt the host. */ li r0, 0 - mtspr SPRN_IAMR, r0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 mtspr SPRN_WORT, r0 BEGIN_FTR_SECTION + mtspr SPRN_IAMR, r0 mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 @@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 + mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ mfspr r8, SPRN_DSCR @@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Restore host values of some registers */ BEGIN_FTR_SECTION + ld r5, STACK_SLOT_CIABR(r1) + ld r6, STACK_SLOT_DAWR(r1) + ld r7, STACK_SLOT_DAWRX(r1) + mtspr SPRN_CIABR, r5 + mtspr SPRN_DAWR, r6 + mtspr SPRN_DAWRX, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) ld r7, STACK_SLOT_PID(r1) + ld r8, STACK_SLOT_IAMR(r1) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 mtspr SPRN_PID, r7 + mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT @@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - ld r0, 112+PPC_LR_STKOFF(r1) - addi r1, r1, 112 + ld r0, SFS+PPC_LR_STKOFF(r1) + addi r1, r1, SFS mtlr r0 blr @@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) mfspr r3, SPRN_DEC mfspr r4, SPRN_HDEC mftb r5 - cmpw r3, r4 + extsw r3, r3 + EXTEND_HDEC(r4) + cmpd r3, r4 ble 67f mtspr SPRN_DEC, r4 67: /* save expiry time of guest decrementer */ - extsw r3, r3 add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 023a31133c37..4636ca6e7d38 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -69,7 +69,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd) { /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) - __x_writeq(0, __x_eoi_page(xd)); + __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI); else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { opal_int_eoi(hw_irq); } else { @@ -89,7 +89,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd) * properly. */ if (xd->flags & XIVE_IRQ_FLAG_LSI) - __x_readq(__x_eoi_page(xd)); + __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI); else { eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00); diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 6575b9aabef4..a12e86395025 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } /* diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 9dbd2a733d6b..0ee6be4f1ba4 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index c6dca2ae78ef..a3edf813d455 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm) * mm->context.addr_limit. Default to max task size so that we copy the * default values to paca which will help us to handle slb miss early. */ - mm->context.addr_limit = TASK_SIZE_128TB; + mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64; /* * The old code would re-promote on fork, we don't do that when using diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 966b9fccfa66..45f6740dd407 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, if ((mm->task_size - len) < addr) return 0; vma = find_vma(mm, addr); - return (!vma || (addr + len) <= vma->vm_start); + return (!vma || (addr + len) <= vm_start_gap(vma)); } static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index cbd82fde5770..09ceea6175ba 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs_user_copy) { regs_user->regs = task_pt_regs(current); - regs_user->abi = perf_reg_abi(current); + regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : + PERF_SAMPLE_REGS_ABI_NONE; } diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 018f8e90ac35..bb28e1a41257 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -402,7 +402,7 @@ static struct power_pmu power9_isa207_pmu = { .name = "POWER9", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, - .test_adder = ISA207_TEST_ADDER, + .test_adder = P9_DD1_TEST_ADDER, .compute_mmcr = isa207_compute_mmcr, .config_bhrb = power9_config_bhrb, .bhrb_filter_map = power9_bhrb_filter_map, @@ -421,7 +421,7 @@ static struct power_pmu power9_pmu = { .name = "POWER9", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, - .test_adder = P9_DD1_TEST_ADDER, + .test_adder = ISA207_TEST_ADDER, .compute_mmcr = isa207_compute_mmcr, .config_bhrb = power9_config_bhrb, .bhrb_filter_map = power9_bhrb_filter_map, diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 33244e3d9375..4fd64d3f5c44 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -59,6 +59,17 @@ config PPC_OF_BOOT_TRAMPOLINE In case of doubt, say Y +config PPC_DT_CPU_FTRS + bool "Device-tree based CPU feature discovery & setup" + depends on PPC_BOOK3S_64 + default y + help + This enables code to use a new device tree binding for describing CPU + compatibility and features. Saying Y here will attempt to use the new + binding if the firmware provides it. Currently only the skiboot + firmware provides this binding. + If you're not sure say Y. + config UDBG_RTAS_CONSOLE bool "RTAS based debug console" depends on PPC_RTAS diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 96c2b8a40630..0c45cdbac4cf 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -197,7 +197,9 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr); + ret = hash_page(ea, + _PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED, + 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index e5a891ae80ee..84b7ac926ce6 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -175,6 +175,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; if (!dump_skip(cprm, skip)) goto Eio; + + rc = 0; out: free_page((unsigned long)buf); return rc; diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 067defeea691..b5d960d6db3d 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -75,7 +75,8 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) if (WARN_ON(!gpdev)) return NULL; - if (WARN_ON(!gpdev->dev.of_node)) + /* Not all PCI devices have device-tree nodes */ + if (!gpdev->dev.of_node) return NULL; /* Get assoicated PCI device */ @@ -448,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, return mmio_atsd_reg; } -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) +static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) { unsigned long launch; @@ -464,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + /* Invalidating the entire process doesn't use a va */ return mmio_launch_invalidate(npu, launch, 0); } static int mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid) + unsigned long pid, bool flush) { unsigned long launch; @@ -485,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va, /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + return mmio_launch_invalidate(npu, launch, va); } #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) +struct mmio_atsd_reg { + struct npu *npu; + int reg; +}; + +static void mmio_invalidate_wait( + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) +{ + struct npu *npu; + int i, reg; + + /* Wait for all invalidations to complete */ + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* Wait for completion */ + npu = mmio_atsd_reg[i].npu; + reg = mmio_atsd_reg[i].reg; + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) + cpu_relax(); + + put_mmio_atsd_reg(npu, reg); + + /* + * The GPU requires two flush ATSDs to ensure all entries have + * been flushed. We use PID 0 as it will never be used for a + * process on the GPU. + */ + if (flush) + mmio_invalidate_pid(npu, 0, true); + } +} + /* * Invalidate either a single address or an entire PID depending on * the value of va. */ static void mmio_invalidate(struct npu_context *npu_context, int va, - unsigned long address) + unsigned long address, bool flush) { - int i, j, reg; + int i, j; struct npu *npu; struct pnv_phb *nphb; struct pci_dev *npdev; - struct { - struct npu *npu; - int reg; - } mmio_atsd_reg[NV_MAX_NPUS]; + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; /* @@ -524,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, if (va) mmio_atsd_reg[i].reg = - mmio_invalidate_va(npu, address, pid); + mmio_invalidate_va(npu, address, pid, + flush); else mmio_atsd_reg[i].reg = - mmio_invalidate_pid(npu, pid); + mmio_invalidate_pid(npu, pid, flush); /* * The NPU hardware forwards the shootdown to all GPUs @@ -543,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, */ flush_tlb_mm(npu_context->mm); - /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* Wait for completion */ - npu = mmio_atsd_reg[i].npu; - reg = mmio_atsd_reg[i].reg; - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) - cpu_relax(); - put_mmio_atsd_reg(npu, reg); - } + mmio_invalidate_wait(mmio_atsd_reg, flush); + if (flush) + /* Wait for the flush to complete */ + mmio_invalidate_wait(mmio_atsd_reg, false); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -570,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, * There should be no more translation requests for this PID, but we * need to ensure any entries for it are removed from the TLB. */ - mmio_invalidate(npu_context, 0, 0); + mmio_invalidate(npu_context, 0, 0, true); } static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, @@ -580,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, @@ -589,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, @@ -599,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - for (address = start; address <= end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address); + for (address = start; address < end; address += PAGE_SIZE) + mmio_invalidate(npu_context, 1, address, false); + + /* Do the flush only on the final addess == end */ + mmio_invalidate(npu_context, 1, address, true); } static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { @@ -650,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* No nvlink associated with this GPU device */ return ERR_PTR(-ENODEV); - if (!mm) { - /* kernel thread contexts are not supported */ + if (!mm || mm->context.id == 0) { + /* + * Kernel thread contexts are not supported and context id 0 is + * reserved on the GPU. + */ return ERR_PTR(-EINVAL); } @@ -714,7 +751,7 @@ static void pnv_npu2_release_context(struct kref *kref) void pnv_npu2_destroy_context(struct npu_context *npu_context, struct pci_dev *gpdev) { - struct pnv_phb *nphb, *phb; + struct pnv_phb *nphb; struct npu *npu; struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); struct device_node *nvlink_dn; @@ -728,13 +765,12 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, nphb = pci_bus_to_host(npdev->bus)->private_data; npu = &nphb->npu; - phb = pci_bus_to_host(gpdev->bus)->private_data; nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", &nvlink_index))) return; npu_context->npdev[npu->index][nvlink_index] = NULL; - opal_npu_destroy_context(phb->opal_id, npu_context->mm->context.id, + opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); kref_put(&npu_context->kref, pnv_npu2_release_context); } diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index e6230f104dd9..309876d699e9 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -408,7 +408,13 @@ static DEVICE_ATTR(subcores_per_core, 0644, static int subcore_init(void) { - if (!cpu_has_feature(CPU_FTR_SUBCORE)) + unsigned pvr_ver; + + pvr_ver = PVR_VER(mfspr(SPRN_PVR)); + + if (pvr_ver != PVR_POWER8 && + pvr_ver != PVR_POWER8E && + pvr_ver != PVR_POWER8NVL) return 0; /* diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index e104c71ea44a..1fb162ba9d1c 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -124,6 +124,7 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn) for (i = 0; i < num_lmbs; i++) { lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr); lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index); + lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index); lmbs[i].flags = be32_to_cpu(lmbs[i].flags); } @@ -147,6 +148,7 @@ static void dlpar_update_drconf_property(struct device_node *dn, for (i = 0; i < num_lmbs; i++) { lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); + lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index); lmbs[i].flags = cpu_to_be32(lmbs[i].flags); } diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c index ef470b470b04..6afddae2fb47 100644 --- a/arch/powerpc/sysdev/simple_gpio.c +++ b/arch/powerpc/sysdev/simple_gpio.c @@ -75,7 +75,8 @@ static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc) { - struct u8_gpio_chip *u8_gc = gpiochip_get_data(&mm_gc->gc); + struct u8_gpio_chip *u8_gc = + container_of(mm_gc, struct u8_gpio_chip, mm_gc); u8_gc->data = in_8(mm_gc->regs); } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 913825086b8d..8f5e3035483b 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -297,7 +297,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) { /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) - out_be64(xd->eoi_mmio, 0); + out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0); else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { /* * The FW told us to call it. This happens for some diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e161fafb495b..37abe86e5bc9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -64,6 +64,7 @@ config ARCH_SUPPORTS_UPROBES config S390 def_bool y + select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL @@ -184,7 +185,7 @@ config SCHED_OMIT_FRAME_POINTER config PGTABLE_LEVELS int - default 4 + default 5 source "init/Kconfig" @@ -363,9 +364,6 @@ config COMPAT config SYSVIPC_COMPAT def_bool y if COMPAT && SYSVIPC -config KEYS_COMPAT - def_bool y if COMPAT && KEYS - config SMP def_bool y prompt "Symmetric multi-processing support" diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index a5039fa89314..282072206df7 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -30,6 +30,7 @@ CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y +# CONFIG_SYSFS_SYSCALL is not set CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -44,7 +45,10 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BLK_WBT=y +CONFIG_BLK_WBT_SQ=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -90,6 +94,8 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_SMC=m +CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -359,6 +365,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m @@ -367,16 +374,19 @@ CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_CONNECTOR=y +CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_CDROM_PKTCDVD=m -CONFIG_ATA_OVER_ETH=m +CONFIG_BLK_DEV_RAM_DAX=y CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_RBD=m CONFIG_ENCLOSURE_SERVICES=m +CONFIG_GENWQE=m CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -442,6 +452,8 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m +CONFIG_MLX5_CORE=m +CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NATSEMI is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m @@ -452,7 +464,6 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -471,6 +482,7 @@ CONFIG_DIAG288_WATCHDOG=m CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -487,12 +499,18 @@ CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y CONFIG_XFS_DEBUG=y CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_BTRFS_DEBUG=y CONFIG_NILFS2_FS=m +CONFIG_FS_DAX=y +CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QUOTA_DEBUG=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m @@ -558,6 +576,7 @@ CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_RODATA_TEST=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y CONFIG_DEBUG_OBJECTS_FREE=y @@ -580,7 +599,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y CONFIG_PROVE_LOCKING=y @@ -595,6 +613,7 @@ CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_PM_NOTIFIER_ERROR_INJECT=m +CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y CONFIG_FAILSLAB=y CONFIG_FAIL_PAGE_ALLOC=y @@ -616,13 +635,12 @@ CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y +CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y -CONFIG_TEST_STRING_HELPERS=y -CONFIG_TEST_KSTRTOX=y CONFIG_DMA_API_DEBUG=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y @@ -630,6 +648,7 @@ CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y +CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -640,7 +659,9 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_USER=m +CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m @@ -648,6 +669,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m @@ -657,8 +679,10 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -674,6 +698,7 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m @@ -685,6 +710,7 @@ CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y @@ -692,6 +718,7 @@ CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m CONFIG_CRC7=m CONFIG_CRC8=m +CONFIG_RANDOM32_SELFTEST=y CONFIG_CORDIC=m CONFIG_CMM=m CONFIG_APPLDATA_BASE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 83970b5afb2b..3c6b78189fbc 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -31,6 +31,7 @@ CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y +# CONFIG_SYSFS_SYSCALL is not set CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -46,7 +47,10 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BLK_WBT=y +CONFIG_BLK_WBT_SQ=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -88,6 +92,8 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_SMC=m +CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -356,6 +362,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m @@ -364,16 +371,18 @@ CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_CONNECTOR=y +CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_CDROM_PKTCDVD=m -CONFIG_ATA_OVER_ETH=m +CONFIG_BLK_DEV_RAM_DAX=y CONFIG_VIRTIO_BLK=y CONFIG_ENCLOSURE_SERVICES=m +CONFIG_GENWQE=m CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -439,6 +448,8 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m +CONFIG_MLX5_CORE=m +CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NATSEMI is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m @@ -449,7 +460,6 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -468,6 +478,7 @@ CONFIG_DIAG288_WATCHDOG=m CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -483,11 +494,15 @@ CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m +CONFIG_FS_DAX=y +CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m @@ -553,7 +568,6 @@ CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y -CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y @@ -576,6 +590,7 @@ CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y +CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -599,6 +614,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m @@ -611,6 +627,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -626,16 +643,19 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_ZCRYPT=m +CONFIG_PKEY=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index fbc6542aaf59..653d72bcc007 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -31,6 +31,7 @@ CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y +# CONFIG_SYSFS_SYSCALL is not set CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -44,7 +45,10 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BLK_WBT=y +CONFIG_BLK_WBT_SQ=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -86,6 +90,8 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_SMC=m +CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -354,6 +360,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m @@ -362,16 +369,18 @@ CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_CONNECTOR=y +CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_CDROM_PKTCDVD=m -CONFIG_ATA_OVER_ETH=m +CONFIG_BLK_DEV_RAM_DAX=y CONFIG_VIRTIO_BLK=y CONFIG_ENCLOSURE_SERVICES=m +CONFIG_GENWQE=m CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -437,6 +446,8 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m +CONFIG_MLX5_CORE=m +CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_NATSEMI is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m @@ -447,7 +458,6 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -466,6 +476,7 @@ CONFIG_DIAG288_WATCHDOG=m CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -481,11 +492,15 @@ CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m +CONFIG_FS_DAX=y +CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m @@ -551,7 +566,6 @@ CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y -CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y @@ -574,6 +588,7 @@ CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y +CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -597,6 +612,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m @@ -609,6 +625,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -624,6 +641,7 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m @@ -635,6 +653,7 @@ CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index e23d97c13735..afa46a7406ea 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -12,8 +12,10 @@ CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=2 # CONFIG_HOTPLUG_CPU is not set CONFIG_HZ_100=y +# CONFIG_ARCH_RANDOM is not set # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set +# CONFIG_BOUNCE is not set # CONFIG_CHECK_STACK is not set # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set @@ -36,11 +38,11 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_FC_ATTRS=y CONFIG_ZFCP=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_HVC_IUCV is not set +# CONFIG_HW_RANDOM_S390 is not set CONFIG_RAW_DRIVER=y # CONFIG_SCLP_ASYNC is not set # CONFIG_HMC_DRV is not set @@ -54,9 +56,9 @@ CONFIG_RAW_DRIVER=y # CONFIG_INOTIFY_USER is not set CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 678d9863e3f0..ad4bd777768d 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o -obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o +obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o +obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 9317b3e645e2..36aefc07d10c 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/atomic.h> +#include <linux/random.h> #include <linux/static_key.h> #include <asm/cpacf.h> diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 97189dbaf34b..20244a38c886 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -28,6 +28,7 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y +# CONFIG_SYSFS_SYSCALL is not set CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -108,7 +109,6 @@ CONFIG_ZFCP=y CONFIG_SCSI_VIRTIO=y CONFIG_MD=y CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m CONFIG_MD_MULTIPATH=m CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=m @@ -131,6 +131,7 @@ CONFIG_TUN=m CONFIG_VIRTIO_NET=y # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set CONFIG_DEVKMEM=y @@ -162,7 +163,6 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PANIC_ON_OOPS=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y @@ -172,14 +172,12 @@ CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y @@ -190,7 +188,6 @@ CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m @@ -230,6 +227,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m CONFIG_PKEY=m +CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 45092b12f54f..b3c88479feba 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,10 +1,12 @@ generic-y += asm-offsets.h generic-y += cacheflush.h generic-y += clkdev.h +generic-y += device.h generic-y += dma-contiguous.h generic-y += div64.h generic-y += emergency-restart.h generic-y += export.h +generic-y += fb.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kmap_types.h diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h deleted file mode 100644 index 5203fc87f080..000000000000 --- a/arch/s390/include/asm/device.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -struct dev_archdata { -}; - -struct pdev_archdata { -}; diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index 67026300c88e..144809a3f4f6 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <linux/device.h> +#include <linux/blkdev.h> struct arqb { u64 data; @@ -105,13 +106,14 @@ struct scm_driver { int (*probe) (struct scm_device *scmdev); int (*remove) (struct scm_device *scmdev); void (*notify) (struct scm_device *scmdev, enum scm_event event); - void (*handler) (struct scm_device *scmdev, void *data, int error); + void (*handler) (struct scm_device *scmdev, void *data, + blk_status_t error); }; int scm_driver_register(struct scm_driver *scmdrv); void scm_driver_unregister(struct scm_driver *scmdrv); int eadm_start_aob(struct aob *aob); -void scm_irq_handler(struct aob *aob, int error); +void scm_irq_handler(struct aob *aob, blk_status_t error); #endif /* _ASM_S390_EADM_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index e8f623041769..ec024c08dabe 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -117,6 +117,9 @@ #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 +/* s390 specific phdr types */ +#define PT_S390_PGSTE 0x70000000 + /* * ELF register definitions.. */ @@ -151,6 +154,35 @@ extern unsigned int vdso_enabled; && (x)->e_ident[EI_CLASS] == ELF_CLASS) #define compat_start_thread start_thread31 +struct arch_elf_state { + int rc; +}; + +#define INIT_ARCH_ELF_STATE { .rc = 0 } + +#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0) +#ifdef CONFIG_PGSTE +#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ +({ \ + struct arch_elf_state *_state = state; \ + if ((phdr)->p_type == PT_S390_PGSTE && \ + !page_table_allocate_pgste && \ + !test_thread_flag(TIF_PGSTE) && \ + !current->mm->context.alloc_pgste) { \ + set_thread_flag(TIF_PGSTE); \ + set_pt_regs_flag(task_pt_regs(current), \ + PIF_SYSCALL_RESTART); \ + _state->rc = -EAGAIN; \ + } \ + _state->rc; \ +}) +#else +#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ +({ \ + (state)->rc; \ +}) +#endif + /* For SVR4/S390 the function pointer to be registered with `atexit` is passed in R14. */ #define ELF_PLAT_INIT(_r, load_addr) \ diff --git a/arch/s390/include/asm/fb.h b/arch/s390/include/asm/fb.h deleted file mode 100644 index c7df38030992..000000000000 --- a/arch/s390/include/asm/fb.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ -#include <linux/fb.h> - -#define fb_pgprotect(...) do {} while (0) - -static inline int fb_is_primary_device(struct fb_info *info) -{ - return 0; -} - -#endif /* _ASM_FB_H_ */ diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 437e9af96688..904e4b3af95d 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -25,8 +25,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define IO_SPACE_LIMIT 0 -#ifdef CONFIG_PCI - #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache #define ioremap_wt ioremap_nocache @@ -49,6 +47,8 @@ static inline void ioport_unmap(void __iomem *p) { } +#ifdef CONFIG_PCI + /* * s390 needs a private implementation of pci_iomap since ioremap with its * offset parameter isn't sufficient. That's because BAR spaces are not diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 426614a882a9..6baae236f461 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -107,6 +107,20 @@ struct esca_block { struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; } __packed; +/* + * This struct is used to store some machine check info from lowcore + * for machine checks that happen while the guest is running. + * This info in host's lowcore might be overwritten by a second machine + * check from host when host is in the machine check's high-level handling. + * The size is 24 bytes. + */ +struct mcck_volatile_info { + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 reserved; +}; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -264,7 +278,8 @@ struct kvm_s390_itdb { struct sie_page { struct kvm_s390_sie_block sie_block; - __u8 reserved200[1024]; /* 0x0200 */ + struct mcck_volatile_info mcck_info; /* 0x0200 */ + __u8 reserved218[1000]; /* 0x0218 */ struct kvm_s390_itdb itdb; /* 0x0600 */ __u8 reserved700[2304]; /* 0x0700 */ } __packed; @@ -541,7 +556,6 @@ struct kvm_s390_float_interrupt { struct mutex ais_lock; u8 simm; u8 nimm; - int ais_enabled; }; struct kvm_hw_wp_info_arch { diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8712e11bead4..4541ac44b35f 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -25,7 +25,9 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.gmap_asce = 0; mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE - mm->context.alloc_pgste = page_table_allocate_pgste; + mm->context.alloc_pgste = page_table_allocate_pgste || + test_thread_flag(TIF_PGSTE) || + current->mm->context.alloc_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; mm->context.use_cmma = 0; diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index e3e8895f5d3e..13623b9991d4 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -14,7 +14,14 @@ #include <linux/const.h> #include <linux/types.h> +#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \ + 1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \ + 1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \ + 1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \ + 1ULL<<45 | 1ULL<<44) #define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63) +#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5) +#define MCCK_CODE_CP _BITUL(63 - 9) #define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46) #define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) #define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 69b8a41fca84..624deaa44230 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long p4d; } p4d_t; typedef struct { unsigned long pgd; } pgd_t; typedef pte_t *pgtable_t; @@ -82,12 +83,14 @@ typedef pte_t *pgtable_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) +#define p4d_val(x) ((x).p4d) #define pgd_val(x) ((x).pgd) #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pud(x) ((pud_t) { (x) } ) +#define __p4d(x) ((p4d_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 4e3186649578..f36b4b726057 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -70,11 +70,10 @@ struct zpci_fmb { } __packed __aligned(128); enum zpci_state { - ZPCI_FN_STATE_RESERVED, - ZPCI_FN_STATE_STANDBY, - ZPCI_FN_STATE_CONFIGURED, - ZPCI_FN_STATE_ONLINE, - NR_ZPCI_FN_STATES, + ZPCI_FN_STATE_STANDBY = 0, + ZPCI_FN_STATE_CONFIGURED = 1, + ZPCI_FN_STATE_RESERVED = 2, + ZPCI_FN_STATE_ONLINE = 3, }; struct zpci_bar_struct { @@ -109,7 +108,7 @@ struct zpci_dev { u64 msi_addr; /* MSI address */ unsigned int max_msi; /* maximum number of MSI's */ struct airq_iv *aibv; /* adapter interrupt bit vector */ - unsigned int aisb; /* number of the summary bit */ + unsigned long aisb; /* number of the summary bit */ /* DMA stuff */ unsigned long *dma_table; @@ -159,11 +158,12 @@ extern const struct attribute_group *zpci_attr_groups[]; ----------------------------------------------------------------------------- */ /* Base stuff */ int zpci_create_device(struct zpci_dev *); +void zpci_remove_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); -void zpci_stop_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); +void zpci_remove_reserved_devices(void); /* CLP */ int clp_scan_pci_devices(void); @@ -172,6 +172,7 @@ int clp_rescan_pci_devices_simple(void); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); +int clp_get_state(u32 fid, enum zpci_state *state); #ifdef CONFIG_PCI /* Error handling and recovery */ diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 649eb62c52b3..34abcf275799 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -76,7 +76,7 @@ struct zpci_fib { u32 gd; } __packed __aligned(8); -int zpci_mod_fc(u64 req, struct zpci_fib *fib); +u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status); int zpci_refresh_trans(u64 fn, u64 addr, u64 range); int zpci_load(u64 *data, u64 req, u64 offset); int zpci_store(u64 data, u64 req, u64 offset); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 166f703dad7c..bb0ff1bb0c4a 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) return _SEGMENT_ENTRY_EMPTY; if (mm->context.asce_limit <= (1UL << 42)) return _REGION3_ENTRY_EMPTY; - return _REGION2_ENTRY_EMPTY; + if (mm->context.asce_limit <= (1UL << 53)) + return _REGION2_ENTRY_EMPTY; + return _REGION1_ENTRY_EMPTY; } -int crst_table_upgrade(struct mm_struct *); +int crst_table_upgrade(struct mm_struct *mm, unsigned long limit); void crst_table_downgrade(struct mm_struct *); +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) +{ + unsigned long *table = crst_table_alloc(mm); + + if (table) + crst_table_init(table, _REGION2_ENTRY_EMPTY); + return (p4d_t *) table; +} +#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm); @@ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) crst_table_free(mm, (unsigned long *) pmd); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d); +} + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { - pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); + p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e6e3b887bee3..57057fb1cc07 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,7 +24,6 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ -#include <asm-generic/5level-fixup.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> @@ -87,12 +86,15 @@ extern unsigned long zero_page_mask; */ #define PMD_SHIFT 20 #define PUD_SHIFT 31 -#define PGDIR_SHIFT 42 +#define P4D_SHIFT 42 +#define PGDIR_SHIFT 53 #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -105,6 +107,7 @@ extern unsigned long zero_page_mask; #define PTRS_PER_PTE 256 #define PTRS_PER_PMD 2048 #define PTRS_PER_PUD 2048 +#define PTRS_PER_P4D 2048 #define PTRS_PER_PGD 2048 #define FIRST_USER_ADDRESS 0UL @@ -115,6 +118,8 @@ extern unsigned long zero_page_mask; printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e)) #define pud_ERROR(e) \ printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e)) +#define p4d_ERROR(e) \ + printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) @@ -296,8 +301,6 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ -#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */ - #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ @@ -310,8 +313,8 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ #endif -#define _REGION_ENTRY_BITS 0xfffffffffffff227UL -#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL +#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL +#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -560,18 +563,23 @@ static inline void crdte(unsigned long old, unsigned long new, } /* - * pgd/pmd/pte query functions + * pgd/p4d/pud/pmd/pte query functions */ +static inline int pgd_folded(pgd_t pgd) +{ + return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1; +} + static inline int pgd_present(pgd_t pgd) { - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + if (pgd_folded(pgd)) return 1; return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; } static inline int pgd_none(pgd_t pgd) { - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + if (pgd_folded(pgd)) return 0; return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; } @@ -589,16 +597,48 @@ static inline int pgd_bad(pgd_t pgd) return (pgd_val(pgd) & mask) != 0; } +static inline int p4d_folded(p4d_t p4d) +{ + return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2; +} + +static inline int p4d_present(p4d_t p4d) +{ + if (p4d_folded(p4d)) + return 1; + return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL; +} + +static inline int p4d_none(p4d_t p4d) +{ + if (p4d_folded(p4d)) + return 0; + return p4d_val(p4d) == _REGION2_ENTRY_EMPTY; +} + +static inline unsigned long p4d_pfn(p4d_t p4d) +{ + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; + return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT; +} + +static inline int pud_folded(pud_t pud) +{ + return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3; +} + static inline int pud_present(pud_t pud) { - if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + if (pud_folded(pud)) return 1; return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; } static inline int pud_none(pud_t pud) { - if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + if (pud_folded(pud)) return 0; return pud_val(pud) == _REGION3_ENTRY_EMPTY; } @@ -614,7 +654,7 @@ static inline unsigned long pud_pfn(pud_t pud) { unsigned long origin_mask; - origin_mask = _REGION3_ENTRY_ORIGIN; + origin_mask = _REGION_ENTRY_ORIGIN; if (pud_large(pud)) origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; @@ -641,6 +681,13 @@ static inline int pud_bad(pud_t pud) return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } +static inline int p4d_bad(p4d_t p4d) +{ + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return pud_bad(__pud(p4d_val(p4d))); + return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0; +} + static inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY; @@ -794,8 +841,14 @@ static inline int pte_unused(pte_t pte) static inline void pgd_clear(pgd_t *pgd) { - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) + pgd_val(*pgd) = _REGION1_ENTRY_EMPTY; +} + +static inline void p4d_clear(p4d_t *p4d) +{ + if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + p4d_val(*p4d) = _REGION2_ENTRY_EMPTY; } static inline void pud_clear(pud_t *pud) @@ -1089,6 +1142,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) } #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) @@ -1098,19 +1152,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) +#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { - pud_t *pud = (pud_t *) pgd; - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pud = (pud_t *) pgd_deref(*pgd); - return pud + pud_index(address); + p4d_t *p4d = (p4d_t *) pgd; + + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) + p4d = (p4d_t *) pgd_deref(*pgd); + return p4d + p4d_index(address); +} + +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + pud_t *pud = (pud_t *) p4d; + + if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pud = (pud_t *) p4d_deref(*p4d); + return pud + pud_index(address); } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { pmd_t *pmd = (pmd_t *) pud; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) pmd = (pmd_t *) pud_deref(*pud); return pmd + pmd_index(address); @@ -1122,6 +1188,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) #define pud_page(pud) pfn_to_page(pud_pfn(pud)) +#define p4d_page(pud) pfn_to_page(p4d_pfn(p4d)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 60d395fdc864..c25d57e0aad3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -20,6 +20,7 @@ #define CIF_FPU 4 /* restore FPU registers */ #define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */ #define CIF_ENABLED_WAIT 6 /* in enabled wait state */ +#define CIF_MCCK_GUEST 7 /* machine check happening in guest */ #define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING) #define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY) @@ -28,6 +29,7 @@ #define _CIF_FPU _BITUL(CIF_FPU) #define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ) #define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT) +#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST) #ifndef __ASSEMBLY__ @@ -92,11 +94,11 @@ extern void execve_tail(void); */ #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \ - (1UL << 31) : (1UL << 53)) + (1UL << 31) : -PAGE_SIZE) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) -#define TASK_SIZE_MAX (1UL << 53) +#define TASK_SIZE_MAX (-PAGE_SIZE) #define STACK_TOP (test_thread_flag(TIF_31BIT) ? \ (1UL << 31) : (1UL << 42)) @@ -221,11 +223,6 @@ extern void release_thread(struct task_struct *); /* Free guarded storage control block for current */ void exit_thread_gs(void); -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *t); - unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ (task_stack_page(tsk) + THREAD_SIZE) - 1) diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 99bc456cc26a..853b01245c20 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -11,9 +11,11 @@ #define PIF_SYSCALL 0 /* inside a system call */ #define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ +#define PIF_SYSCALL_RESTART 2 /* restart the current system call */ #define _PIF_SYSCALL _BITUL(PIF_SYSCALL) #define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP) +#define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART) #ifndef __ASSEMBLY__ @@ -24,38 +26,38 @@ PSW_MASK_PSTATE | PSW_ASC_PRIMARY) struct psw_bits { - unsigned long : 1; - unsigned long r : 1; /* PER-Mask */ - unsigned long : 3; - unsigned long t : 1; /* DAT Mode */ - unsigned long i : 1; /* Input/Output Mask */ - unsigned long e : 1; /* External Mask */ - unsigned long key : 4; /* PSW Key */ - unsigned long : 1; - unsigned long m : 1; /* Machine-Check Mask */ - unsigned long w : 1; /* Wait State */ - unsigned long p : 1; /* Problem State */ - unsigned long as : 2; /* Address Space Control */ - unsigned long cc : 2; /* Condition Code */ - unsigned long pm : 4; /* Program Mask */ - unsigned long ri : 1; /* Runtime Instrumentation */ - unsigned long : 6; - unsigned long eaba : 2; /* Addressing Mode */ - unsigned long : 31; - unsigned long ia : 64; /* Instruction Address */ + unsigned long : 1; + unsigned long per : 1; /* PER-Mask */ + unsigned long : 3; + unsigned long dat : 1; /* DAT Mode */ + unsigned long io : 1; /* Input/Output Mask */ + unsigned long ext : 1; /* External Mask */ + unsigned long key : 4; /* PSW Key */ + unsigned long : 1; + unsigned long mcheck : 1; /* Machine-Check Mask */ + unsigned long wait : 1; /* Wait State */ + unsigned long pstate : 1; /* Problem State */ + unsigned long as : 2; /* Address Space Control */ + unsigned long cc : 2; /* Condition Code */ + unsigned long pm : 4; /* Program Mask */ + unsigned long ri : 1; /* Runtime Instrumentation */ + unsigned long : 6; + unsigned long eaba : 2; /* Addressing Mode */ + unsigned long : 31; + unsigned long ia : 64; /* Instruction Address */ }; enum { - PSW_AMODE_24BIT = 0, - PSW_AMODE_31BIT = 1, - PSW_AMODE_64BIT = 3 + PSW_BITS_AMODE_24BIT = 0, + PSW_BITS_AMODE_31BIT = 1, + PSW_BITS_AMODE_64BIT = 3 }; enum { - PSW_AS_PRIMARY = 0, - PSW_AS_ACCREG = 1, - PSW_AS_SECONDARY = 2, - PSW_AS_HOME = 3 + PSW_BITS_AS_PRIMARY = 0, + PSW_BITS_AS_ACCREG = 1, + PSW_BITS_AS_SECONDARY = 2, + PSW_BITS_AS_HOME = 3 }; #define psw_bits(__psw) (*({ \ diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 72df5f2de6b0..020a8814d511 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -59,7 +59,7 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, int cc; cc = ____pcpu_sigp(addr, order, parm, &_status); - if (status && cc == 1) + if (status && cc == SIGP_CC_STATUS_STORED) *status = _status; return cc; } diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index e784bed6ed7f..2b498e58b914 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -109,7 +109,7 @@ struct sysinfo_2_2_2 { unsigned short cpus_shared; char reserved_4[3]; unsigned char vsne; - uuid_be uuid; + uuid_t uuid; char reserved_5[160]; char ext_name[256]; }; @@ -134,7 +134,7 @@ struct sysinfo_3_2_2 { char reserved_1[3]; unsigned char evmne; unsigned int reserved_2; - uuid_be uuid; + uuid_t uuid; } vm[8]; char reserved_3[1504]; char ext_names[8][256]; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 0b3ee083a665..1aecf432c48d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -58,6 +58,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_UPROBE 3 /* breakpointed or single-stepping */ #define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ +#define TIF_PGSTE 6 /* New mm's will use 4K page tables */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 853b2a3d8dee..7317b3108a88 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -137,6 +137,21 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, } /* + * p4d_free_tlb frees a pud table and clears the CRSTE for the + * region second table entry from the tlb. + * If the mm uses a four level page table the single p4d is freed + * as the pgd. p4d_free_tlb checks the asce_limit against 8PB + * to avoid the double free of the p4d in this case. + */ +static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, + unsigned long address) +{ + if (tlb->mm->context.asce_limit <= (1UL << 53)) + return; + tlb_remove_table(tlb, p4d); +} + +/* * pud_free_tlb frees a pud table and clears the CRSTE for the * region third table entry from the tlb. * If the mm uses a three level page table the single pud is freed diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 6bb29633e1f1..b65c414b6c0e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -58,6 +58,9 @@ int main(void) OFFSET(__SF_BACKCHAIN, stack_frame, back_chain); OFFSET(__SF_GPRS, stack_frame, gprs); OFFSET(__SF_EMPTY, stack_frame, empty1); + OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]); + OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]); + OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); BLANK(); /* timeval/timezone offsets for use by vdso */ OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 829e1c53005c..dab78babfab6 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -98,8 +98,10 @@ static int show_address(void *data, unsigned long address, int reliable) return 0; } -static void show_trace(struct task_struct *task, unsigned long sp) +void show_stack(struct task_struct *task, unsigned long *stack) { + unsigned long sp = (unsigned long) stack; + if (!sp) sp = task ? task->thread.ksp : current_stack_pointer(); printk("Call Trace:\n"); @@ -109,29 +111,6 @@ static void show_trace(struct task_struct *task, unsigned long sp) debug_show_held_locks(task); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - unsigned long *stack; - int i; - - stack = sp; - if (!stack) { - if (!task) - stack = (unsigned long *)current_stack_pointer(); - else - stack = (unsigned long *)task->thread.ksp; - } - printk(KERN_DEFAULT "Stack:\n"); - for (i = 0; i < 20; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if (i % 4 == 0) - printk(KERN_DEFAULT " "); - pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' '); - } - show_trace(task, (unsigned long)sp); -} - static void show_last_breaking_event(struct pt_regs *regs) { printk("Last Breaking-Event-Address:\n"); @@ -149,8 +128,8 @@ void show_registers(struct pt_regs *regs) pr_cont(" (%pSR)", (void *)regs->psw.addr); pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, - psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); + "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext, + psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm); pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba); printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); @@ -169,7 +148,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_trace(NULL, regs->gprs[15]); + show_stack(NULL, (unsigned long *) regs->gprs[15]); show_last_breaking_event(regs); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index e408d9cc5b96..21900e1cee9c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,7 +52,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ _CIF_ASCE_SECONDARY | _CIF_FPU) -_PIF_WORK = (_PIF_PER_TRAP) +_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) #define BASED(name) name-cleanup_critical(%r13) @@ -225,18 +225,24 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed +.Lsie_entry: sie 0(%r14) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce .Lsie_done: # some program checks are suppressing. C code (e.g. do_protection_exception) -# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions between sie64a and .Lsie_done should not cause program -# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There +# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. +# Other instructions between sie64a and .Lsie_done should not cause program +# interrupts. So lets use 3 nops as a landing pad for all possible rewinds. # See also .Lcleanup_sie -.Lrewind_pad: - nop 0 +.Lrewind_pad6: + nopr 7 +.Lrewind_pad4: + nopr 7 +.Lrewind_pad2: + nopr 7 .globl sie_exit sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area @@ -249,7 +255,9 @@ sie_exit: stg %r14,__SF_EMPTY+16(%r15) # set exit reason code j sie_exit - EX_TABLE(.Lrewind_pad,.Lsie_fault) + EX_TABLE(.Lrewind_pad6,.Lsie_fault) + EX_TABLE(.Lrewind_pad4,.Lsie_fault) + EX_TABLE(.Lrewind_pad2,.Lsie_fault) EX_TABLE(sie_exit,.Lsie_fault) EXPORT_SYMBOL(sie64a) EXPORT_SYMBOL(sie_exit) @@ -327,6 +335,8 @@ ENTRY(system_call) jo .Lsysc_mcck_pending TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule + TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART + jo .Lsysc_syscall_restart #ifdef CONFIG_UPROBES TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify @@ -340,6 +350,8 @@ ENTRY(system_call) jo .Lsysc_patch_pending # handle live patching just before # signals and possible syscall restart #endif + TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART + jo .Lsysc_syscall_restart TSTMSK __TI_flags(%r12),_TIF_SIGPENDING jo .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME @@ -441,6 +453,15 @@ ENTRY(system_call) jg do_per_trap # +# _PIF_SYSCALL_RESTART is set, repeat the current system call +# +.Lsysc_syscall_restart: + ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART + lmg %r1,%r7,__PT_R1(%r11) # load svc arguments + lg %r2,__PT_ORIG_GPR2(%r11) + j .Lsysc_do_svc + +# # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before # and after the system call # @@ -874,9 +895,7 @@ ENTRY(save_fpu_regs) oi __LC_CPU_FLAGS+7,_CIF_FPU br %r14 .Lsave_fpu_regs_end: -#if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL(save_fpu_regs) -#endif /* * Load floating-point controls and floating-point or vector registers. @@ -1104,7 +1123,13 @@ cleanup_critical: .quad .Lsie_done .Lcleanup_sie: - lg %r9,__SF_EMPTY(%r15) # get control block pointer + cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? + je 1f + slg %r9,BASED(.Lsie_crit_mcck_start) + clg %r9,BASED(.Lsie_crit_mcck_length) + jh 1f + oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST +1: lg %r9,__SF_EMPTY(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit @@ -1289,6 +1314,10 @@ cleanup_critical: .quad .Lsie_gmap .Lsie_critical_length: .quad .Lsie_done - .Lsie_gmap +.Lsie_crit_mcck_start: + .quad .Lsie_entry +.Lsie_crit_mcck_length: + .quad .Lsie_skip - .Lsie_entry #endif .section .rodata, "a" diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index e545ffe5155a..8e622bb52f7a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,8 +564,6 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) - diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); @@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused) break; case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); - if (MACHINE_IS_LPAR) - diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); - else - diag308(DIAG308_LOAD_CLEAR, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 985589523970..31d03a84126c 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -25,6 +25,8 @@ #include <asm/crw.h> #include <asm/switch_to.h> #include <asm/ctl_reg.h> +#include <asm/asm-offsets.h> +#include <linux/kvm_host.h> struct mcck_struct { unsigned int kill_task : 1; @@ -274,12 +276,39 @@ static int notrace s390_validate_registers(union mci mci, int umode) return kill_task; } +/* + * Backup the guest's machine check info to its description block + */ +static void notrace s390_backup_mcck_info(struct pt_regs *regs) +{ + struct mcck_volatile_info *mcck_backup; + struct sie_page *sie_page; + + /* r14 contains the sie block, which was set in sie64a */ + struct kvm_s390_sie_block *sie_block = + (struct kvm_s390_sie_block *) regs->gprs[14]; + + if (sie_block == NULL) + /* Something's seriously wrong, stop system. */ + s390_handle_damage(); + + sie_page = container_of(sie_block, struct sie_page, sie_block); + mcck_backup = &sie_page->mcck_info; + mcck_backup->mcic = S390_lowcore.mcck_interruption_code & + ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE); + mcck_backup->ext_damage_code = S390_lowcore.external_damage_code; + mcck_backup->failing_storage_address + = S390_lowcore.failing_storage_address; +} + #define MAX_IPD_COUNT 29 #define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ #define ED_STP_ISLAND 6 /* External damage STP island check */ #define ED_STP_SYNC 7 /* External damage STP sync check */ +#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE) + /* * machine check handler. */ @@ -291,6 +320,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) struct mcck_struct *mcck; unsigned long long tmp; union mci mci; + unsigned long mcck_dam_code; nmi_enter(); inc_irq_stat(NMI_NMI); @@ -301,7 +331,13 @@ void notrace s390_do_machine_check(struct pt_regs *regs) /* System damage -> stopping machine */ s390_handle_damage(); } - if (mci.pd) { + + /* + * Reinject the instruction processing damages' machine checks + * including Delayed Access Exception into the guest + * instead of damaging the host if they happen in the guest. + */ + if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) { if (mci.b) { /* Processing backup -> verify if we can survive this */ u64 z_mcic, o_mcic, t_mcic; @@ -345,6 +381,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck->mcck_code = mci.val; set_cpu_flag(CIF_MCCK_PENDING); } + + /* + * Backup the machine check's info if it happens when the guest + * is running. + */ + if (test_cpu_flag(CIF_MCCK_GUEST)) + s390_backup_mcck_info(regs); + if (mci.cd) { /* Timing facility damage */ s390_handle_damage(); @@ -358,15 +402,22 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (mcck->stp_queue) set_cpu_flag(CIF_MCCK_PENDING); } - if (mci.se) - /* Storage error uncorrected */ - s390_handle_damage(); - if (mci.ke) - /* Storage key-error uncorrected */ - s390_handle_damage(); - if (mci.ds && mci.fa) - /* Storage degradation */ - s390_handle_damage(); + + /* + * Reinject storage related machine checks into the guest if they + * happen when the guest is running. + */ + if (!test_cpu_flag(CIF_MCCK_GUEST)) { + if (mci.se) + /* Storage error uncorrected */ + s390_handle_damage(); + if (mci.ke) + /* Storage key-error uncorrected */ + s390_handle_damage(); + if (mci.ds && mci.fa) + /* Storage degradation */ + s390_handle_damage(); + } if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; @@ -377,6 +428,19 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck->warning = 1; set_cpu_flag(CIF_MCCK_PENDING); } + + /* + * If there are only Channel Report Pending and External Damage + * machine checks, they will not be reinjected into the guest + * because they refer to host conditions only. + */ + mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK); + if (test_cpu_flag(CIF_MCCK_GUEST) && + (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) { + /* Set exit reason code for host's later handling */ + *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; + } + clear_cpu_flag(CIF_MCCK_GUEST); nmi_exit(); } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ca960d0370d5..0c82f7903fc7 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -995,11 +995,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) regs.int_parm = CPU_MF_INT_SF_PRA; sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; - psw_bits(regs.psw).ia = sfr->basic.ia; - psw_bits(regs.psw).t = sfr->basic.T; - psw_bits(regs.psw).w = sfr->basic.W; - psw_bits(regs.psw).p = sfr->basic.P; - psw_bits(regs.psw).as = sfr->basic.AS; + psw_bits(regs.psw).ia = sfr->basic.ia; + psw_bits(regs.psw).dat = sfr->basic.T; + psw_bits(regs.psw).wait = sfr->basic.W; + psw_bits(regs.psw).per = sfr->basic.P; + psw_bits(regs.psw).as = sfr->basic.AS; /* * Use the hardware provided configuration level to decide if the diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 955a7b6fa0a4..93a386f4a3b5 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -245,6 +245,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, struct perf_pmu_events_attr *pmu_attr; pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%04llx,name=%s\n", - pmu_attr->id, attr->attr.name); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 999d7154bbdc..bb32b8618bf6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -41,31 +41,6 @@ asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); -/* - * Return saved PC of a blocked thread. used in kernel/sched. - * resume in entry.S does not create a new stack frame, it - * just stores the registers %r6-%r15 to the frame given by - * schedule. We want to return the address of the caller of - * schedule, so we have to walk the backchain one time to - * find the frame schedule() store its return address. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct stack_frame *sf, *low, *high; - - if (!tsk || !task_stack_page(tsk)) - return 0; - low = task_stack_page(tsk); - high = (struct stack_frame *) task_pt_regs(tsk); - sf = (struct stack_frame *) tsk->thread.ksp; - if (sf <= low || sf > high) - return 0; - sf = (struct stack_frame *) sf->back_chain; - if (sf <= low || sf > high) - return 0; - return sf->gprs[8]; -} - extern void kernel_thread_starter(void); /* diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 488c5bb8dc77..252ed61a128b 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1160,6 +1160,8 @@ static int s390_gs_cb_get(struct task_struct *target, return -ENODEV; if (!data) return -ENODATA; + if (target == current) + save_gs_cb(data); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, sizeof(struct gs_cb)); } @@ -1170,6 +1172,7 @@ static int s390_gs_cb_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { struct gs_cb *data = target->thread.gs_cb; + int rc; if (!MACHINE_HAS_GS) return -ENODEV; @@ -1177,10 +1180,18 @@ static int s390_gs_cb_set(struct task_struct *target, data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + data->gsd = 25; target->thread.gs_cb = data; + if (target == current) + __ctl_set_bit(2, 4); + } else if (target == current) { + save_gs_cb(data); } - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); + if (target == current) + restore_gs_cb(data); + return rc; } static int s390_gs_bc_get(struct task_struct *target, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 363000a77ffc..1020a11a24e5 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -26,6 +26,7 @@ #include <linux/err.h> #include <linux/spinlock.h> #include <linux/kernel_stat.h> +#include <linux/kmemleak.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irqflags.h> @@ -207,6 +208,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); if (!mcesa_origin) goto out; + /* The pointer is stored with mcesa_bits ORed in */ + kmemleak_not_leak((void *) mcesa_origin); mcesa_bits = MACHINE_HAS_GS ? 11 : 0; } } else { diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index eefcb54872a5..fb869b103825 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -242,7 +242,7 @@ static void print_ext_name(struct seq_file *m, int lvl, static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info) { - if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be))) + if (uuid_is_null(&info->vm[i].uuid)) return; seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid); } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index f787b9d8f54c..442e5423ce3d 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -21,6 +21,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/cpu.h> #include <asm/fpu/api.h> #include "entry.h" diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 314e0ee3016a..d94baa8db507 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -27,12 +27,12 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) + if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) return -EINVAL; - if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) + if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) return -EINVAL; clear_pt_regs_flag(regs, PIF_PER_TRAP); - auprobe->saved_per = psw_bits(regs->psw).r; + auprobe->saved_per = psw_bits(regs->psw).per; auprobe->saved_int_code = regs->int_code; regs->int_code = UPROBE_TRAP_NR; regs->psw.addr = current->utask->xol_vaddr; @@ -81,7 +81,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); update_cr_regs(current); - psw_bits(regs->psw).r = auprobe->saved_per; + psw_bits(regs->psw).per = auprobe->saved_per; regs->int_code = auprobe->saved_int_code; if (fixup & FIXUP_PSW_NORMAL) @@ -372,8 +372,8 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) || - ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) && + if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) || + ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) && !is_compat_task())) { regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 10516ae3b55e..b89d19f6f2ab 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -50,6 +50,56 @@ static struct page **vdso64_pagelist; */ unsigned int __read_mostly vdso_enabled = 1; +static int vdso_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page **vdso_pagelist; + unsigned long vdso_pages; + + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + } +#endif + + if (vmf->pgoff >= vdso_pages) + return VM_FAULT_SIGBUS; + + vmf->page = vdso_pagelist[vmf->pgoff]; + get_page(vmf->page); + return 0; +} + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *vma) +{ + unsigned long vdso_pages; + + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + vdso_pages = vdso32_pages; +#endif + + if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start) + return -EINVAL; + + if (WARN_ON_ONCE(current->mm != vma->vm_mm)) + return -EFAULT; + + current->mm->context.vdso_base = vma->vm_start; + return 0; +} + +static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, +}; + static int __init vdso_setup(char *s) { unsigned long val; @@ -181,7 +231,7 @@ static void vdso_init_cr5(void) int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - struct page **vdso_pagelist; + struct vm_area_struct *vma; unsigned long vdso_pages; unsigned long vdso_base; int rc; @@ -194,13 +244,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!uses_interp) return 0; - vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT - if (is_compat_task()) { - vdso_pagelist = vdso32_pagelist; + if (is_compat_task()) vdso_pages = vdso32_pages; - } #endif /* * vDSO has a problem and was disabled, just don't "enable" it for @@ -209,8 +256,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (vdso_pages == 0) return 0; - current->mm->context.vdso_base = 0; - /* * pick a base address for the vDSO in process space. We try to put * it at vdso_base which is the "natural" base for it, but we might @@ -225,13 +270,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) } /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). - */ - current->mm->context.vdso_base = vdso_base; - - /* * our vma flags don't have VM_WRITE so by default, the process * isn't allowed to write those pages. * gdb can break that with ptrace interface, and thus trigger COW @@ -241,24 +279,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * It's fine to use that for setting breakpoints in the vDSO code * pages though. */ - rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (rc) - current->mm->context.vdso_base = 0; + vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_mapping); + if (IS_ERR(vma)) { + rc = PTR_ERR(vma); + goto out_up; + } + + current->mm->context.vdso_base = vdso_base; + rc = 0; + out_up: up_write(&mm->mmap_sem); return rc; } -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) - return "[vdso]"; - return NULL; -} - static int __init vdso_init(void) { int i; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 072d84ba42a3..dd7178fbb4f3 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -110,11 +110,10 @@ static inline u64 scale_vtime(u64 vtime) return vtime; } -static void account_system_index_scaled(struct task_struct *p, - u64 cputime, u64 scaled, +static void account_system_index_scaled(struct task_struct *p, u64 cputime, enum cpu_usage_stat index) { - p->stimescaled += cputime_to_nsecs(scaled); + p->stimescaled += cputime_to_nsecs(scale_vtime(cputime)); account_system_index_time(p, cputime_to_nsecs(cputime), index); } @@ -176,14 +175,11 @@ static int do_account_vtime(struct task_struct *tsk) } if (system) - account_system_index_scaled(tsk, system, scale_vtime(system), - CPUTIME_SYSTEM); + account_system_index_scaled(tsk, system, CPUTIME_SYSTEM); if (hardirq) - account_system_index_scaled(tsk, hardirq, scale_vtime(hardirq), - CPUTIME_IRQ); + account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ); if (softirq) - account_system_index_scaled(tsk, softirq, scale_vtime(softirq), - CPUTIME_SOFTIRQ); + account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ); steal = S390_lowcore.steal_timer; if ((s64) steal > 0) { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9da243d94cc3..875f8bea8c67 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -551,26 +551,26 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, int rc; struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); - if (!psw.t) { + if (!psw.dat) { asce->val = 0; asce->r = 1; return 0; } - if (mode == GACC_IFETCH) - psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY; + if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME)) + psw.as = PSW_BITS_AS_PRIMARY; switch (psw.as) { - case PSW_AS_PRIMARY: + case PSW_BITS_AS_PRIMARY: asce->val = vcpu->arch.sie_block->gcr[1]; return 0; - case PSW_AS_SECONDARY: + case PSW_BITS_AS_SECONDARY: asce->val = vcpu->arch.sie_block->gcr[7]; return 0; - case PSW_AS_HOME: + case PSW_BITS_AS_HOME: asce->val = vcpu->arch.sie_block->gcr[13]; return 0; - case PSW_AS_ACCREG: + case PSW_BITS_AS_ACCREG: rc = ar_translation(vcpu, asce, ar, mode); if (rc > 0) return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC); @@ -771,7 +771,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, if (!ctlreg0.lap) return 0; - if (psw_bits(*psw).t && asce.p) + if (psw_bits(*psw).dat && asce.p) return 0; return 1; } @@ -790,7 +790,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, PROT_TYPE_LA); ga &= PAGE_MASK; - if (psw_bits(*psw).t) { + if (psw_bits(*psw).dat) { rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; @@ -831,7 +831,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, pages = vmalloc(nr_pages * sizeof(unsigned long)); if (!pages) return -ENOMEM; - need_ipte_lock = psw_bits(*psw).t && !asce.r; + need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); @@ -899,7 +899,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, mode, PROT_TYPE_LA); } - if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ + if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ rc = guest_translate(vcpu, gva, gpa, asce, mode); if (rc > 0) return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); @@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, ptr = asce.origin * 4096; if (asce.r) { *fake = 1; + ptr = 0; asce.dt = ASCE_TYPE_REGION1; } switch (asce.dt) { case ASCE_TYPE_REGION1: - if (vaddr.rfx01 > asce.tl && !asce.r) + if (vaddr.rfx01 > asce.tl && !*fake) return PGM_REGION_FIRST_TRANS; break; case ASCE_TYPE_REGION2: @@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, union region1_table_entry rfte; if (*fake) { - /* offset in 16EB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); + ptr += (unsigned long) vaddr.rfx << 53; rfte.val = ptr; goto shadow_r2t; } @@ -1036,8 +1036,7 @@ shadow_r2t: union region2_table_entry rste; if (*fake) { - /* offset in 8PB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); + ptr += (unsigned long) vaddr.rsx << 42; rste.val = ptr; goto shadow_r3t; } @@ -1064,8 +1063,7 @@ shadow_r3t: union region3_table_entry rtte; if (*fake) { - /* offset in 4TB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 31UL); + ptr += (unsigned long) vaddr.rtx << 31; rtte.val = ptr; goto shadow_sgt; } @@ -1101,8 +1099,7 @@ shadow_sgt: union segment_table_entry ste; if (*fake) { - /* offset in 2G guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 20UL); + ptr += (unsigned long) vaddr.sx << 20; ste.val = ptr; goto shadow_pgt; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 7ce47fd36f28..bec42b852246 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -57,9 +57,9 @@ static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu, { psw_t *psw = &vcpu->arch.sie_block->gpsw; - if (psw_bits(*psw).eaba == PSW_AMODE_64BIT) + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT) return ga; - if (psw_bits(*psw).eaba == PSW_AMODE_31BIT) + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT) return ga & ((1UL << 31) - 1); return ga & ((1UL << 24) - 1); } diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 23d9a4e12da1..c2e0ddc1356e 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -613,15 +613,15 @@ int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) * instruction. Check primary and home space-switch-event * controls. (theoretically home -> home produced no event) */ - if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) && - (pssec(vcpu) || hssec(vcpu))) + if (((new_as == PSW_BITS_AS_HOME) ^ old_as_is_home(vcpu)) && + (pssec(vcpu) || hssec(vcpu))) vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; /* * PT, PTI, PR, PC instruction operate on primary AS only. Check * if the primary-space-switch-event control was or got set. */ - if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) && + if (new_as == PSW_BITS_AS_PRIMARY && !old_as_is_home(vcpu) && (pssec(vcpu) || old_ssec(vcpu))) vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index caf15c8a8948..2d120fef7d90 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2160,7 +2160,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_req req; int ret = 0; - if (!fi->ais_enabled) + if (!test_kvm_facility(kvm, 72)) return -ENOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) @@ -2204,7 +2204,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm, }; int ret = 0; - if (!fi->ais_enabled || !adapter->suppressible) + if (!test_kvm_facility(kvm, 72) || !adapter->suppressible) return kvm_s390_inject_vm(kvm, &s390int); mutex_lock(&fi->ais_lock); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 689ac48361c6..b0d7de5a533d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -558,7 +558,6 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) } else { set_kvm_facility(kvm->arch.model.fac_mask, 72); set_kvm_facility(kvm->arch.model.fac_list, 72); - kvm->arch.float_int.ais_enabled = 1; r = 0; } mutex_unlock(&kvm->lock); @@ -1533,7 +1532,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.float_int.ais_lock); kvm->arch.float_int.simm = 0; kvm->arch.float_int.nimm = 0; - kvm->arch.float_int.ais_enabled = 0; spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); @@ -2069,6 +2067,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, if (!vcpu) goto out; + BUILD_BUG_ON(sizeof(struct sie_page) != 4096); sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); if (!sie_page) goto out_free_cpu; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c03106c428cf..e53292a89257 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -361,7 +361,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) } } if (m3 & SSKE_MB) { - if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK; else vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL; @@ -374,7 +374,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) static int handle_ipte_interlock(struct kvm_vcpu *vcpu) { vcpu->stat.instruction_ipte_interlock++; - if (psw_bits(vcpu->arch.sie_block->gpsw).p) + if (psw_bits(vcpu->arch.sie_block->gpsw).pstate) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); kvm_s390_retry_instr(vcpu); @@ -901,7 +901,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) /* only support 2G frame size if EDAT2 is available and we are not in 24-bit addressing mode */ if (!test_kvm_facility(vcpu->kvm, 78) || - psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT) + psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); end = (start + (1UL << 31)) & ~((1UL << 31) - 1); break; @@ -938,7 +938,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) start += PAGE_SIZE; } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { - if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { vcpu->run->s.regs.gprs[reg2] = end; } else { vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL; diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 1b553d847140..049c3c455b32 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, } static void walk_pud_level(struct seq_file *m, struct pg_state *st, - pgd_t *pgd, unsigned long addr) + p4d_t *p4d, unsigned long addr) { unsigned int prot; pud_t *pud; @@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { st->current_address = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); if (!pud_none(*pud)) if (pud_large(*pud)) { prot = pud_val(*pud) & @@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, } } +static void walk_p4d_level(struct seq_file *m, struct pg_state *st, + pgd_t *pgd, unsigned long addr) +{ + p4d_t *p4d; + int i; + + for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) { + st->current_address = addr; + p4d = p4d_offset(pgd, addr); + if (!p4d_none(*p4d)) + walk_pud_level(m, st, p4d, addr); + else + note_page(m, st, _PAGE_INVALID, 2); + addr += P4D_SIZE; + } +} + static void walk_pgd_level(struct seq_file *m) { unsigned long addr = 0; @@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m) st.current_address = addr; pgd = pgd_offset_k(addr); if (!pgd_none(*pgd)) - walk_pud_level(m, &st, pgd, addr); + walk_p4d_level(m, &st, pgd, addr); else note_page(m, &st, _PAGE_INVALID, 1); addr += PGDIR_SIZE; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 5845d3028ffc..14f25798b001 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -130,7 +130,7 @@ static int bad_address(void *p) static void dump_pagetable(unsigned long asce, unsigned long address) { - unsigned long *table = __va(asce & PAGE_MASK); + unsigned long *table = __va(asce & _ASCE_ORIGIN); pr_alert("AS:%016lx ", asce); switch (asce & _ASCE_TYPE_MASK) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 7f6db1e6c048..4fb3d3cdb370 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -125,7 +125,7 @@ static void gmap_radix_tree_free(struct radix_tree_root *root) struct radix_tree_iter iter; unsigned long indices[16]; unsigned long index; - void **slot; + void __rcu **slot; int i, nr; /* A radix tree is freed by deleting all of its entries */ @@ -150,7 +150,7 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) struct radix_tree_iter iter; unsigned long indices[16]; unsigned long index; - void **slot; + void __rcu **slot; int i, nr; /* A radix tree is freed by deleting all of its entries */ @@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) unsigned long *table; spinlock_t *ptl; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; int rc; @@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) mm = gmap->mm; pgd = pgd_offset(mm, vmaddr); VM_BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, vmaddr); + p4d = p4d_offset(pgd, vmaddr); + VM_BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, vmaddr); VM_BUG_ON(pud_none(*pud)); /* large puds cannot yet be handled */ if (pud_large(*pud)) @@ -1008,7 +1011,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table); static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, struct gmap_rmap *rmap) { - void **slot; + void __rcu **slot; BUG_ON(!gmap_is_shadow(sg)); slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index b7b779c40a5b..8ecc25e760fa 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, return 1; } -static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, +static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp, pud; - pudp = (pud_t *) pgdp; - if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - pudp = (pud_t *) pgd_deref(pgd); + pudp = (pud_t *) p4dp; + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pudp = (pud_t *) p4d_deref(p4d); pudp += pud_index(addr); do { pud = *pudp; @@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, return 1; } +static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + p4d_t *p4dp, p4d; + + p4dp = (p4d_t *) pgdp; + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) + p4dp = (p4d_t *) pgd_deref(pgd); + p4dp += p4d_index(addr); + do { + p4d = *p4dp; + barrier(); + next = p4d_addr_end(addr, end); + if (p4d_none(p4d)) + return 0; + if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr)) + return 0; + } while (p4dp++, addr = next, addr != end); + + return 1; +} + /* * Like get_user_pages_fast() except its IRQ-safe in that it won't fall * back to the regular GUP. @@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) break; - if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 9b4050caa4e9..d3a5e39756f6 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -162,16 +162,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp = NULL; pgdp = pgd_offset(mm, addr); - pudp = pud_alloc(mm, pgdp, addr); - if (pudp) { - if (sz == PUD_SIZE) - return (pte_t *) pudp; - else if (sz == PMD_SIZE) - pmdp = pmd_alloc(mm, pudp, addr); + p4dp = p4d_alloc(mm, pgdp, addr); + if (p4dp) { + pudp = pud_alloc(mm, p4dp, addr); + if (pudp) { + if (sz == PUD_SIZE) + return (pte_t *) pudp; + else if (sz == PMD_SIZE) + pmdp = pmd_alloc(mm, pudp, addr); + } } return (pte_t *) pmdp; } @@ -179,16 +183,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp = NULL; pgdp = pgd_offset(mm, addr); if (pgd_present(*pgdp)) { - pudp = pud_offset(pgdp, addr); - if (pud_present(*pudp)) { - if (pud_large(*pudp)) - return (pte_t *) pudp; - pmdp = pmd_offset(pudp, addr); + p4dp = p4d_offset(pgdp, addr); + if (p4d_present(*p4dp)) { + pudp = pud_offset(p4dp, addr); + if (pud_present(*pudp)) { + if (pud_large(*pudp)) + return (pte_t *) pudp; + pmdp = pmd_offset(pudp, addr); + } } } return (pte_t *) pmdp; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ee6a1d3d4983..3348e60dd8ad 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -81,6 +81,7 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long pgd_type, asce_bits; + psw_t psw; init_mm.pgd = swapper_pg_dir; if (VMALLOC_END > (1UL << 42)) { @@ -100,7 +101,10 @@ void __init paging_init(void) __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); - __arch_local_irq_stosm(0x04); + psw.mask = __extract_psw(); + psw_bits(psw).dat = 1; + psw_bits(psw).as = PSW_BITS_AS_HOME; + __load_psw_mask(psw.mask); sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index b017daed6887..2e10d2b8ad35 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -101,7 +101,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) goto check_asce_limit; } @@ -120,7 +120,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, check_asce_limit: if (addr + len > current->mm->context.asce_limit) { - rc = crst_table_upgrade(mm); + rc = crst_table_upgrade(mm, addr + len); if (rc) return (unsigned long) rc; } @@ -151,7 +151,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) goto check_asce_limit; } @@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, check_asce_limit: if (addr + len > current->mm->context.asce_limit) { - rc = crst_table_upgrade(mm); + rc = crst_table_upgrade(mm, addr + len); if (rc) return (unsigned long) rc; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 49e721f3645e..180481589246 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -229,14 +229,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } -static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, +static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long flags) { unsigned long next; pud_t *pudp; int rc = 0; - pudp = pud_offset(pgd, addr); + pudp = pud_offset(p4d, addr); do { if (pud_none(*pudp)) return -EINVAL; @@ -259,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, return rc; } +static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long flags) +{ + unsigned long next; + p4d_t *p4dp; + int rc = 0; + + p4dp = p4d_offset(pgd, addr); + do { + if (p4d_none(*p4dp)) + return -EINVAL; + next = p4d_addr_end(addr, end); + rc = walk_pud_level(p4dp, addr, next, flags); + p4dp++; + addr = next; + cond_resched(); + } while (addr < end && !rc); + return rc; +} + static DEFINE_MUTEX(cpa_mutex); static int change_page_attr(unsigned long addr, unsigned long end, @@ -278,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end, if (pgd_none(*pgdp)) break; next = pgd_addr_end(addr, end); - rc = walk_pud_level(pgdp, addr, next, flags); + rc = walk_p4d_level(pgdp, addr, next, flags); if (rc) break; cond_resched(); @@ -319,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) unsigned long address; int nr, i, j; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -326,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) for (i = 0; i < numpages;) { address = page_to_phys(page + i); pgd = pgd_offset_k(address); - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + pud = pud_offset(p4d, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); nr = (unsigned long)pte >> ilog2(sizeof(long)); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index f502cbe657af..18918e394ce4 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -76,29 +76,46 @@ static void __crst_table_upgrade(void *arg) __tlb_flush_local(); } -int crst_table_upgrade(struct mm_struct *mm) +int crst_table_upgrade(struct mm_struct *mm, unsigned long end) { unsigned long *table, *pgd; + int rc, notify; - /* upgrade should only happen from 3 to 4 levels */ - BUG_ON(mm->context.asce_limit != (1UL << 42)); - - table = crst_table_alloc(mm); - if (!table) + /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ + BUG_ON(mm->context.asce_limit < (1UL << 42)); + if (end >= TASK_SIZE_MAX) return -ENOMEM; - - spin_lock_bh(&mm->page_table_lock); - pgd = (unsigned long *) mm->pgd; - crst_table_init(table, _REGION2_ENTRY_EMPTY); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->context.asce_limit = 1UL << 53; - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION2; - spin_unlock_bh(&mm->page_table_lock); - - on_each_cpu(__crst_table_upgrade, mm, 0); - return 0; + rc = 0; + notify = 0; + while (mm->context.asce_limit < end) { + table = crst_table_alloc(mm); + if (!table) { + rc = -ENOMEM; + break; + } + spin_lock_bh(&mm->page_table_lock); + pgd = (unsigned long *) mm->pgd; + if (mm->context.asce_limit == (1UL << 42)) { + crst_table_init(table, _REGION2_ENTRY_EMPTY); + p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->context.asce_limit = 1UL << 53; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + } else { + crst_table_init(table, _REGION1_ENTRY_EMPTY); + pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->context.asce_limit = -PAGE_SIZE; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION1; + } + notify = 1; + spin_unlock_bh(&mm->page_table_lock); + } + if (notify) + on_each_cpu(__crst_table_upgrade, mm, 0); + return rc; } void crst_table_downgrade(struct mm_struct *mm) @@ -274,7 +291,7 @@ static void __tlb_remove_table(void *_table) struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); switch (mask) { - case 0: /* pmd or pud */ + case 0: /* pmd, pud, or p4d */ free_pages((unsigned long) table, 2); break; case 1: /* lower 2K of a 4K page table */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 947b66a5cdba..d4d409ba206b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -610,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgste_t pgste; @@ -618,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) bool dirty; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return false; + pud = pud_alloc(mm, p4d, addr); if (!pud) return false; pmd = pmd_alloc(mm, pud, addr); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c33c94b4be60..d8398962a723 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -38,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order) return (void *) memblock_alloc(size, size); } +static inline p4d_t *vmem_p4d_alloc(void) +{ + p4d_t *p4d = NULL; + + p4d = vmem_alloc_pages(2); + if (!p4d) + return NULL; + clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4); + return p4d; +} + static inline pud_t *vmem_pud_alloc(void) { pud_t *pud = NULL; @@ -85,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size) unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; + p4d_t *p4_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; @@ -102,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size) while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { + p4_dir = vmem_p4d_alloc(); + if (!p4_dir) + goto out; + pgd_populate(&init_mm, pg_dir, p4_dir); + } + p4_dir = p4d_offset(pg_dir, address); + if (p4d_none(*p4_dir)) { pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate(&init_mm, pg_dir, pu_dir); + p4d_populate(&init_mm, p4_dir, pu_dir); } - pu_dir = pud_offset(pg_dir, address); + pu_dir = pud_offset(p4_dir, address); if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && !debug_pagealloc_enabled()) { @@ -161,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; + p4d_t *p4_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; @@ -172,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size) address += PGDIR_SIZE; continue; } - pu_dir = pud_offset(pg_dir, address); + p4_dir = p4d_offset(pg_dir, address); + if (p4d_none(*p4_dir)) { + address += P4D_SIZE; + continue; + } + pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { address += PUD_SIZE; continue; @@ -213,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) unsigned long pgt_prot, sgt_prot; unsigned long address = start; pgd_t *pg_dir; + p4d_t *p4_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; @@ -227,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) for (address = start; address < end;) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { + p4_dir = vmem_p4d_alloc(); + if (!p4_dir) + goto out; + pgd_populate(&init_mm, pg_dir, p4_dir); + } + + p4_dir = p4d_offset(pg_dir, address); + if (p4d_none(*p4_dir)) { pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate(&init_mm, pg_dir, pu_dir); + p4d_populate(&init_mm, p4_dir, pu_dir); } - pu_dir = pud_offset(pg_dir, address); + pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { pm_dir = vmem_pmd_alloc(); if (!pm_dir) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8051df109db3..7b30af5da222 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -86,6 +86,25 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) return zdev; } +void zpci_remove_reserved_devices(void) +{ + struct zpci_dev *tmp, *zdev; + enum zpci_state state; + LIST_HEAD(remove); + + spin_lock(&zpci_list_lock); + list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) { + if (zdev->state == ZPCI_FN_STATE_STANDBY && + !clp_get_state(zdev->fid, &state) && + state == ZPCI_FN_STATE_RESERVED) + list_move_tail(&zdev->entry, &remove); + } + spin_unlock(&zpci_list_lock); + + list_for_each_entry_safe(zdev, tmp, &remove, entry) + zpci_remove_device(zdev); +} + static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) { return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; @@ -108,6 +127,7 @@ static int zpci_set_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); struct zpci_fib fib = {0}; + u8 status; fib.isc = PCI_ISC; fib.sum = 1; /* enable summary notifications */ @@ -117,60 +137,58 @@ static int zpci_set_airq(struct zpci_dev *zdev) fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; fib.aisbo = zdev->aisb & 63; - return zpci_mod_fc(req, &fib); + return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; } -struct mod_pci_args { - u64 base; - u64 limit; - u64 iota; - u64 fmb_addr; -}; - -static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args) +/* Modify PCI: Unregister adapter interruptions */ +static int zpci_clear_airq(struct zpci_dev *zdev) { - u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn); + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); struct zpci_fib fib = {0}; + u8 cc, status; - fib.pba = args->base; - fib.pal = args->limit; - fib.iota = args->iota; - fib.fmb_addr = args->fmb_addr; + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3 || (cc == 1 && status == 24)) + /* Function already gone or IRQs already deregistered. */ + cc = 0; - return zpci_mod_fc(req, &fib); + return cc ? -EIO : 0; } /* Modify PCI: Register I/O address translation parameters */ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, u64 base, u64 limit, u64 iota) { - struct mod_pci_args args = { base, limit, iota, 0 }; + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); + struct zpci_fib fib = {0}; + u8 status; WARN_ON_ONCE(iota & 0x3fff); - args.iota |= ZPCI_IOTA_RTTO_FLAG; - return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args); + fib.pba = base; + fib.pal = limit; + fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; + return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; } /* Modify PCI: Unregister I/O address translation parameters */ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) { - struct mod_pci_args args = { 0, 0, 0, 0 }; - - return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args); -} - -/* Modify PCI: Unregister adapter interruptions */ -static int zpci_clear_airq(struct zpci_dev *zdev) -{ - struct mod_pci_args args = { 0, 0, 0, 0 }; + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT); + struct zpci_fib fib = {0}; + u8 cc, status; - return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args); + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3) /* Function already gone. */ + cc = 0; + return cc ? -EIO : 0; } /* Modify PCI: Set PCI function measurement parameters */ int zpci_fmb_enable_device(struct zpci_dev *zdev) { - struct mod_pci_args args = { 0, 0, 0, 0 }; + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_fib fib = {0}; + u8 cc, status; if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length) return -EINVAL; @@ -185,25 +203,35 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) atomic64_set(&zdev->mapped_pages, 0); atomic64_set(&zdev->unmapped_pages, 0); - args.fmb_addr = virt_to_phys(zdev->fmb); - return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); + fib.fmb_addr = virt_to_phys(zdev->fmb); + cc = zpci_mod_fc(req, &fib, &status); + if (cc) { + kmem_cache_free(zdev_fmb_cache, zdev->fmb); + zdev->fmb = NULL; + } + return cc ? -EIO : 0; } /* Modify PCI: Disable PCI function measurement */ int zpci_fmb_disable_device(struct zpci_dev *zdev) { - struct mod_pci_args args = { 0, 0, 0, 0 }; - int rc; + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_fib fib = {0}; + u8 cc, status; if (!zdev->fmb) return -EINVAL; /* Function measurement is disabled if fmb address is zero */ - rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3) /* Function already gone. */ + cc = 0; - kmem_cache_free(zdev_fmb_cache, zdev->fmb); - zdev->fmb = NULL; - return rc; + if (!cc) { + kmem_cache_free(zdev_fmb_cache, zdev->fmb); + zdev->fmb = NULL; + } + return cc ? -EIO : 0; } static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) @@ -372,22 +400,21 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) struct msi_msg msg; int rc, irq; + zdev->aisb = -1UL; if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); /* Allocate adapter summary indicator bit */ - rc = -EIO; aisb = airq_iv_alloc_bit(zpci_aisb_iv); if (aisb == -1UL) - goto out; + return -EIO; zdev->aisb = aisb; /* Create adapter interrupt vector */ - rc = -ENOMEM; zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); if (!zdev->aibv) - goto out_si; + return -ENOMEM; /* Wire up shortcut pointer */ zpci_aibv[aisb] = zdev->aibv; @@ -398,10 +425,10 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = -EIO; irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ if (irq < 0) - goto out_msi; + return -ENOMEM; rc = irq_set_msi_desc(irq, msi); if (rc) - goto out_msi; + return rc; irq_set_chip_and_handler(irq, &zpci_irq_chip, handle_simple_irq); msg.data = hwirq; @@ -415,27 +442,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) /* Enable adapter interrupts */ rc = zpci_set_airq(zdev); if (rc) - goto out_msi; + return rc; return (msi_vecs == nvec) ? 0 : msi_vecs; - -out_msi: - for_each_pci_msi_entry(msi, pdev) { - if (hwirq-- == 0) - break; - irq_set_msi_desc(msi->irq, NULL); - irq_free_desc(msi->irq); - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; - } - zpci_aibv[aisb] = NULL; - airq_iv_release(zdev->aibv); -out_si: - airq_iv_free_bit(zpci_aisb_iv, aisb); -out: - return rc; } void arch_teardown_msi_irqs(struct pci_dev *pdev) @@ -451,6 +460,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ for_each_pci_msi_entry(msi, pdev) { + if (!msi->irq) + continue; if (msi->msi_attrib.is_msix) __pci_msix_desc_mask_irq(msi, 1); else @@ -463,9 +474,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) msi->irq = 0; } - zpci_aibv[zdev->aisb] = NULL; - airq_iv_release(zdev->aibv); - airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); + if (zdev->aisb != -1UL) { + zpci_aibv[zdev->aisb] = NULL; + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); + zdev->aisb = -1UL; + } + if (zdev->aibv) { + airq_iv_release(zdev->aibv); + zdev->aibv = NULL; + } } static void zpci_map_resources(struct pci_dev *pdev) @@ -719,6 +736,16 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) { if (zpci_unique_uid) { zdev->domain = (u16) zdev->uid; + if (zdev->domain >= ZPCI_NR_DEVICES) + return 0; + + spin_lock(&zpci_domain_lock); + if (test_bit(zdev->domain, zpci_domain)) { + spin_unlock(&zpci_domain_lock); + return -EEXIST; + } + set_bit(zdev->domain, zpci_domain); + spin_unlock(&zpci_domain_lock); return 0; } @@ -735,7 +762,7 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) static void zpci_free_domain(struct zpci_dev *zdev) { - if (zpci_unique_uid) + if (zdev->domain >= ZPCI_NR_DEVICES) return; spin_lock(&zpci_domain_lock); @@ -755,6 +782,7 @@ void pcibios_remove_bus(struct pci_bus *bus) list_del(&zdev->entry); spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree(zdev); } @@ -847,15 +875,14 @@ out: return rc; } -void zpci_stop_device(struct zpci_dev *zdev) +void zpci_remove_device(struct zpci_dev *zdev) { - zpci_dma_exit_device(zdev); - /* - * Note: SCLP disables fh via set-pci-fn so don't - * do that here. - */ + if (!zdev->bus) + return; + + pci_stop_root_bus(zdev->bus); + pci_remove_root_bus(zdev->bus); } -EXPORT_SYMBOL_GPL(zpci_stop_device); int zpci_report_error(struct pci_dev *pdev, struct zpci_report_error_header *report) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 1c3332ac1957..bd534b4d40e3 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -193,12 +193,12 @@ out: int clp_add_pci_device(u32 fid, u32 fh, int configured) { struct zpci_dev *zdev; - int rc; + int rc = -ENOMEM; zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); if (!zdev) - return -ENOMEM; + goto error; zdev->fh = fh; zdev->fid = fid; @@ -219,6 +219,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) return 0; error: + zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); kfree(zdev); return rc; } @@ -295,8 +296,8 @@ int clp_disable_fh(struct zpci_dev *zdev) return rc; } -static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, - void (*cb)(struct clp_fh_list_entry *entry)) +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data, + void (*cb)(struct clp_fh_list_entry *, void *)) { u64 resume_token = 0; int entries, i, rc; @@ -327,21 +328,13 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, resume_token = rrb->response.resume_token; for (i = 0; i < entries; i++) - cb(&rrb->response.fh_list[i]); + cb(&rrb->response.fh_list[i], data); } while (resume_token); out: return rc; } -static void __clp_add(struct clp_fh_list_entry *entry) -{ - if (!entry->vendor_id) - return; - - clp_add_pci_device(entry->fid, entry->fh, entry->config_state); -} - -static void __clp_rescan(struct clp_fh_list_entry *entry) +static void __clp_add(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; @@ -349,22 +342,11 @@ static void __clp_rescan(struct clp_fh_list_entry *entry) return; zdev = get_zdev_by_fid(entry->fid); - if (!zdev) { + if (!zdev) clp_add_pci_device(entry->fid, entry->fh, entry->config_state); - return; - } - - if (!entry->config_state) { - /* - * The handle is already disabled, that means no iota/irq freeing via - * the firmware interfaces anymore. Need to free resources manually - * (DMA memory, debug, sysfs)... - */ - zpci_stop_device(zdev); - } } -static void __clp_update(struct clp_fh_list_entry *entry) +static void __clp_update(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; @@ -387,7 +369,7 @@ int clp_scan_pci_devices(void) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, __clp_add); + rc = clp_list_pci(rrb, NULL, __clp_add); clp_free_block(rrb); return rc; @@ -398,11 +380,13 @@ int clp_rescan_pci_devices(void) struct clp_req_rsp_list_pci *rrb; int rc; + zpci_remove_reserved_devices(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, __clp_rescan); + rc = clp_list_pci(rrb, NULL, __clp_add); clp_free_block(rrb); return rc; @@ -417,7 +401,40 @@ int clp_rescan_pci_devices_simple(void) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, __clp_update); + rc = clp_list_pci(rrb, NULL, __clp_update); + + clp_free_block(rrb); + return rc; +} + +struct clp_state_data { + u32 fid; + enum zpci_state state; +}; + +static void __clp_get_state(struct clp_fh_list_entry *entry, void *data) +{ + struct clp_state_data *sd = data; + + if (entry->fid != sd->fid) + return; + + sd->state = entry->config_state; +} + +int clp_get_state(u32 fid, enum zpci_state *state) +{ + struct clp_req_rsp_list_pci *rrb; + struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED}; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, &sd, __clp_get_state); + if (!rc) + *state = sd.state; clp_free_block(rrb); return rc; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 9081a57fa340..8eb1cc341dab 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -601,7 +601,9 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) */ WARN_ON(zdev->s390_domain); - zpci_unregister_ioat(zdev, 0); + if (zpci_unregister_ioat(zdev, 0)) + return; + dma_cleanup_tables(zdev->dma_table); zdev->dma_table = NULL; vfree(zdev->iommu_bitmap); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index c2b27ad8e94d..0bbc04af4418 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -74,6 +74,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; + enum zpci_state state; int ret; if (zdev) @@ -108,6 +109,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) clp_add_pci_device(ccdf->fid, ccdf->fh, 0); break; case 0x0303: /* Deconfiguration requested */ + if (!zdev) + break; if (pdev) pci_stop_and_remove_bus_device_locked(pdev); @@ -121,7 +124,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev->state = ZPCI_FN_STATE_STANDBY; break; - case 0x0304: /* Configured -> Standby */ + case 0x0304: /* Configured -> Standby|Reserved */ + if (!zdev) + break; if (pdev) { /* Give the driver a hint that the function is * already unusable. */ @@ -132,6 +137,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev->fh = ccdf->fh; zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; + if (!clp_get_state(ccdf->fid, &state) && + state == ZPCI_FN_STATE_RESERVED) { + zpci_remove_device(zdev); + } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ clp_rescan_pci_devices(); @@ -139,8 +148,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - pci_stop_root_bus(zdev->bus); - pci_remove_root_bus(zdev->bus); + zpci_remove_device(zdev); break; default: break; diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index fa8d7d4b9751..ea34086c8674 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -40,20 +40,20 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) return cc; } -int zpci_mod_fc(u64 req, struct zpci_fib *fib) +u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status) { - u8 cc, status; + u8 cc; do { - cc = __mpcifc(req, fib, &status); + cc = __mpcifc(req, fib, status); if (cc == 2) msleep(ZPCI_INSN_BUSY_DELAY); } while (cc == 2); if (cc) - zpci_err_insn(cc, status, req, 0); + zpci_err_insn(cc, *status, req, 0); - return (cc) ? -EIO : 0; + return cc; } /* Refresh PCI Translations */ diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index be63fbd699fd..025ea20fc4b4 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -34,8 +34,6 @@ static struct facility_def facility_defs[] = { 18, /* long displacement facility */ #endif #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - 7, /* stfle */ - 17, /* message security assist */ 21, /* extended-immediate facility */ 25, /* store clock fast */ #endif diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index d9a922d8711b..299274581968 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -13,7 +13,6 @@ struct task_struct; */ extern void (*cpu_wait)(void); -extern unsigned long thread_saved_pc(struct task_struct *tsk); extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp); extern unsigned long get_wchan(struct task_struct *p); diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild index b15bf6bc0e94..c94ee54210bc 100644 --- a/arch/score/include/uapi/asm/Kbuild +++ b/arch/score/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/score/include/uapi/asm/siginfo.h b/arch/score/include/uapi/asm/siginfo.h deleted file mode 100644 index 87ca35607a28..000000000000 --- a/arch/score/include/uapi/asm/siginfo.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_SIGINFO_H -#define _ASM_SCORE_SIGINFO_H - -#include <asm-generic/siginfo.h> - -#endif /* _ASM_SCORE_SIGINFO_H */ diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index eb64d7a677cb..6e20241a1ed4 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r) return 1; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return task_pt_regs(tsk)->cp0_epc; -} - unsigned long get_wchan(struct task_struct *task) { if (!task || task == current || task->state == TASK_RUNNING) diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index 4e21949593cf..3554fcaa023b 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -10,7 +10,7 @@ config SH_DEVICE_TREE bool "Board Described by Device Tree" select OF select OF_EARLY_FLATTREE - select CLKSRC_OF + select TIMER_OF select COMMON_CLK select GENERIC_CALIBRATE_DELAY help diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index 1fb6d5714bae..4feb7c86f4ac 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -119,7 +119,7 @@ static void __init sh_of_mem_reserve(void) static void __init sh_of_time_init(void) { pr_info("SH generic board support: scanning for clocksource devices\n"); - clocksource_probe(); + timer_probe(); } static void __init sh_of_setup(char **cmdline_p) diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index cf2a75063b53..590c91ae7541 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -29,7 +29,6 @@ generic-y += rwsem.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h -generic-y += siginfo.h generic-y += sizes.h generic-y += socket.h generic-y += statfs.h diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild index b15bf6bc0e94..b55fc2ae1e8c 100644 --- a/arch/sh/include/uapi/asm/Kbuild +++ b/arch/sh/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += siginfo.h diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index 08e7af0be4a7..6a1a1297baae 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 58243b0d21c0..5639c9fe5b55 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -192,9 +192,9 @@ config NR_CPUS int "Maximum number of CPUs" depends on SMP range 2 32 if SPARC32 - range 2 1024 if SPARC64 + range 2 4096 if SPARC64 default 32 if SPARC32 - default 64 if SPARC64 + default 4096 if SPARC64 source kernel/Kconfig.hz @@ -295,9 +295,13 @@ config NUMA depends on SPARC64 && SMP config NODES_SHIFT - int - default "4" + int "Maximum NUMA Nodes (as a power of 2)" + range 4 5 if SPARC64 + default "5" depends on NEED_MULTIPLE_NODES + help + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accommodate various tables. # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and @@ -573,9 +577,6 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y -config KEYS_COMPAT - def_bool y if COMPAT && KEYS - endmenu source "net/Kconfig" diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index f7de0dbc38af..83b36a5371ff 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -52,7 +52,7 @@ #define CTX_NR_MASK TAG_CONTEXT_BITS #define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK) -#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL)) +#define CTX_FIRST_VERSION BIT(CTX_VERSION_SHIFT) #define CTX_VALID(__ctx) \ (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK)) #define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 22fede6eba11..2cddcda4f85f 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -19,13 +19,8 @@ extern spinlock_t ctx_alloc_lock; extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; +DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); void get_new_mmu_context(struct mm_struct *mm); -#ifdef CONFIG_SMP -void smp_new_mmu_context_version(void); -#else -#define smp_new_mmu_context_version() do { } while (0) -#endif - int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); @@ -76,8 +71,9 @@ void __flush_tlb_mm(unsigned long, unsigned long); static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { unsigned long ctx_valid, flags; - int cpu; + int cpu = smp_processor_id(); + per_cpu(per_cpu_secondary_mm, cpu) = mm; if (unlikely(mm == &init_mm)) return; @@ -123,7 +119,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * for the first time, we must flush that context out of the * local TLB. */ - cpu = smp_processor_id(); if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) { cpumask_set_cpu(cpu, mm_cpumask(mm)); __flush_tlb_mm(CTX_HWBITS(mm->context), @@ -133,26 +128,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str } #define deactivate_mm(tsk,mm) do { } while (0) - -/* Activate a new MM instance for the current task. */ -static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) -{ - unsigned long flags; - int cpu; - - spin_lock_irqsave(&mm->context.lock, flags); - if (!CTX_VALID(mm->context)) - get_new_mmu_context(mm); - cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, mm_cpumask(mm))) - cpumask_set_cpu(cpu, mm_cpumask(mm)); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - tsb_context_switch(mm); - spin_unlock_irqrestore(&mm->context.lock, flags); -} - +#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL) #endif /* !(__ASSEMBLY__) */ #endif /* !(__SPARC64_MMU_CONTEXT_H) */ diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h index 266937030546..522b43db2ed3 100644 --- a/arch/sparc/include/asm/pil.h +++ b/arch/sparc/include/asm/pil.h @@ -20,7 +20,6 @@ #define PIL_SMP_CALL_FUNC 1 #define PIL_SMP_RECEIVE_SIGNAL 2 #define PIL_SMP_CAPTURE 3 -#define PIL_SMP_CTX_NEW_VERSION 4 #define PIL_DEVICE_IRQ 5 #define PIL_SMP_CALL_FUNC_SNGL 6 #define PIL_DEFERRED_PCR_WORK 7 diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index dd27159819eb..b395e5620c0b 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -67,9 +67,6 @@ struct thread_struct { .current_ds = KERNEL_DS, \ } -/* Return saved PC of a blocked thread. */ -unsigned long thread_saved_pc(struct task_struct *t); - /* Do necessary setup to start up a newly executed thread. */ static inline void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index b58ee9018433..f04dc5a43062 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -89,9 +89,7 @@ struct thread_struct { #include <linux/types.h> #include <asm/fpumacro.h> -/* Return saved PC of a blocked thread. */ struct task_struct; -unsigned long thread_saved_pc(struct task_struct *); /* On Uniprocessor, even in RMO processes see TSO semantics */ #ifdef CONFIG_SMP diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h deleted file mode 100644 index 48c34c19f810..000000000000 --- a/arch/sparc/include/asm/siginfo.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __SPARC_SIGINFO_H -#define __SPARC_SIGINFO_H - -#include <uapi/asm/siginfo.h> - - -#ifdef CONFIG_COMPAT - -struct compat_siginfo; - -#endif /* CONFIG_COMPAT */ - -#endif /* !(__SPARC_SIGINFO_H) */ diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 8174f6cdbbbb..9dca7a892978 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -327,6 +327,7 @@ struct vio_dev { int compat_len; u64 dev_no; + u64 id; unsigned long channel_id; diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index b542cc7c8d94..f87265afb175 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -909,7 +909,7 @@ static int register_services(struct ds_info *dp) pbuf.req.handle = cp->handle; pbuf.req.major = 1; pbuf.req.minor = 0; - strcpy(pbuf.req.svc_id, cp->service_id); + strcpy(pbuf.id_buf, cp->service_id); err = __ds_send(lp, &pbuf, msg_len); if (err > 0) diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 4d0248aa0928..99dd133a029f 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) { #ifdef CONFIG_SMP unsigned long page; + void *mondo, *p; - BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE); + + /* Make sure mondo block is 64byte aligned */ + p = kzalloc(127, GFP_KERNEL); + if (!p) { + prom_printf("SUN4V: Error, cannot allocate mondo block.\n"); + prom_halt(); + } + mondo = (void *)(((unsigned long)p + 63) & ~0x3f); + tb->cpu_mondo_block_pa = __pa(mondo); page = get_zeroed_page(GFP_KERNEL); if (!page) { - prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_printf("SUN4V: Error, cannot allocate cpu list page.\n"); prom_halt(); } - tb->cpu_mondo_block_pa = __pa(page); - tb->cpu_list_pa = __pa(page + 64); + tb->cpu_list_pa = __pa(page); #endif } diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index c9804551262c..6ae1e77be0bf 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr /* smp_64.c */ void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs); void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs); -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs); void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs); void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index b6dac8e980f0..9245f93398c7 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) } /* - * Note: sparc64 has a pretty intricated thread_saved_pc, check it out. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return task_thread_info(tsk)->kpc; -} - -/* * Free current thread data structures etc.. */ void exit_thread(struct task_struct *tsk) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 1badc493e62e..b96104da5bd6 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init); #endif -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - unsigned long ret = 0xdeadbeefUL; - - if (ti && ti->ksp) { - unsigned long *sp; - sp = (unsigned long *)(ti->ksp + STACK_BIAS); - if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL && - sp[14]) { - unsigned long *fp; - fp = (unsigned long *)(sp[14] + STACK_BIAS); - if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL) - ret = fp[15]; - } - } - return ret; -} - /* Free current thread data structures etc.. */ void exit_thread(struct task_struct *tsk) { diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index b3bc0ac757cc..fdf31040a7dc 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) preempt_enable(); } -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) -{ - struct mm_struct *mm; - unsigned long flags; - - clear_softint(1 << irq); - - /* See if we need to allocate a new TLB context because - * the version of the one we are using is now out of date. - */ - mm = current->active_mm; - if (unlikely(!mm || (mm == &init_mm))) - return; - - spin_lock_irqsave(&mm->context.lock, flags); - - if (unlikely(!CTX_VALID(mm->context))) - get_new_mmu_context(mm); - - spin_unlock_irqrestore(&mm->context.lock, flags); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), - SECONDARY_CONTEXT); -} - -void smp_new_mmu_context_version(void) -{ - smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); -} - #ifdef CONFIG_KGDB void kgdb_roundup_cpus(unsigned long flags) { diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index ef4520efc813..043544d0cda3 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 10689cfd0ad4..07c0df924960 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -455,13 +455,16 @@ __tsb_context_switch: .type copy_tsb,#function copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size * %o2=new_tsb_base, %o3=new_tsb_size + * %o4=page_size_shift */ sethi %uhi(TSB_PASS_BITS), %g7 srlx %o3, 4, %o3 - add %o0, %o1, %g1 /* end of old tsb */ + add %o0, %o1, %o1 /* end of old tsb */ sllx %g7, 32, %g7 sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + mov %o4, %g1 /* page_size_shift */ + 661: prefetcha [%o0] ASI_N, #one_read .section .tsb_phys_patch, "ax" .word 661b @@ -486,9 +489,9 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size /* This can definitely be computed faster... */ srlx %o0, 4, %o5 /* Build index */ and %o5, 511, %o5 /* Mask index */ - sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ + sllx %o5, %g1, %o5 /* Put into vaddr position */ or %o4, %o5, %o4 /* Full VADDR. */ - srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ + srlx %o4, %g1, %o4 /* Shift down to create index */ and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ @@ -496,7 +499,7 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ 80: add %o0, 16, %o0 - cmp %o0, %g1 + cmp %o0, %o1 bne,pt %xcc, 90b nop diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S index 7bd8f6556352..efe93ab4a9c0 100644 --- a/arch/sparc/kernel/ttable_64.S +++ b/arch/sparc/kernel/ttable_64.S @@ -50,7 +50,7 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40) tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) -tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) +tl0_irq4: BTRAP(0x44) #else tl0_irq1: BTRAP(0x41) tl0_irq2: BTRAP(0x42) diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index f6bb857254fc..075d38980dee 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, if (!id) { dev_set_name(&vdev->dev, "%s", bus_id_name); vdev->dev_no = ~(u64)0; + vdev->id = ~(u64)0; } else if (!cfg_handle) { dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id); vdev->dev_no = *id; + vdev->id = ~(u64)0; } else { dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name, *cfg_handle, *id); vdev->dev_no = *cfg_handle; + vdev->id = *id; } vdev->dev.parent = parent; @@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle *hp, u64 node) (void) vio_create_one(hp, node, &root_vdev->dev); } +struct vio_md_node_query { + const char *type; + u64 dev_no; + u64 id; +}; + static int vio_md_node_match(struct device *dev, void *arg) { + struct vio_md_node_query *query = (struct vio_md_node_query *) arg; struct vio_dev *vdev = to_vio_dev(dev); - if (vdev->mp == (u64) arg) - return 1; + if (vdev->dev_no != query->dev_no) + return 0; + if (vdev->id != query->id) + return 0; + if (strcmp(vdev->type, query->type)) + return 0; - return 0; + return 1; } static void vio_remove(struct mdesc_handle *hp, u64 node) { + const char *type; + const u64 *id, *cfg_handle; + u64 a; + struct vio_md_node_query query; struct device *dev; - dev = device_find_child(&root_vdev->dev, (void *) node, + type = mdesc_get_property(hp, node, "device-type", NULL); + if (!type) { + type = mdesc_get_property(hp, node, "name", NULL); + if (!type) + type = mdesc_node_name(hp, node); + } + + query.type = type; + + id = mdesc_get_property(hp, node, "id", NULL); + cfg_handle = NULL; + mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + cfg_handle = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (cfg_handle) + break; + } + + if (!id) { + query.dev_no = ~(u64)0; + query.id = ~(u64)0; + } else if (!cfg_handle) { + query.dev_no = *id; + query.id = ~(u64)0; + } else { + query.dev_no = *cfg_handle; + query.id = *id; + } + + dev = device_find_child(&root_vdev->dev, &query, vio_md_node_match); if (dev) { printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev)); device_unregister(dev); put_device(dev); + } else { + if (!id) + printk(KERN_ERR "VIO: Removed unknown %s node.\n", + type); + else if (!cfg_handle) + printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n", + type, *id); + else + printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n", + type, *cfg_handle, *id); } } diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 69912d2f8b54..07c03e72d812 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -15,6 +15,7 @@ lib-$(CONFIG_SPARC32) += copy_user.o locks.o lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o +lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S new file mode 100644 index 000000000000..d6b6c97fe3c7 --- /dev/null +++ b/arch/sparc/lib/multi3.S @@ -0,0 +1,35 @@ +#include <linux/linkage.h> +#include <asm/export.h> + + .text + .align 4 +ENTRY(__multi3) /* %o0 = u, %o1 = v */ + mov %o1, %g1 + srl %o3, 0, %g4 + mulx %g4, %g1, %o1 + srlx %g1, 0x20, %g3 + mulx %g3, %g4, %g5 + sllx %g5, 0x20, %o5 + srl %g1, 0, %g4 + sub %o1, %o5, %o5 + srlx %o5, 0x20, %o5 + addcc %g5, %o5, %g5 + srlx %o3, 0x20, %o5 + mulx %g4, %o5, %g4 + mulx %g3, %o5, %o5 + sethi %hi(0x80000000), %g3 + addcc %g5, %g4, %g5 + srlx %g5, 0x20, %g5 + add %g3, %g3, %g3 + movcc %xcc, %g0, %g3 + addcc %o5, %g5, %o5 + sllx %g4, 0x20, %g4 + add %o1, %g4, %o1 + add %o5, %g3, %g2 + mulx %g1, %o2, %g1 + add %g1, %g2, %g1 + mulx %o0, %o3, %o0 + retl + add %g1, %o0, %o0 +ENDPROC(__multi3) +EXPORT_SYMBOL(__multi3) diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 7c29d38e6b99..88855e383b34 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 0cda653ae007..3c40ebd50f92 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char *string) } if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) { - pr_warn("hugepagesz=%llu not supported by MMU.\n", + hugetlb_bad_size(); + pr_err("hugepagesz=%llu not supported by MMU.\n", hugepage_size); goto out; } @@ -706,10 +707,58 @@ EXPORT_SYMBOL(__flush_dcache_range); /* get_new_mmu_context() uses "cache + 1". */ DEFINE_SPINLOCK(ctx_alloc_lock); -unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; +unsigned long tlb_context_cache = CTX_FIRST_VERSION; #define MAX_CTX_NR (1UL << CTX_NR_BITS) #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); +DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0}; + +static void mmu_context_wrap(void) +{ + unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK; + unsigned long new_ver, new_ctx, old_ctx; + struct mm_struct *mm; + int cpu; + + bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS); + + /* Reserve kernel context */ + set_bit(0, mmu_context_bmap); + + new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION; + if (unlikely(new_ver == 0)) + new_ver = CTX_FIRST_VERSION; + tlb_context_cache = new_ver; + + /* + * Make sure that any new mm that are added into per_cpu_secondary_mm, + * are going to go through get_new_mmu_context() path. + */ + mb(); + + /* + * Updated versions to current on those CPUs that had valid secondary + * contexts + */ + for_each_online_cpu(cpu) { + /* + * If a new mm is stored after we took this mm from the array, + * it will go into get_new_mmu_context() path, because we + * already bumped the version in tlb_context_cache. + */ + mm = per_cpu(per_cpu_secondary_mm, cpu); + + if (unlikely(!mm || mm == &init_mm)) + continue; + + old_ctx = mm->context.sparc64_ctx_val; + if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) { + new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver; + set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap); + mm->context.sparc64_ctx_val = new_ctx; + } + } +} /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. @@ -725,48 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - int new_version; spin_lock(&ctx_alloc_lock); +retry: + /* wrap might have happened, test again if our context became valid */ + if (unlikely(CTX_VALID(mm->context))) + goto out; orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); - new_version = 0; if (new_ctx >= (1 << CTX_NR_BITS)) { new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); if (new_ctx >= ctx) { - int i; - new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + - CTX_FIRST_VERSION; - if (new_ctx == 1) - new_ctx = CTX_FIRST_VERSION; - - /* Don't call memset, for 16 entries that's just - * plain silly... - */ - mmu_context_bmap[0] = 3; - mmu_context_bmap[1] = 0; - mmu_context_bmap[2] = 0; - mmu_context_bmap[3] = 0; - for (i = 4; i < CTX_BMAP_SLOTS; i += 4) { - mmu_context_bmap[i + 0] = 0; - mmu_context_bmap[i + 1] = 0; - mmu_context_bmap[i + 2] = 0; - mmu_context_bmap[i + 3] = 0; - } - new_version = 1; - goto out; + mmu_context_wrap(); + goto retry; } } + if (mm->context.sparc64_ctx_val) + cpumask_clear(mm_cpumask(mm)); mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); -out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; +out: spin_unlock(&ctx_alloc_lock); - - if (unlikely(new_version)) - smp_new_mmu_context_version(); } static int numa_enabled = 1; diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index bedf08b22a47..0d4b998c7d7b 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -496,7 +496,8 @@ retry_tsb_alloc: extern void copy_tsb(unsigned long old_tsb_base, unsigned long old_tsb_size, unsigned long new_tsb_base, - unsigned long new_tsb_size); + unsigned long new_tsb_size, + unsigned long page_size_shift); unsigned long old_tsb_base = (unsigned long) old_tsb; unsigned long new_tsb_base = (unsigned long) new_tsb; @@ -504,7 +505,9 @@ retry_tsb_alloc: old_tsb_base = __pa(old_tsb_base); new_tsb_base = __pa(new_tsb_base); } - copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); + copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size, + tsb_index == MM_TSB_BASE ? + PAGE_SHIFT : REAL_HPAGE_SHIFT); } mm->context.tsb_block[tsb_index].tsb = new_tsb; diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 5d2fd6cd3189..fcf4d27a38fb 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -971,11 +971,6 @@ xcall_capture: wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint retry - .globl xcall_new_mmu_context_version -xcall_new_mmu_context_version: - wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint - retry - #ifdef CONFIG_KGDB .globl xcall_kgdb_capture xcall_kgdb_capture: diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 0bc9968b97a1..f71e5206650b 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task) extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags); - -/* - * Return saved (kernel) PC of a blocked thread. - * Only used in a printk() in kernel/sched/core.c, so don't work too hard. - */ -#define thread_saved_pc(t) ((t)->thread.pc) - unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 1a70e6c0f259..94709ab41ed8 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -24,8 +24,7 @@ * has an opportunity to return -EFAULT to the user if needed. * The 64-bit routines just return a "long long" with the value, * since they are only used from kernel space and don't expect to fault. - * Support for 16-bit ops is included in the framework but we don't provide - * any (x86_64 has an atomic_inc_short(), so we might want to some day). + * Support for 16-bit ops is included in the framework but we don't provide any. * * Note that the caller is advised to issue a suitable L1 or L2 * prefetch on the address being manipulated to avoid extra stalls. diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index cb10153b5c9f..03e5cc4e76e4 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (current->mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 85410279beab..b55fe9bf5d3e 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -534,7 +534,7 @@ static void ubd_handler(void) for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { blk_end_request( (*irq_req_buffer)[count]->req, - 0, + BLK_STS_OK, (*irq_req_buffer)[count]->length ); kfree((*irq_req_buffer)[count]); diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 2d1e0dd5bb0b..f6d1a3f747a9 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task) { } -extern unsigned long thread_saved_pc(struct task_struct *t); - static inline void mm_copy_segments(struct mm_struct *from_mm, struct mm_struct *new_mm) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 64a1fd06f3fd..7b5640117325 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -56,12 +56,6 @@ union thread_union cpu0_irqstack __attribute__((__section__(".data..init_irqstack"))) = { INIT_THREAD_INFO(init_task) }; -unsigned long thread_saved_pc(struct task_struct *task) -{ - /* FIXME: Need to look up userspace_pid by cpu */ - return os_process_pc(userspace_pid[0]); -} - /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index e9ad511c1043..7a53a55341de 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -44,7 +44,6 @@ generic-y += serial.h generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h -generic-y += siginfo.h generic-y += signal.h generic-y += sizes.h generic-y += socket.h diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild index 13a97aa2285f..1c44d3b3eba0 100644 --- a/arch/unicore32/include/uapi/asm/Kbuild +++ b/arch/unicore32/include/uapi/asm/Kbuild @@ -2,3 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += kvm_para.h +generic-y += siginfo.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cd18994a9555..dfe1a7ffe6d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -69,7 +69,7 @@ config X86 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANT_FRAME_POINTERS select ARCH_WANTS_DYNAMIC_TASK_STRUCT select BUILDTIME_EXTABLE_SORT @@ -87,6 +87,8 @@ config X86 select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP + select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP + select GENERIC_IRQ_MIGRATION if SMP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP @@ -166,6 +168,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select IRQ_FORCED_THREADING + select PCI_LOCKLESS_CONFIG select PERF_EVENTS select RTC_LIB select RTC_MC146818_LIB @@ -360,7 +363,7 @@ config SMP Management" code will be disabled if you say Y here. See also <file:Documentation/x86/i386/IO-APIC.txt>, - <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at + <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at <http://www.tldp.org/docs.html#howto>. If you don't know what to do here, say N. @@ -2776,10 +2779,6 @@ config COMPAT_FOR_U64_ALIGNMENT config SYSVIPC_COMPAT def_bool y depends on SYSVIPC - -config KEYS_COMPAT - def_bool y - depends on KEYS endif endmenu @@ -2797,6 +2796,9 @@ config X86_DMA_REMAP bool depends on STA2X11 +config HAVE_GENERIC_GUP + def_bool y + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5851411e60fb..bf240b920473 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -159,7 +159,7 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER # If '-Os' is enabled, disable it and print a warning. ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE undefine CONFIG_CC_OPTIMIZE_FOR_SIZE - $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) + $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) endif endif diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 44163e8c3868..2c860ad4fe06 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -94,7 +94,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - readelf -S $$obj | grep -qF .rel.local && { \ + ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index 73ccf63b0f48..9dc1ce6ba3c0 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -13,7 +13,7 @@ static inline char rdfs8(addr_t addr) return *((char *)(fs + addr)); } #include "../cmdline.c" -static unsigned long get_cmd_line_ptr(void) +unsigned long get_cmd_line_ptr(void) { unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index cbf4b87f55b9..c3e869eaef0c 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c, memset((char *)gdt->address, 0x0, gdt->size); desc = (struct desc_struct *)gdt->address; - /* The first GDT is a dummy and the second is unused. */ - desc += 2; + /* The first GDT is a dummy. */ + desc++; + + if (IS_ENABLED(CONFIG_X86_64)) { + /* __KERNEL32_CS */ + desc->limit0 = 0xffff; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ; + desc->s = DESC_TYPE_CODE_DATA; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0xf; + desc->avl = 0; + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + desc++; + } else { + /* Second entry is unused on 32-bit */ + desc++; + } + /* __KERNEL_CS */ desc->limit0 = 0xffff; desc->base0 = 0x0000; desc->base1 = 0x0000; @@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c, desc->p = 1; desc->limit = 0xf; desc->avl = 0; - desc->l = 0; - desc->d = SEG_OP_SIZE_32BIT; + if (IS_ENABLED(CONFIG_X86_64)) { + desc->l = 1; + desc->d = 0; + } else { + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + } desc->g = SEG_GRANULARITY_4KB; desc->base2 = 0x00; - desc++; + + /* __KERNEL_DS */ desc->limit0 = 0xffff; desc->base0 = 0x0000; desc->base1 = 0x0000; @@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c, desc->d = SEG_OP_SIZE_32BIT; desc->g = SEG_GRANULARITY_4KB; desc->base2 = 0x00; - -#ifdef CONFIG_X86_64 - /* Task segment value */ desc++; - desc->limit0 = 0x0000; - desc->base0 = 0x0000; - desc->base1 = 0x0000; - desc->type = SEG_TYPE_TSS; - desc->s = 0; - desc->dpl = 0; - desc->p = 1; - desc->limit = 0x0; - desc->avl = 0; - desc->l = 0; - desc->d = 0; - desc->g = SEG_GRANULARITY_4KB; - desc->base2 = 0x00; -#endif /* CONFIG_X86_64 */ + + if (IS_ENABLED(CONFIG_X86_64)) { + /* Task segment value */ + desc->limit0 = 0x0000; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_TSS; + desc->s = 0; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0x0; + desc->avl = 0; + desc->l = 0; + desc->d = 0; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + desc++; + } asm volatile("cli"); asm volatile ("lgdt %0" : : "m" (*gdt)); diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d2ae1f821e0c..fbf4c32d0b62 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -346,6 +346,48 @@ preferred_addr: /* Set up the stack */ leaq boot_stack_end(%rbx), %rsp +#ifdef CONFIG_X86_5LEVEL + /* Check if 5-level paging has already enabled */ + movq %cr4, %rax + testl $X86_CR4_LA57, %eax + jnz lvl5 + + /* + * At this point we are in long mode with 4-level paging enabled, + * but we want to enable 5-level paging. + * + * The problem is that we cannot do it directly. Setting LA57 in + * long mode would trigger #GP. So we need to switch off long mode + * first. + * + * NOTE: This is not going to work if bootloader put us above 4G + * limit. + * + * The first step is go into compatibility mode. + */ + + /* Clear additional page table */ + leaq lvl5_pgtable(%rbx), %rdi + xorq %rax, %rax + movq $(PAGE_SIZE/8), %rcx + rep stosq + + /* + * Setup current CR3 as the first and only entry in a new top level + * page table. + */ + movq %cr3, %rdi + leaq 0x7 (%rdi), %rax + movq %rax, lvl5_pgtable(%rbx) + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq compatible_mode(%rip), %rax + pushq %rax + lretq +lvl5: +#endif + /* Zero EFLAGS */ pushq $0 popfq @@ -429,6 +471,44 @@ relocated: jmp *%rax .code32 +#ifdef CONFIG_X86_5LEVEL +compatible_mode: + /* Setup data and stack segments */ + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %ss + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Point CR3 to 5-level paging */ + leal lvl5_pgtable(%ebx), %eax + movl %eax, %cr3 + + /* Enable PAE and LA57 mode */ + movl %cr4, %eax + orl $(X86_CR4_PAE | X86_CR4_LA57), %eax + movl %eax, %cr4 + + /* Calculate address we are running at */ + call 1f +1: popl %edi + subl $1b, %edi + + /* Prepare stack for far return to Long Mode */ + pushl $__KERNEL_CS + leal lvl5(%edi), %eax + push %eax + + /* Enable paging back */ + movl $(X86_CR0_PG | X86_CR0_PE), %eax + movl %eax, %cr0 + + lret +#endif + no_longmode: /* This isn't an x86-64 CPU so hang */ 1: @@ -442,7 +522,7 @@ gdt: .word gdt_end - gdt .long gdt .word 0 - .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ .quad 0x00af9a000000ffff /* __KERNEL_CS */ .quad 0x00cf92000000ffff /* __KERNEL_DS */ .quad 0x0080890000000000 /* TS descriptor */ @@ -486,3 +566,7 @@ boot_stack_end: .balign 4096 pgtable: .fill BOOT_PGT_SIZE, 1, 0 +#ifdef CONFIG_X86_5LEVEL +lvl5_pgtable: + .fill PAGE_SIZE, 1, 0 +#endif diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 54c24f0a43d3..91f27ab970ef 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -9,16 +9,42 @@ * contain the entire properly aligned running kernel image. * */ + +/* + * isspace() in linux/ctype.h is expected by next_args() to filter + * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h, + * since isdigit() is implemented in both of them. Hence disable it + * here. + */ +#define BOOT_CTYPE_H + +/* + * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h. + * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL + * which is meaningless and will cause compiling error in some cases. + * So do not include linux/export.h and define EXPORT_SYMBOL(sym) + * as empty. + */ +#define _LINUX_EXPORT_H +#define EXPORT_SYMBOL(sym) + #include "misc.h" #include "error.h" -#include "../boot.h" +#include "../string.h" #include <generated/compile.h> #include <linux/module.h> #include <linux/uts.h> #include <linux/utsname.h> +#include <linux/ctype.h> #include <generated/utsrelease.h> +/* Macros used by the included decompressor code below. */ +#define STATIC +#include <linux/decompress/mm.h> + +extern unsigned long get_cmd_line_ptr(void); + /* Simplified build-specific string for starting entropy. */ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; @@ -62,6 +88,11 @@ struct mem_vector { static bool memmap_too_large; + +/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */ +unsigned long long mem_limit = ULLONG_MAX; + + enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, @@ -85,49 +116,14 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) return true; } -/** - * _memparse - Parse a string with mem suffixes into a number - * @ptr: Where parse begins - * @retptr: (output) Optional pointer to next char after parse completes - * - * Parses a string into a number. The number stored at @ptr is - * potentially suffixed with K, M, G, T, P, E. - */ -static unsigned long long _memparse(const char *ptr, char **retptr) +char *skip_spaces(const char *str) { - char *endptr; /* Local pointer to end of parsed string */ - - unsigned long long ret = simple_strtoull(ptr, &endptr, 0); - - switch (*endptr) { - case 'E': - case 'e': - ret <<= 10; - case 'P': - case 'p': - ret <<= 10; - case 'T': - case 't': - ret <<= 10; - case 'G': - case 'g': - ret <<= 10; - case 'M': - case 'm': - ret <<= 10; - case 'K': - case 'k': - ret <<= 10; - endptr++; - default: - break; - } - - if (retptr) - *retptr = endptr; - - return ret; + while (isspace(*str)) + ++str; + return (char *)str; } +#include "../../../../lib/ctype.c" +#include "../../../../lib/cmdline.c" static int parse_memmap(char *p, unsigned long long *start, unsigned long long *size) @@ -142,40 +138,41 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) return -EINVAL; oldp = p; - *size = _memparse(p, &p); + *size = memparse(p, &p); if (p == oldp) return -EINVAL; switch (*p) { - case '@': - /* Skip this region, usable */ - *start = 0; - *size = 0; - return 0; case '#': case '$': case '!': - *start = _memparse(p + 1, &p); + *start = memparse(p + 1, &p); + return 0; + case '@': + /* memmap=nn@ss specifies usable region, should be skipped */ + *size = 0; + /* Fall through */ + default: + /* + * If w/o offset, only size specified, memmap=nn[KMG] has the + * same behaviour as mem=nn[KMG]. It limits the max address + * system can use. Region above the limit should be avoided. + */ + *start = 0; return 0; } return -EINVAL; } -static void mem_avoid_memmap(void) +static void mem_avoid_memmap(char *str) { - char arg[128]; + static int i; int rc; - int i; - char *str; - /* See if we have any memmap areas */ - rc = cmdline_find_option("memmap", arg, sizeof(arg)); - if (rc <= 0) + if (i >= MAX_MEMMAP_REGIONS) return; - i = 0; - str = arg; while (str && (i < MAX_MEMMAP_REGIONS)) { int rc; unsigned long long start, size; @@ -188,9 +185,14 @@ static void mem_avoid_memmap(void) if (rc < 0) break; str = k; - /* A usable region that should not be skipped */ - if (size == 0) + + if (start == 0) { + /* Store the specified memory limit if size > 0 */ + if (size > 0) + mem_limit = size; + continue; + } mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; @@ -202,6 +204,57 @@ static void mem_avoid_memmap(void) memmap_too_large = true; } +static int handle_mem_memmap(void) +{ + char *args = (char *)get_cmd_line_ptr(); + size_t len = strlen((char *)args); + char *tmp_cmdline; + char *param, *val; + u64 mem_size; + + if (!strstr(args, "memmap=") && !strstr(args, "mem=")) + return 0; + + tmp_cmdline = malloc(len + 1); + if (!tmp_cmdline ) + error("Failed to allocate space for tmp_cmdline"); + + memcpy(tmp_cmdline, args, len); + tmp_cmdline[len] = 0; + args = tmp_cmdline; + + /* Chew leading spaces */ + args = skip_spaces(args); + + while (*args) { + args = next_arg(args, ¶m, &val); + /* Stop at -- */ + if (!val && strcmp(param, "--") == 0) { + warn("Only '--' specified in cmdline"); + free(tmp_cmdline); + return -1; + } + + if (!strcmp(param, "memmap")) { + mem_avoid_memmap(val); + } else if (!strcmp(param, "mem")) { + char *p = val; + + if (!strcmp(p, "nopentium")) + continue; + mem_size = memparse(p, &p); + if (mem_size == 0) { + free(tmp_cmdline); + return -EINVAL; + } + mem_limit = mem_size; + } + } + + free(tmp_cmdline); + return 0; +} + /* * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). * The mem_avoid array is used to store the ranges that need to be avoided @@ -323,7 +376,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* We don't need to set a mapping for setup_data. */ /* Mark the memmap regions we need to avoid */ - mem_avoid_memmap(); + handle_mem_memmap(); #ifdef CONFIG_X86_VERBOSE_BOOTUP /* Make sure video RAM can be used. */ @@ -432,7 +485,8 @@ static void process_e820_entry(struct boot_e820_entry *entry, { struct mem_vector region, overlap; struct slot_area slot_area; - unsigned long start_orig; + unsigned long start_orig, end; + struct boot_e820_entry cur_entry; /* Skip non-RAM entries. */ if (entry->type != E820_TYPE_RAM) @@ -446,8 +500,15 @@ static void process_e820_entry(struct boot_e820_entry *entry, if (entry->addr + entry->size < minimum) return; - region.start = entry->addr; - region.size = entry->size; + /* Ignore entries above memory limit */ + end = min(entry->size + entry->addr, mem_limit); + if (entry->addr >= end) + return; + cur_entry.addr = entry->addr; + cur_entry.size = end - entry->addr; + + region.start = cur_entry.addr; + region.size = cur_entry.size; /* Give up if slot area array is full. */ while (slot_area_index < MAX_SLOT_AREA) { @@ -461,7 +522,7 @@ static void process_e820_entry(struct boot_e820_entry *entry, region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); /* Did we raise the address above this e820 region? */ - if (region.start > entry->addr + entry->size) + if (region.start > cur_entry.addr + cur_entry.size) return; /* Reduce size by any delta from the original address. */ @@ -564,9 +625,6 @@ void choose_random_location(unsigned long input, { unsigned long random_addr, min_addr; - /* By default, keep output position unchanged. */ - *virt_addr = *output; - if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); return; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b3c5a5f030ce..00241c815524 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned long output_len) { const unsigned long kernel_total_size = VO__end - VO__text; - unsigned long virt_addr = (unsigned long)output; + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -390,6 +390,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); + if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) error("Destination address too large"); @@ -397,7 +399,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifndef CONFIG_RELOCATABLE if ((unsigned long)output != LOAD_PHYSICAL_ADDR) error("Destination address does not match LOAD_PHYSICAL_ADDR"); - if ((unsigned long)output != virt_addr) + if (virt_addr != LOAD_PHYSICAL_ADDR) error("Destination virtual address changed when not relocatable"); #endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 1c8355eadbd1..766a5211f827 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input, unsigned long output_size, unsigned long *virt_addr) { - /* No change from existing output location. */ - *virt_addr = *output; } #endif diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 1d78f1739087..28029be47fbb 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -63,7 +63,7 @@ static void *alloc_pgt_page(void *context) static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ -static unsigned long level4p; +static unsigned long top_level_pgt; /* * Mapping information structure passed to kernel_ident_mapping_init(). @@ -91,9 +91,15 @@ void initialize_identity_maps(void) * If we came here via startup_32(), cr3 will be _pgtable already * and we must append to the existing area instead of entirely * overwriting it. + * + * With 5-level paging, we use '_pgtable' to allocate the p4d page table, + * the top-level page table is allocated separately. + * + * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level + * cases. On 4-level paging it's equal to 'top_level_pgt'. */ - level4p = read_cr3(); - if (level4p == (unsigned long)_pgtable) { + top_level_pgt = read_cr3_pa(); + if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) { debug_putstr("booted via startup_32()\n"); pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; @@ -103,7 +109,7 @@ void initialize_identity_maps(void) pgt_data.pgt_buf = _pgtable; pgt_data.pgt_buf_size = BOOT_PGT_SIZE; memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); - level4p = (unsigned long)alloc_pgt_page(&pgt_data); + top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data); } } @@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size) return; /* Build the mapping. */ - kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p, + kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end); } @@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size) */ void finalize_identity_maps(void) { - write_cr3(level4p); + write_cr3(top_level_pgt); } diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index 1eb7d298b47d..15d9f74b0008 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -65,23 +65,3 @@ GLOBAL(copy_to_fs) popw %es retl ENDPROC(copy_to_fs) - -#if 0 /* Not currently used, but can be enabled as needed */ -GLOBAL(copy_from_gs) - pushw %ds - pushw %gs - popw %ds - calll memcpy - popw %ds - retl -ENDPROC(copy_from_gs) - -GLOBAL(copy_to_gs) - pushw %es - pushw %gs - popw %es - calll memcpy - popw %es - retl -ENDPROC(copy_to_gs) -#endif diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 5457b02fc050..630e3664906b 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -122,6 +122,14 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas return result; } +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoull(cp + 1, endp, base); + + return simple_strtoull(cp, endp, base); +} + /** * strlen - Find the length of a string * @s: The string to be sized diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 113588ddb43f..f274a50db5fa 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -22,6 +22,7 @@ extern int strcmp(const char *str1, const char *str2); extern int strncmp(const char *cs, const char *ct, size_t count); extern size_t strlen(const char *s); extern char *strstr(const char *s1, const char *s2); +extern char *strchr(const char *s, int c); extern size_t strnlen(const char *s, size_t maxlen); extern unsigned int atou(const char *s); extern unsigned long long simple_strtoull(const char *cp, char **endp, diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 34b3fa2889d1..9e32d40d71bd 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +OBJECT_FILES_NON_STANDARD := y + avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile index 2f8756375df5..2e14acc3da25 100644 --- a/arch/x86/crypto/sha1-mb/Makefile +++ b/arch/x86/crypto/sha1-mb/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +OBJECT_FILES_NON_STANDARD := y + avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) ifeq ($(avx2_supported),yes) diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile index 41089e7c400c..45b4fca6c4a8 100644 --- a/arch/x86/crypto/sha256-mb/Makefile +++ b/arch/x86/crypto/sha256-mb/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +OBJECT_FILES_NON_STANDARD := y + avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) ifeq ($(avx2_supported),yes) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 50bc26949e9e..48ef7bb32c42 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,6 +252,23 @@ ENTRY(__switch_to_asm) END(__switch_to_asm) /* + * The unwinder expects the last frame on the stack to always be at the same + * offset from the end of the page, which allows it to validate the stack. + * Calling schedule_tail() directly would break that convention because its an + * asmlinkage function so its argument has to be pushed on the stack. This + * wrapper creates a proper "end of stack" frame header before the call. + */ +ENTRY(schedule_tail_wrapper) + FRAME_BEGIN + + pushl %eax + call schedule_tail + popl %eax + + FRAME_END + ret +ENDPROC(schedule_tail_wrapper) +/* * A newly forked process directly context switches into this address. * * eax: prev task we switched from @@ -259,24 +276,15 @@ END(__switch_to_asm) * edi: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ - - /* - * schedule_tail() is asmlinkage so we have to put its 'prev' argument - * on the stack. - */ - pushl %eax - call schedule_tail - popl %eax + call schedule_tail_wrapper testl %ebx, %ebx jnz 1f /* kernel threads are uncommon */ 2: /* When we fork, we trace the syscall return in the child, too. */ - leal FRAME_OFFSET(%esp), %eax + movl %esp, %eax call syscall_return_slowpath - FRAME_END jmp restore_all /* kernel thread */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 607d72c4a485..a9a8027a6c0e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,7 +36,6 @@ #include <asm/smap.h> #include <asm/pgtable_types.h> #include <asm/export.h> -#include <asm/frame.h> #include <linux/err.h> .code64 @@ -266,7 +265,8 @@ return_from_SYSCALL_64: * If width of "canonical tail" ever becomes variable, this will need * to be updated to remain correct on both old and new CPUs. * - * Change top 16 bits to be the sign-extension of 47th bit + * Change top bits to match most significant bit (47th or 56th bit + * depending on paging mode) in the address. */ shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx @@ -406,19 +406,17 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ + call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ 2: - leaq FRAME_OFFSET(%rsp),%rdi /* pt_regs pointer */ + movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS - FRAME_END jmp restore_regs_and_iret 1: diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index ac650d57ebf7..ff1ea2fb9705 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1750,6 +1750,8 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) return ret; } +static struct attribute_group x86_pmu_attr_group; + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1813,6 +1815,14 @@ static int __init init_hw_perf_events(void) x86_pmu_events_group.attrs = tmp; } + if (x86_pmu.attrs) { + struct attribute **tmp; + + tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); + if (!WARN_ON(!tmp)) + x86_pmu_attr_group.attrs = tmp; + } + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); @@ -2101,8 +2111,7 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->active_mm) - load_mm_cr4(current->active_mm); + load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); } static void x86_pmu_event_mapped(struct perf_event *event) @@ -2254,7 +2263,7 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { - struct cyc2ns_data *data; + struct cyc2ns_data data; u64 offset; userpg->cap_user_time = 0; @@ -2266,17 +2275,17 @@ void arch_perf_update_userpage(struct perf_event *event, if (!using_native_sched_clock() || !sched_clock_stable()) return; - data = cyc2ns_read_begin(); + cyc2ns_read_begin(&data); - offset = data->cyc2ns_offset + __sched_clock_offset; + offset = data.cyc2ns_offset + __sched_clock_offset; /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. */ userpg->cap_user_time = 1; - userpg->time_mult = data->cyc2ns_mul; - userpg->time_shift = data->cyc2ns_shift; + userpg->time_mult = data.cyc2ns_mul; + userpg->time_shift = data.cyc2ns_shift; userpg->time_offset = offset - now; /* @@ -2288,7 +2297,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->time_zero = offset; } - cyc2ns_read_end(data); + cyc2ns_read_end(); } void @@ -2333,7 +2342,7 @@ static unsigned long get_segment_base(unsigned int segment) /* IRQs are off, so this synchronizes with smp_store_release */ ldt = lockless_dereference(current->active_mm->context.ldt); - if (!ldt || idx > ldt->size) + if (!ldt || idx > ldt->nr_entries) return 0; desc = &ldt->entries[idx]; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index b9174aacf42f..aa62437d1aa1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ - [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ - [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ }, [ C(OP_PREFETCH) ] = { [ C(RESULT_ACCESS) ] = 0x0, @@ -3160,6 +3160,19 @@ err: return -ENOMEM; } +static void flip_smm_bit(void *data) +{ + unsigned long set = *(unsigned long *)data; + + if (set > 0) { + msr_set_bit(MSR_IA32_DEBUGCTLMSR, + DEBUGCTLMSR_FREEZE_IN_SMM_BIT); + } else { + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, + DEBUGCTLMSR_FREEZE_IN_SMM_BIT); + } +} + static void intel_pmu_cpu_starting(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); @@ -3174,6 +3187,8 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = NULL; + flip_smm_bit(&x86_pmu.attr_freeze_on_smi); + if (!cpuc->shared_regs) return; @@ -3595,6 +3610,52 @@ static struct attribute *hsw_events_attrs[] = { NULL }; +static ssize_t freeze_on_smi_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi); +} + +static DEFINE_MUTEX(freeze_on_smi_mutex); + +static ssize_t freeze_on_smi_store(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + ssize_t ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val > 1) + return -EINVAL; + + mutex_lock(&freeze_on_smi_mutex); + + if (x86_pmu.attr_freeze_on_smi == val) + goto done; + + x86_pmu.attr_freeze_on_smi = val; + + get_online_cpus(); + on_each_cpu(flip_smm_bit, &val, 1); + put_online_cpus(); +done: + mutex_unlock(&freeze_on_smi_mutex); + + return count; +} + +static DEVICE_ATTR_RW(freeze_on_smi); + +static struct attribute *intel_pmu_attrs[] = { + &dev_attr_freeze_on_smi.attr, + NULL, +}; + __init int intel_pmu_init(void) { union cpuid10_edx edx; @@ -3641,6 +3702,8 @@ __init int intel_pmu_init(void) x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); + + x86_pmu.attrs = intel_pmu_attrs; /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events, when not running in a hypervisor: diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index f924629836a8..eb261656a320 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -18,7 +18,7 @@ enum { LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, }; -static enum { +static const enum { LBR_EIP_FLAGS = 1, LBR_TSX = 2, } lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { @@ -287,7 +287,7 @@ inline u64 lbr_from_signext_quirk_wr(u64 val) /* * If quirk is needed, ensure sign extension is 61 bits: */ -u64 lbr_from_signext_quirk_rd(u64 val) +static u64 lbr_from_signext_quirk_rd(u64 val) { if (static_branch_unlikely(&lbr_from_quirk_key)) { /* diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 758c1aa5009d..44ec523287f6 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1170,7 +1170,7 @@ static int uncore_event_cpu_online(unsigned int cpu) pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { box = pmu->boxes[pkg]; - if (!box && atomic_inc_return(&box->refcnt) == 1) + if (box && atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index be3d36254040..53728eea1bed 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -562,6 +562,9 @@ struct x86_pmu { ssize_t (*events_sysfs_show)(char *page, u64 config); struct attribute **cpu_events; + unsigned long attr_freeze_on_smi; + struct attribute **attrs; + /* * CPU Hotplug hooks */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bdffcd9eab2b..5f01671c68f2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -252,6 +252,8 @@ static inline int x2apic_enabled(void) { return 0; } #define x2apic_supported() (0) #endif /* !CONFIG_X86_X2APIC */ +struct irq_data; + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -296,9 +298,9 @@ struct apic { /* Can't be NULL on 64-bit */ unsigned long (*set_apic_id)(unsigned int id); - int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid); + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); /* ipi */ void (*send_IPI)(int cpu, int vector); @@ -540,28 +542,12 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif -static inline int -flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) -{ - unsigned long cpu_mask = cpumask_bits(cpumask)[0] & - cpumask_bits(andmask)[0] & - cpumask_bits(cpu_online_mask)[0] & - APIC_ALL_CPUS; - - if (likely(cpu_mask)) { - *apicid = (unsigned int)cpu_mask; - return 0; - } else { - return -EINVAL; - } -} - -extern int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid); +extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); +extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); static inline void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index caa5798c92f4..33380b871463 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -246,19 +246,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } -/** - * atomic_inc_short - increment of a short integer - * @v: pointer to type int - * - * Atomically adds 1 to @v - * Returns the new value of @u - */ -static __always_inline short int atomic_inc_short(short int *v) -{ - asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); - return *v; -} - #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 2f77bcefe6b4..d2ff779f347e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -74,7 +74,7 @@ struct efi_scratch { __kernel_fpu_begin(); \ \ if (efi_scratch.use_pgd) { \ - efi_scratch.prev_cr3 = read_cr3(); \ + efi_scratch.prev_cr3 = __read_cr3(); \ write_cr3((unsigned long)efi_scratch.efi_pgt); \ __flush_tlb_all(); \ } \ diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index b8ad261d11dc..c66d19e3c23e 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -29,6 +29,7 @@ struct pt_regs; } while (0) extern int fixup_exception(struct pt_regs *regs, int trapnr); +extern int fixup_bug(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 59405a248fc2..9b76cd331990 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -22,8 +22,8 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; - unsigned int irq_tlb_count; #endif + unsigned int irq_tlb_count; #ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; #endif diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 16d3fa211962..668cca540025 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -29,7 +29,6 @@ struct irq_desc; #include <linux/cpumask.h> extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -extern void irq_force_complete_move(struct irq_desc *desc); #endif #ifdef CONFIG_HAVE_KVM diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index a210eba2727c..023b4a9fc846 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -55,7 +55,8 @@ extern struct irq_domain * irq_remapping_get_irq_domain(struct irq_alloc_info *info); /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ -extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); +extern struct irq_domain * +arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); /* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 055962615779..722d0e568863 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -296,6 +296,7 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ + bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; struct x86_exception exception; diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4fd5195deed0..3f9a3d2a5209 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -266,6 +266,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s #endif int mce_available(struct cpuinfo_x86 *c); +bool mce_is_memory_error(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index f9813b6d8b80..79b647a7ebd0 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -37,12 +37,6 @@ typedef struct { #endif } mm_context_t; -#ifdef CONFIG_SMP void leave_mm(int cpu); -#else -static inline void leave_mm(int cpu) -{ -} -#endif #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 68b329d77b3a..ecfcb6643c9b 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -47,7 +47,7 @@ struct ldt_struct { * allocations, but it's not worth trying to optimize. */ struct desc_struct *entries; - unsigned int size; + unsigned int nr_entries; }; /* @@ -87,22 +87,46 @@ static inline void load_mm_ldt(struct mm_struct *mm) */ if (unlikely(ldt)) - set_ldt(ldt->entries, ldt->size); + set_ldt(ldt->entries, ldt->nr_entries); else clear_LDT(); #else clear_LDT(); #endif +} + +static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ +#ifdef CONFIG_MODIFY_LDT_SYSCALL + /* + * Load the LDT if either the old or new mm had an LDT. + * + * An mm will never go from having an LDT to not having an LDT. Two + * mms never share an LDT, so we don't gain anything by checking to + * see whether the LDT changed. There's also no guarantee that + * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, + * then prev->context.ldt will also be non-NULL. + * + * If we really cared, we could optimize the case where prev == next + * and we're exiting lazy mode. Most of the time, if this happens, + * we don't actually need to reload LDTR, but modify_ldt() is mostly + * used by legacy code and emulators where we don't need this level of + * performance. + * + * This uses | instead of || because it generates better code. + */ + if (unlikely((unsigned long)prev->context.ldt | + (unsigned long)next->context.ldt)) + load_mm_ldt(next); +#endif DEBUG_LOCKS_WARN_ON(preemptible()); } static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { -#ifdef CONFIG_SMP if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif } static inline int init_new_context(struct task_struct *tsk, @@ -220,18 +244,6 @@ static inline int vma_pkey(struct vm_area_struct *vma) } #endif -static inline bool __pkru_allows_pkey(u16 pkey, bool write) -{ - u32 pkru = read_pkru(); - - if (!__pkru_allows_read(pkru, pkey)) - return false; - if (write && !__pkru_allows_write(pkru, pkey)) - return false; - - return true; -} - /* * We only want to enforce protection keys on the current process * because we effectively have no access to PKRU for other @@ -268,4 +280,23 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } + +/* + * This can be used from process context to figure out what the value of + * CR3 is without needing to do a (slow) __read_cr3(). + * + * It's intended to be used for code like KVM that sneakily changes CR3 + * and needs to restore it. It needs to be used very carefully. + */ +static inline unsigned long __get_current_cr3_fast(void) +{ + unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd); + + /* For now, be very restrictive about when this can be called. */ + VM_WARN_ON(in_nmi() || !in_atomic()); + + VM_BUG_ON(cr3 != __read_cr3()); + return cr3; +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index fba100713924..d5acc27ed1cc 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -2,8 +2,7 @@ #define _ASM_X86_MSHYPER_H #include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/clocksource.h> +#include <linux/atomic.h> #include <asm/hyperv.h> /* diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 673f9ac50f6d..18b162322eff 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -137,6 +137,8 @@ #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) #define MSR_PEBS_FRONTEND 0x000003f7 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 55fa56fe4e45..9ccac1926587 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -61,7 +61,7 @@ static inline void write_cr2(unsigned long x) PVOP_VCALL1(pv_mmu_ops.write_cr2, x); } -static inline unsigned long read_cr3(void) +static inline unsigned long __read_cr3(void) { return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); } @@ -118,7 +118,7 @@ static inline u64 paravirt_read_msr(unsigned msr) static inline void paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { - return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); + PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); } static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) @@ -312,11 +312,9 @@ static inline void __flush_tlb_single(unsigned long addr) } static inline void flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end) + const struct flush_tlb_info *info) { - PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end); + PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7465d6fe336f..cb976bab6299 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -51,6 +51,7 @@ struct mm_struct; struct desc_struct; struct task_struct; struct cpumask; +struct flush_tlb_info; /* * Wrapper type for pointers to code which uses the non-standard @@ -223,9 +224,7 @@ struct pv_mmu_ops { void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); void (*flush_tlb_others)(const struct cpumask *cpus, - struct mm_struct *mm, - unsigned long start, - unsigned long end); + const struct flush_tlb_info *info); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index f513cc231151..473a7295ab10 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -77,14 +77,8 @@ static inline bool is_vmd(struct pci_bus *bus) extern unsigned int pcibios_assign_all_busses(void); extern int pci_legacy_init(void); -# ifdef CONFIG_ACPI -# define x86_default_pci_init pci_acpi_init -# else -# define x86_default_pci_init pci_legacy_init -# endif #else -# define pcibios_assign_all_busses() 0 -# define x86_default_pci_init NULL +static inline int pcibios_assign_all_busses(void) { return 0; } #endif extern unsigned long pci_mem_start; diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 50d35e3185f5..c8821bab938f 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -212,4 +212,51 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) +#define gup_get_pte gup_get_pte +/* + * WARNING: only to be used in the get_user_pages_fast() implementation. + * + * With get_user_pages_fast(), we walk down the pagetables without taking + * any locks. For this we would like to load the pointers atomically, + * but that is not possible (without expensive cmpxchg8b) on PAE. What + * we do have is the guarantee that a PTE will only either go from not + * present to present, or present to not present or both -- it will not + * switch to a completely different present page without a TLB flush in + * between; something that we are blocking by holding interrupts off. + * + * Setting ptes from not present to present goes: + * + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees 'l' iff pte_high + * sees 'h'. We load pte_high *after* loading pte_low, which ensures we + * don't see an older value of pte_high. *Then* we recheck pte_low, + * which ensures that we haven't picked up a changed pte high. We might + * have gotten rubbish values from pte_low and pte_high, but we are + * guaranteed that pte_low will not have the present bit set *unless* + * it is 'l'. Because get_user_pages_fast() only operates on present ptes + * we're safe. + */ +static inline pte_t gup_get_pte(pte_t *ptep) +{ + pte_t pte; + + do { + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + } while (unlikely(pte.pte_low != ptep->pte_low)); + + return pte; +} + #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index f5af95a0c6b8..77037b6f1caa 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -244,6 +244,11 @@ static inline int pud_devmap(pud_t pud) return 0; } #endif + +static inline int pgd_devmap(pgd_t pgd) +{ + return 0; +} #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -917,7 +922,7 @@ extern pgd_t trampoline_pgd_entry; static inline void __meminit init_trampoline_default(void) { /* Default trampoline pgd value */ - trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)]; + trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; } # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); @@ -1185,6 +1190,54 @@ static inline u16 pte_flags_pkey(unsigned long pte_flags) #endif } +static inline bool __pkru_allows_pkey(u16 pkey, bool write) +{ + u32 pkru = read_pkru(); + + if (!__pkru_allows_read(pkru, pkey)) + return false; + if (write && !__pkru_allows_write(pkru, pkey)) + return false; + + return true; +} + +/* + * 'pteval' can come from a PTE, PMD or PUD. We only check + * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the + * same value on all 3 types. + */ +static inline bool __pte_access_permitted(unsigned long pteval, bool write) +{ + unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; + + if (write) + need_pte_bits |= _PAGE_RW; + + if ((pteval & need_pte_bits) != need_pte_bits) + return 0; + + return __pkru_allows_pkey(pte_flags_pkey(pteval), write); +} + +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + return __pte_access_permitted(pte_val(pte), write); +} + +#define pmd_access_permitted pmd_access_permitted +static inline bool pmd_access_permitted(pmd_t pmd, bool write) +{ + return __pte_access_permitted(pmd_val(pmd), write); +} + +#define pud_access_permitted pud_access_permitted +static inline bool pud_access_permitted(pud_t pud, bool write) +{ + return __pte_access_permitted(pud_val(pud), write); +} + #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 9991224f6238..2160c1fee920 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -14,15 +14,17 @@ #include <linux/bitops.h> #include <linux/threads.h> +extern p4d_t level4_kernel_pgt[512]; +extern p4d_t level4_ident_pgt[512]; extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; extern pte_t level1_fixmap_pgt[512]; -extern pgd_t init_level4_pgt[]; +extern pgd_t init_top_pgt[]; -#define swapper_pg_dir init_level4_pgt +#define swapper_pg_dir init_top_pgt extern void paging_init(void); @@ -227,6 +229,20 @@ extern void cleanup_highmap(void); extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); -#endif /* !__ASSEMBLY__ */ +#define gup_fast_permitted gup_fast_permitted +static inline bool gup_fast_permitted(unsigned long start, int nr_pages, + int write) +{ + unsigned long len, end; + + len = (unsigned long)nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + return false; + if (end >> __VIRTUAL_MASK_SHIFT) + return false; + return true; +} +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 39fb618e2211..79aa2f98398d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -8,4 +8,40 @@ #else #define X86_VM_MASK 0 /* No VM86 support */ #endif + +/* + * CR3's layout varies depending on several things. + * + * If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address space ID. + * If PAE is enabled, then CR3[11:5] is part of the PDPT address + * (i.e. it's 32-byte aligned, not page-aligned) and CR3[4:0] is ignored. + * Otherwise (non-PAE, non-PCID), CR3[3] is PWT, CR3[4] is PCD, and + * CR3[2:0] and CR3[11:5] are ignored. + * + * In all cases, Linux puts zeros in the low ignored bits and in PWT and PCD. + * + * CR3[63] is always read as zero. If CR4.PCIDE is set, then CR3[63] may be + * written as 1 to prevent the write to CR3 from flushing the TLB. + * + * On systems with SME, one bit (in a variable position!) is stolen to indicate + * that the top-level paging structure is encrypted. + * + * All of the remaining bits indicate the physical address of the top-level + * paging structure. + * + * CR3_ADDR_MASK is the mask used by read_cr3_pa(). + */ +#ifdef CONFIG_X86_64 +/* Mask off the address space ID bits. */ +#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull +#define CR3_PCID_MASK 0xFFFull +#else +/* + * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save + * a tiny bit of code size by setting all the bits. + */ +#define CR3_ADDR_MASK 0xFFFFFFFFull +#define CR3_PCID_MASK 0ull +#endif + #endif /* _ASM_X86_PROCESSOR_FLAGS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3cada998a402..2e1696294af5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -231,6 +231,14 @@ native_cpuid_reg(ebx) native_cpuid_reg(ecx) native_cpuid_reg(edx) +/* + * Friendlier CR3 helpers. + */ +static inline unsigned long read_cr3_pa(void) +{ + return __read_cr3() & CR3_ADDR_MASK; +} + static inline void load_cr3(pgd_t *pgdir) { write_cr3(__pa(pgdir)); @@ -860,8 +868,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ac1d5da14734..e4585a393965 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -44,7 +44,6 @@ extern unsigned long saved_video_mode; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); -extern void setup_default_timer_irq(void); #ifdef CONFIG_X86_INTEL_MID extern void x86_intel_mid_early_setup(void); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 12af3e35edfa..9efaabf5b54b 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -39,7 +39,7 @@ static inline void native_write_cr2(unsigned long val) asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); } -static inline unsigned long native_read_cr3(void) +static inline unsigned long __native_read_cr3(void) { unsigned long val; asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); @@ -159,9 +159,13 @@ static inline void write_cr2(unsigned long x) native_write_cr2(x); } -static inline unsigned long read_cr3(void) +/* + * Careful! CR3 contains more than just an address. You probably want + * read_cr3_pa() instead. + */ +static inline unsigned long __read_cr3(void) { - return native_read_cr3(); + return __native_read_cr3(); } static inline void write_cr3(unsigned long x) diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 27e9f9d769b8..2016962103df 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -29,11 +29,9 @@ struct cyc2ns_data { u32 cyc2ns_mul; u32 cyc2ns_shift; u64 cyc2ns_offset; - u32 __count; - /* u32 hole */ -}; /* 24 bytes -- do not grow */ +}; /* 16 bytes */ -extern struct cyc2ns_data *cyc2ns_read_begin(void); -extern void cyc2ns_read_end(struct cyc2ns_data *); +extern void cyc2ns_read_begin(struct cyc2ns_data *); +extern void cyc2ns_read_end(void); #endif /* _ASM_X86_TIMER_H */ diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h new file mode 100644 index 000000000000..f4a6ff352a0e --- /dev/null +++ b/arch/x86/include/asm/tlbbatch.h @@ -0,0 +1,14 @@ +#ifndef _ARCH_X86_TLBBATCH_H +#define _ARCH_X86_TLBBATCH_H + +#include <linux/cpumask.h> + +struct arch_tlbflush_unmap_batch { + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed.. + */ + struct cpumask cpumask; +}; + +#endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6ed9ea469b48..50ea3482e1d1 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,7 @@ #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> +#include <asm/smp.h> static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) @@ -65,10 +66,14 @@ static inline void invpcid_flush_all_nonglobals(void) #endif struct tlb_state { -#ifdef CONFIG_SMP - struct mm_struct *active_mm; + /* + * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts + * are on. This means that it may not match current->active_mm, + * which will contain the previous user mm when we're in lazy TLB + * mode even if we've already switched back to swapper_pg_dir. + */ + struct mm_struct *loaded_mm; int state; -#endif /* * Access to this CR4 shadow and to H/W CR4 is protected by @@ -151,7 +156,7 @@ static inline void __native_flush_tlb(void) * back: */ preempt_disable(); - native_write_cr3(native_read_cr3()); + native_write_cr3(__native_read_cr3()); preempt_enable(); } @@ -220,84 +225,16 @@ static inline void __flush_tlb_one(unsigned long addr) * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus + * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. */ - -#ifndef CONFIG_SMP - -/* "_up" is for UniProcessor. - * - * This is a helper for other header functions. *Not* intended to be called - * directly. All global TLB flushes need to either call this, or to bump the - * vm statistics themselves. - */ -static inline void __flush_tlb_up(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb(); -} - -static inline void flush_tlb_all(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb_all(); -} - -static inline void local_flush_tlb(void) -{ - __flush_tlb_up(); -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_mm_range(struct mm_struct *mm, - unsigned long start, unsigned long end, unsigned long vmflag) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ -} - -static inline void reset_lazy_tlbstate(void) -{ -} - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - -#else /* SMP */ - -#include <asm/smp.h> +struct flush_tlb_info { + struct mm_struct *mm; + unsigned long start; + unsigned long end; +}; #define local_flush_tlb() __flush_tlb() @@ -307,29 +244,32 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) extern void flush_tlb_all(void); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) +{ + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); +} + void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, unsigned long end); + const struct flush_tlb_info *info); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -static inline void reset_lazy_tlbstate(void) +static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm) { - this_cpu_write(cpu_tlbstate.state, 0); - this_cpu_write(cpu_tlbstate.active_mm, &init_mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); } -#endif /* SMP */ +extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, start, end) \ - native_flush_tlb_others(mask, mm, start, end) +#define flush_tlb_others(mask, info) \ + native_flush_tlb_others(mask, info) #endif #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 6686820feae9..b5a32231abd8 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H +#include <asm/tlbflush.h> + enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; struct cpumask; @@ -15,10 +17,7 @@ extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - unsigned int cpu); + const struct flush_tlb_info *info); #else /* X86_UV */ @@ -28,8 +27,8 @@ static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, - unsigned long start, unsigned long end, unsigned int cpu) +uv_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) { return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 432df4b1baec..f4fef5a24ebd 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -34,16 +34,10 @@ #define HV_X64_MSR_REFERENCE_TSC 0x40000021 /* - * There is a single feature flag that signifies the presence of the MSR - * that can be used to retrieve both the local APIC Timer frequency as - * well as the TSC frequency. + * There is a single feature flag that signifies if the partition has access + * to MSRs with local APIC and TSC frequencies. */ - -/* Local APIC timer frequency MSR (HV_X64_MSR_APIC_FREQUENCY) is available */ -#define HV_X64_MSR_APIC_FREQUENCY_AVAILABLE (1 << 11) - -/* TSC frequency MSR (HV_X64_MSR_TSC_FREQUENCY) is available */ -#define HV_X64_MSR_TSC_FREQUENCY_AVAILABLE (1 << 11) +#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11) /* * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM @@ -73,6 +67,9 @@ */ #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) +/* Frequency MSRs available */ +#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE (1 << 8) + /* Crash MSR available */ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 567de50a4c2a..185f3d10c194 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -104,6 +104,8 @@ #define X86_CR4_OSFXSR _BITUL(X86_CR4_OSFXSR_BIT) #define X86_CR4_OSXMMEXCPT_BIT 10 /* enable unmasked SSE exceptions */ #define X86_CR4_OSXMMEXCPT _BITUL(X86_CR4_OSXMMEXCPT_BIT) +#define X86_CR4_LA57_BIT 12 /* enable 5-level page tables */ +#define X86_CR4_LA57 _BITUL(X86_CR4_LA57_BIT) #define X86_CR4_VMXE_BIT 13 /* enable VMX virtualization */ #define X86_CR4_VMXE _BITUL(X86_CR4_VMXE_BIT) #define X86_CR4_SMXE_BIT 14 /* enable safer mode (TXT) */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4b994232cb57..a01892bdd61a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,6 +18,7 @@ CFLAGS_REMOVE_pvclock.o = -pg CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg +CFLAGS_REMOVE_head64.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -29,6 +30,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y +OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y # If instrumentation of this dir is enabled, boot hangs during first second. # Probably could be more selective here, but note that files related to irqs, diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 26b78d86f25a..85a9e17e0dbc 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,3 +1,5 @@ +OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y + obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o obj-$(CONFIG_ACPI_APEI) += apei.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c5b8f760473c..32e14d137416 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -409,8 +409,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, memcpy(insnbuf, replacement, a->replacementlen); insnbuf_sz = a->replacementlen; - /* 0xe8 is a relative jump; fix the offset. */ - if (*insnbuf == 0xe8 && a->replacementlen == 5) { + /* + * 0xe8 is a relative jump; fix the offset. + * + * Instruction length is checked before the opcode to avoid + * accessing uninitialized bytes for zero-length replacements. + */ + if (a->replacementlen == 5 && *insnbuf == 0xe8) { *(s32 *)(insnbuf + 1) += replacement - instr; DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", *(s32 *)(insnbuf + 1), diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2d75faf743f2..98b3dd8cf2bf 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -54,6 +54,8 @@ #include <asm/mce.h> #include <asm/tsc.h> #include <asm/hypervisor.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> unsigned int num_processors; @@ -545,6 +547,81 @@ static struct clock_event_device lapic_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); +#define DEADLINE_MODEL_MATCH_FUNC(model, func) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func } + +#define DEADLINE_MODEL_MATCH_REV(model, rev) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } + +static u32 hsx_deadline_rev(void) +{ + switch (boot_cpu_data.x86_mask) { + case 0x02: return 0x3a; /* EP */ + case 0x04: return 0x0f; /* EX */ + } + + return ~0U; +} + +static u32 bdx_deadline_rev(void) +{ + switch (boot_cpu_data.x86_mask) { + case 0x02: return 0x00000011; + case 0x03: return 0x0700000e; + case 0x04: return 0x0f00000c; + case 0x05: return 0x0e000003; + } + + return ~0U; +} + +static const struct x86_cpu_id deadline_match[] = { + DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), + DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X, 0x02000014), + + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17), + + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17), + + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2), + + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52), + DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52), + + {}, +}; + +static void apic_check_deadline_errata(void) +{ + const struct x86_cpu_id *m = x86_match_cpu(deadline_match); + u32 rev; + + if (!m) + return; + + /* + * Function pointers will have the MSB set due to address layout, + * immediate revisions will not. + */ + if ((long)m->driver_data < 0) + rev = ((u32 (*)(void))(m->driver_data))(); + else + rev = (u32)m->driver_data; + + if (boot_cpu_data.microcode >= rev) + return; + + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); + pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " + "please update microcode to version: 0x%x (or later)\n", rev); +} + /* * Setup the local APIC timer for this CPU. Copy the initialized values * of the boot CPU and register the clock event in the framework. @@ -563,6 +640,7 @@ static void setup_APIC_timer(void) levt->cpumask = cpumask_of(smp_processor_id()); if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { + levt->name = "lapic-deadline"; levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_DUMMY); levt->set_next_event = lapic_next_deadline; @@ -1779,6 +1857,8 @@ void __init init_apic_mappings(void) { unsigned int new_apicid; + apic_check_deadline_errata(); + if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); return; @@ -2201,23 +2281,32 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } -int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) +int default_cpu_mask_to_apicid(const struct cpumask *mask, + struct irq_data *irqdata, + unsigned int *apicid) { - unsigned int cpu; + unsigned int cpu = cpumask_first(mask); - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } + if (cpu >= nr_cpu_ids) + return -EINVAL; + *apicid = per_cpu(x86_cpu_to_apicid, cpu); + irq_data_update_effective_affinity(irqdata, cpumask_of(cpu)); + return 0; +} - if (likely(cpu < nr_cpu_ids)) { - *apicid = per_cpu(x86_cpu_to_apicid, cpu); - return 0; - } +int flat_cpu_mask_to_apicid(const struct cpumask *mask, + struct irq_data *irqdata, + unsigned int *apicid) - return -EINVAL; +{ + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); + unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; + + if (!cpu_mask) + return -EINVAL; + *apicid = (unsigned int)cpu_mask; + cpumask_bits(effmsk)[0] = cpu_mask; + return 0; } /* diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index a4d7ff20ed22..dedd5a41ba48 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -172,7 +172,7 @@ static struct apic apic_flat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, @@ -268,7 +268,7 @@ static struct apic apic_physflat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 2262eb6df796..6599f437b4ab 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -141,7 +141,7 @@ struct apic apic_noop __ro_after_init = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .send_IPI = noop_send_IPI, .send_IPI_mask = noop_send_IPI_mask, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index e08fe2c8dd8c..2fda912219a6 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -267,7 +267,7 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, @@ -318,7 +318,7 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 56012010332c..456e45e8bf84 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -172,7 +172,7 @@ static struct apic apic_bigsmp __ro_after_init = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index ae50d3454d78..56ccf9346b08 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -150,16 +150,27 @@ static const struct irq_domain_ops htirq_domain_ops = { .deactivate = htirq_domain_deactivate, }; -void arch_init_htirq_domain(struct irq_domain *parent) +void __init arch_init_htirq_domain(struct irq_domain *parent) { + struct fwnode_handle *fn; + if (disable_apic) return; - htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL); + fn = irq_domain_alloc_named_fwnode("PCI-HT"); + if (!fn) + goto warn; + + htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL); + irq_domain_free_fwnode(fn); if (!htirq_domain) - pr_warn("failed to initialize irqdomain for HTIRQ.\n"); - else - htirq_domain->parent = parent; + goto warn; + + htirq_domain->parent = parent; + return; + +warn: + pr_warn("Failed to initialize irqdomain for HTIRQ.\n"); } int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 347bb9f65737..b4f5f73febdb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1200,28 +1200,6 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); static struct irq_chip ioapic_chip, ioapic_ir_chip; -#ifdef CONFIG_X86_32 -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for_each_ioapic_pin(apic, pin) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0))) - return irq_trigger(idx); - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} -#else -static inline int IO_APIC_irq_trigger(int irq) -{ - return 1; -} -#endif - static void __init setup_IO_APIC_irqs(void) { unsigned int ioapic, pin; @@ -2223,6 +2201,8 @@ static int mp_irqdomain_create(int ioapic) struct ioapic *ip = &ioapics[ioapic]; struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); + struct fwnode_handle *fn; + char *name = "IO-APIC"; if (cfg->type == IOAPIC_DOMAIN_INVALID) return 0; @@ -2233,9 +2213,25 @@ static int mp_irqdomain_create(int ioapic) parent = irq_remapping_get_ir_irq_domain(&info); if (!parent) parent = x86_vector_domain; + else + name = "IO-APIC-IR"; + + /* Handle device tree enumerated APICs proper */ + if (cfg->dev) { + fn = of_node_to_fwnode(cfg->dev); + } else { + fn = irq_domain_alloc_named_id_fwnode(name, ioapic); + if (!fn) + return -ENOMEM; + } + + ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, + (void *)(long)ioapic); + + /* Release fw handle if it was allocated above */ + if (!cfg->dev) + irq_domain_free_fwnode(fn); - ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops, - (void *)(long)ioapic); if (!ip->irqdomain) return -ENOMEM; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index c61aec7e65f4..9b18be764422 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -136,13 +136,20 @@ static struct msi_domain_info pci_msi_domain_info = { .handler_name = "edge", }; -void arch_init_msi_domain(struct irq_domain *parent) +void __init arch_init_msi_domain(struct irq_domain *parent) { + struct fwnode_handle *fn; + if (disable_apic) return; - msi_default_domain = pci_msi_create_irq_domain(NULL, - &pci_msi_domain_info, parent); + fn = irq_domain_alloc_named_fwnode("PCI-MSI"); + if (fn) { + msi_default_domain = + pci_msi_create_irq_domain(fn, &pci_msi_domain_info, + parent); + irq_domain_free_fwnode(fn); + } if (!msi_default_domain) pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); } @@ -167,9 +174,18 @@ static struct msi_domain_info pci_msi_ir_domain_info = { .handler_name = "edge", }; -struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent) +struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, + const char *name, int id) { - return pci_msi_create_irq_domain(NULL, &pci_msi_ir_domain_info, parent); + struct fwnode_handle *fn; + struct irq_domain *d; + + fn = irq_domain_alloc_named_id_fwnode(name, id); + if (!fn) + return NULL; + d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); + irq_domain_free_fwnode(fn); + return d; } #endif @@ -221,13 +237,20 @@ static struct irq_domain *dmar_get_irq_domain(void) { static struct irq_domain *dmar_domain; static DEFINE_MUTEX(dmar_lock); + struct fwnode_handle *fn; mutex_lock(&dmar_lock); - if (dmar_domain == NULL) - dmar_domain = msi_create_irq_domain(NULL, &dmar_msi_domain_info, + if (dmar_domain) + goto out; + + fn = irq_domain_alloc_named_fwnode("DMAR-MSI"); + if (fn) { + dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info, x86_vector_domain); + irq_domain_free_fwnode(fn); + } +out: mutex_unlock(&dmar_lock); - return dmar_domain; } @@ -317,9 +340,10 @@ static struct msi_domain_info hpet_msi_domain_info = { struct irq_domain *hpet_create_irq_domain(int hpet_id) { - struct irq_domain *parent; - struct irq_alloc_info info; struct msi_domain_info *domain_info; + struct irq_domain *parent, *d; + struct irq_alloc_info info; + struct fwnode_handle *fn; if (x86_vector_domain == NULL) return NULL; @@ -340,7 +364,16 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id) else hpet_msi_controller.name = "IR-HPET-MSI"; - return msi_create_irq_domain(NULL, domain_info, parent); + fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, + hpet_id); + if (!fn) { + kfree(domain_info); + return NULL; + } + + d = msi_create_irq_domain(fn, domain_info, parent); + irq_domain_free_fwnode(fn); + return d; } int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 2e8f7f048f4f..63287659adb6 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -102,7 +102,7 @@ static struct apic apic_default __ro_after_init = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = default_send_IPI_mask_logical, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f3557a1eb562..b3af457ed667 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -103,7 +103,8 @@ static void free_apic_chip_data(struct apic_chip_data *data) } static int __assign_irq_vector(int irq, struct apic_chip_data *d, - const struct cpumask *mask) + const struct cpumask *mask, + struct irq_data *irqdata) { /* * NOTE! The local APIC isn't very good at handling @@ -141,7 +142,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, /* * Clear the offline cpus from @vector_cpumask for searching * and verify whether the result overlaps with @mask. If true, - * then the call to apic->cpu_mask_to_apicid_and() will + * then the call to apic->cpu_mask_to_apicid() will * succeed as well. If not, no point in trying to find a * vector in this mask. */ @@ -221,34 +222,40 @@ success: * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail * as we already established, that mask & d->domain & cpu_online_mask * is not empty. + * + * vector_searchmask is a subset of d->domain and has the offline + * cpus masked out. */ - BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain, - &d->cfg.dest_apicid)); + cpumask_and(vector_searchmask, vector_searchmask, mask); + BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata, + &d->cfg.dest_apicid)); return 0; } static int assign_irq_vector(int irq, struct apic_chip_data *data, - const struct cpumask *mask) + const struct cpumask *mask, + struct irq_data *irqdata) { int err; unsigned long flags; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, data, mask); + err = __assign_irq_vector(irq, data, mask, irqdata); raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } static int assign_irq_vector_policy(int irq, int node, struct apic_chip_data *data, - struct irq_alloc_info *info) + struct irq_alloc_info *info, + struct irq_data *irqdata) { if (info && info->mask) - return assign_irq_vector(irq, data, info->mask); + return assign_irq_vector(irq, data, info->mask, irqdata); if (node != NUMA_NO_NODE && - assign_irq_vector(irq, data, cpumask_of_node(node)) == 0) + assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0) return 0; - return assign_irq_vector(irq, data, apic->target_cpus()); + return assign_irq_vector(irq, data, apic->target_cpus(), irqdata); } static void clear_irq_vector(int irq, struct apic_chip_data *data) @@ -360,9 +367,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data->chip = &lapic_controller; irq_data->chip_data = data; irq_data->hwirq = virq + i; - err = assign_irq_vector_policy(virq + i, node, data, info); + err = assign_irq_vector_policy(virq + i, node, data, info, + irq_data); if (err) goto error; + /* + * If the apic destination mode is physical, then the + * effective affinity is restricted to a single target + * CPU. Mark the interrupt accordingly. + */ + if (!apic->irq_dest_mode) + irqd_set_single_target(irq_data); } return 0; @@ -405,7 +420,7 @@ int __init arch_probe_nr_irqs(void) } #ifdef CONFIG_X86_IO_APIC -static void init_legacy_irqs(void) +static void __init init_legacy_irqs(void) { int i, node = cpu_to_node(0); struct apic_chip_data *data; @@ -424,16 +439,21 @@ static void init_legacy_irqs(void) } } #else -static void init_legacy_irqs(void) { } +static inline void init_legacy_irqs(void) { } #endif int __init arch_early_irq_init(void) { + struct fwnode_handle *fn; + init_legacy_irqs(); - x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops, - NULL); + fn = irq_domain_alloc_named_fwnode("VECTOR"); + BUG_ON(!fn); + x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, + NULL); BUG_ON(x86_vector_domain == NULL); + irq_domain_free_fwnode(fn); irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); @@ -529,11 +549,12 @@ static int apic_set_affinity(struct irq_data *irq_data, if (!cpumask_intersects(dest, cpu_online_mask)) return -EINVAL; - err = assign_irq_vector(irq, data, dest); + err = assign_irq_vector(irq, data, dest, irq_data); return err ? err : IRQ_SET_MASK_OK; } static struct irq_chip lapic_controller = { + .name = "APIC", .irq_ack = apic_ack_edge, .irq_set_affinity = apic_set_affinity, .irq_retrigger = apic_retrigger_irq, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 5a35f208ed95..481237cb1544 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -4,6 +4,7 @@ #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/dmar.h> +#include <linux/irq.h> #include <linux/cpu.h> #include <asm/smp.h> @@ -104,35 +105,30 @@ static void x2apic_send_IPI_all(int vector) } static int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) +x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, + unsigned int *apicid) { + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); + unsigned int cpu; u32 dest = 0; u16 cluster; - int i; - - for_each_cpu_and(i, cpumask, andmask) { - if (!cpumask_test_cpu(i, cpu_online_mask)) - continue; - dest = per_cpu(x86_cpu_to_logical_apicid, i); - cluster = x2apic_cluster(i); - break; - } - if (!dest) + cpu = cpumask_first(mask); + if (cpu >= nr_cpu_ids) return -EINVAL; - for_each_cpu_and(i, cpumask, andmask) { - if (!cpumask_test_cpu(i, cpu_online_mask)) - continue; - if (cluster != x2apic_cluster(i)) + dest = per_cpu(x86_cpu_to_logical_apicid, cpu); + cluster = x2apic_cluster(cpu); + + cpumask_clear(effmsk); + for_each_cpu(cpu, mask) { + if (cluster != x2apic_cluster(cpu)) continue; - dest |= per_cpu(x86_cpu_to_logical_apicid, i); + dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); + cpumask_set_cpu(cpu, effmsk); } *apicid = dest; - return 0; } @@ -256,7 +252,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index ff111f05a314..3baf0c3dc875 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -127,7 +127,7 @@ static struct apic apic_x2apic_phys __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b487b3a01615..0d57bb9079c9 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -526,27 +526,15 @@ static void uv_init_apic_ldr(void) } static int -uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) +uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, + unsigned int *apicid) { - int unsigned cpu; + int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid); - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - - if (likely(cpu < nr_cpu_ids)) { - *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; - return 0; - } + if (!ret) + *apicid |= uv_apicid_hibits; - return -EINVAL; + return ret; } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -614,7 +602,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index a70fd61095f8..6f077445647a 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) break; case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ + case 11: /* GX1 with inverted Device ID */ #ifdef CONFIG_PCI { u32 vendor, device; diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index f5af0cc7eb0d..9257bd9dc664 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -856,11 +856,13 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, dentry = kernfs_mount(fs_type, flags, rdt_root, RDTGROUP_SUPER_MAGIC, NULL); if (IS_ERR(dentry)) - goto out_cdp; + goto out_destroy; static_branch_enable(&rdt_enable_key); goto out; +out_destroy: + kernfs_remove(kn_info); out_cdp: cdp_disable(); out: diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5abd4bf73d6e..5cfbaeb6529a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -499,16 +499,14 @@ static int mce_usable_address(struct mce *m) return 1; } -static bool memory_error(struct mce *m) +bool mce_is_memory_error(struct mce *m) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - if (c->x86_vendor == X86_VENDOR_AMD) { + if (m->cpuvendor == X86_VENDOR_AMD) { /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; return (xec == 0x0 || xec == 0x8); - } else if (c->x86_vendor == X86_VENDOR_INTEL) { + } else if (m->cpuvendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes * @@ -529,6 +527,7 @@ static bool memory_error(struct mce *m) return false; } +EXPORT_SYMBOL_GPL(mce_is_memory_error); static bool cec_add_mce(struct mce *m) { @@ -536,7 +535,7 @@ static bool cec_add_mce(struct mce *m) return false; /* We eat only correctable DRAM errors with usable addresses. */ - if (memory_error(m) && + if (mce_is_memory_error(m) && !(m->status & MCI_STATUS_UC) && mce_usable_address(m)) if (!cec_add_elem(m->addr >> PAGE_SHIFT)) @@ -713,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) + if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m)) if (m.status & MCI_STATUS_ADDRV) m.severity = severity; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 45db4d2ebd01..21b185793c80 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -251,7 +251,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) #endif } -void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret) +static void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret) { struct ucode_cpu_info *uci; struct cpio_data cp; @@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size); int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { @@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) if (!desc.mc) return -EINVAL; - ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), - desc.data, desc.size); + ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); if (ret != UCODE_OK) return -EINVAL; @@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { enum ucode_state ret; @@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) #ifdef CONFIG_X86_32 /* save BSP's matching patch for early load */ - if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { - struct ucode_patch *p = find_patch(cpu); + if (save) { + struct ucode_patch *p = find_patch(0); if (p) { memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), @@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, { char fw_name[36] = "amd-ucode/microcode_amd.bin"; struct cpuinfo_x86 *c = &cpu_data(cpu); + bool bsp = c->cpu_index == boot_cpu_data.cpu_index; enum ucode_state ret = UCODE_NFOUND; const struct firmware *fw; /* reload ucode container only on the boot cpu */ - if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + if (!refresh_fw || !bsp) return UCODE_OK; if (c->x86 >= 0x15) @@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); + ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); fw_release: release_firmware(fw); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index e53d3c909840..9cb98ee103db 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -290,6 +290,17 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) return (struct cpio_data){ NULL, 0, "" }; if (initrd_start) start = initrd_start; + } else { + /* + * The picture with physical addresses is a bit different: we + * need to get the *physical* address to which the ramdisk was + * relocated, i.e., relocated_ramdisk (not initrd_start) and + * since we're running from physical addresses, we need to access + * relocated_ramdisk through its *physical* address too. + */ + u64 *rr = (u64 *)__pa_nodebug(&relocated_ramdisk); + if (*rr) + start = *rr; } return find_cpio_data(path, (void *)start, size, NULL); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index afdfd237b59f..59edbe9d4ccb 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -42,7 +42,7 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; /* Current microcode patch used in early patching on the APs. */ -struct microcode_intel *intel_ucode_patch; +static struct microcode_intel *intel_ucode_patch; static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, unsigned int s2, unsigned int p2) @@ -166,7 +166,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size) static void save_microcode_patch(void *data, unsigned int size) { struct microcode_header_intel *mc_hdr, *mc_saved_hdr; - struct ucode_patch *iter, *tmp, *p; + struct ucode_patch *iter, *tmp, *p = NULL; bool prev_found = false; unsigned int sig, pf; @@ -202,6 +202,18 @@ static void save_microcode_patch(void *data, unsigned int size) else list_add_tail(&p->plist, µcode_cache); } + + /* + * Save for early loading. On 32-bit, that needs to be a physical + * address as the APs are running from physical addresses, before + * paging has been enabled. + */ + if (p) { + if (IS_ENABLED(CONFIG_X86_32)) + intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data); + else + intel_ucode_patch = p->data; + } } static int microcode_sanity_check(void *mc, int print_err) @@ -607,6 +619,14 @@ int __init save_microcode_in_initrd_intel(void) struct ucode_cpu_info uci; struct cpio_data cp; + /* + * initrd is going away, clear patch ptr. We will scan the microcode one + * last time before jettisoning and save a patch, if found. Then we will + * update that pointer too, with a stable patch address to use when + * resuming the cores. + */ + intel_ucode_patch = NULL; + if (!load_builtin_intel_microcode(&cp)) cp = find_microcode_in_initrd(ucode_path, false); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 04cb8d34ccb8..70e717fccdd6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -161,6 +161,15 @@ static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) } #endif +static unsigned long hv_get_tsc_khz(void) +{ + unsigned long freq; + + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); + + return freq / 1000; +} + static void __init ms_hyperv_init_platform(void) { int hv_host_info_eax; @@ -193,8 +202,15 @@ static void __init ms_hyperv_init_platform(void) hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF); } + if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && + ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { + x86_platform.calibrate_tsc = hv_get_tsc_khz; + x86_platform.calibrate_cpu = hv_get_tsc_khz; + } + #ifdef CONFIG_X86_LOCAL_APIC - if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) { + if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && + ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { /* * Get the APIC frequency. */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 8e598a1ad986..6b91e2eb8d3f 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -125,7 +125,7 @@ void __init init_espfix_bsp(void) p4d_t *p4d; /* Install the espfix pud into the kernel page directory */ - pgd = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd = &init_top_pgt[pgd_index(ESPFIX_BASE_ADDR)]; p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR); p4d_populate(&init_mm, p4d, espfix_pud_page); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0651e974dcb3..9bef1bbeba63 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -689,8 +689,12 @@ static inline void *alloc_tramp(unsigned long size) { return module_alloc(size); } -static inline void tramp_free(void *tramp) +static inline void tramp_free(void *tramp, int size) { + int npages = PAGE_ALIGN(size) >> PAGE_SHIFT; + + set_memory_nx((unsigned long)tramp, npages); + set_memory_rw((unsigned long)tramp, npages); module_memfree(tramp); } #else @@ -699,7 +703,7 @@ static inline void *alloc_tramp(unsigned long size) { return NULL; } -static inline void tramp_free(void *tramp) { } +static inline void tramp_free(void *tramp, int size) { } #endif /* Defined as markers to the end of the ftrace default trampolines */ @@ -771,7 +775,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* Copy ftrace_caller onto the trampoline memory */ ret = probe_kernel_read(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) { - tramp_free(trampoline); + tramp_free(trampoline, *tramp_size); return 0; } @@ -797,7 +801,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* Are we pointing to the reference? */ if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) { - tramp_free(trampoline); + tramp_free(trampoline, *tramp_size); return 0; } @@ -839,7 +843,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) unsigned long offset; unsigned long ip; unsigned int size; - int ret; + int ret, npages; if (ops->trampoline) { /* @@ -848,11 +852,14 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) */ if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; + npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT; + set_memory_rw(ops->trampoline, npages); } else { ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; ops->trampoline_size = size; + npages = PAGE_ALIGN(size) >> PAGE_SHIFT; } offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); @@ -863,6 +870,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops) /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); ret = update_ftrace_func(ip, new); + set_memory_ro(ops->trampoline, npages); /* The update should never fail */ WARN_ON(ret); @@ -939,7 +947,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; - tramp_free((void *)ops->trampoline); + tramp_free((void *)ops->trampoline, ops->trampoline_size); ops->trampoline = 0; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 43b7002f44fb..46c3c73e7f43 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -33,17 +33,120 @@ /* * Manage page tables very early on. */ -extern pgd_t early_level4_pgt[PTRS_PER_PGD]; +extern pgd_t early_top_pgt[PTRS_PER_PGD]; extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; -static unsigned int __initdata next_early_pgt = 2; +static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); +#define __head __section(.head.text) + +static void __head *fixup_pointer(void *ptr, unsigned long physaddr) +{ + return ptr - (void *)_text + (void *)physaddr; +} + +void __head __startup_64(unsigned long physaddr) +{ + unsigned long load_delta, *p; + pgdval_t *pgd; + p4dval_t *p4d; + pudval_t *pud; + pmdval_t *pmd, pmd_entry; + int i; + + /* Is the address too large? */ + if (physaddr >> MAX_PHYSMEM_BITS) + for (;;); + + /* + * Compute the delta between the address I am compiled to run at + * and the address I am actually running at. + */ + load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); + + /* Is the address not 2M aligned? */ + if (load_delta & ~PMD_PAGE_MASK) + for (;;); + + /* Fixup the physical addresses in the page table */ + + pgd = fixup_pointer(&early_top_pgt, physaddr); + pgd[pgd_index(__START_KERNEL_map)] += load_delta; + + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + p4d = fixup_pointer(&level4_kernel_pgt, physaddr); + p4d[511] += load_delta; + } + + pud = fixup_pointer(&level3_kernel_pgt, physaddr); + pud[510] += load_delta; + pud[511] += load_delta; + + pmd = fixup_pointer(level2_fixmap_pgt, physaddr); + pmd[506] += load_delta; + + /* + * Set up the identity mapping for the switchover. These + * entries should *NOT* have the global bit set! This also + * creates a bunch of nonsense entries but that is fine -- + * it avoids problems around wraparound. + */ + + pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + + i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; + pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE; + pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE; + + i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; + p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE; + p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE; + } else { + i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; + pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE; + pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE; + } + + i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; + pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE; + pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE; + + pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; + pmd_entry += physaddr; + + for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { + int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD; + pmd[idx] = pmd_entry + i * PMD_SIZE; + } + + /* + * Fixup the kernel text+data virtual addresses. Note that + * we might write invalid pmds, when the kernel is relocated + * cleanup_highmap() fixes this up along with the mappings + * beyond _end. + */ + + pmd = fixup_pointer(level2_kernel_pgt, physaddr); + for (i = 0; i < PTRS_PER_PMD; i++) { + if (pmd[i] & _PAGE_PRESENT) + pmd[i] += load_delta; + } + + /* Fixup phys_base */ + p = fixup_pointer(&phys_base, physaddr); + *p += load_delta; +} + /* Wipe all early page tables except for the kernel symbol map */ static void __init reset_early_page_tables(void) { - memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1)); + memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1)); next_early_pgt = 0; - write_cr3(__pa_nodebug(early_level4_pgt)); + write_cr3(__pa_nodebug(early_top_pgt)); } /* Create a new PMD entry */ @@ -51,15 +154,16 @@ int __init early_make_pgtable(unsigned long address) { unsigned long physaddr = address - __PAGE_OFFSET; pgdval_t pgd, *pgd_p; + p4dval_t p4d, *p4d_p; pudval_t pud, *pud_p; pmdval_t pmd, *pmd_p; /* Invalid address or early pgt is done ? */ - if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt)) + if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt)) return -1; again: - pgd_p = &early_level4_pgt[pgd_index(address)].pgd; + pgd_p = &early_top_pgt[pgd_index(address)].pgd; pgd = *pgd_p; /* @@ -67,8 +171,25 @@ again: * critical -- __PAGE_OFFSET would point us back into the dynamic * range and we might end up looping forever... */ - if (pgd) - pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + p4d_p = pgd_p; + else if (pgd) + p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); + else { + if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { + reset_early_page_tables(); + goto again; + } + + p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++]; + memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); + *pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; + } + p4d_p += p4d_index(address); + p4d = *p4d_p; + + if (p4d) + pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); else { if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { reset_early_page_tables(); @@ -77,7 +198,7 @@ again: pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; + *p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; } pud_p += pud_index(address); pud = *pud_p; @@ -156,7 +277,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) clear_bss(); - clear_page(init_level4_pgt); + clear_page(init_top_pgt); kasan_early_init(); @@ -171,8 +292,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - /* set init_level4_pgt kernel high mapping*/ - init_level4_pgt[511] = early_level4_pgt[511]; + /* set init_top_pgt kernel high mapping*/ + init_top_pgt[511] = early_top_pgt[511]; x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ac9d327d2e42..6225550883df 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -37,10 +37,11 @@ * */ +#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) -L4_START_KERNEL = pgd_index(__START_KERNEL_map) +PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) +PGD_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) .text @@ -72,101 +73,12 @@ startup_64: /* Sanitize CPU configuration */ call verify_cpu - /* - * Compute the delta between the address I am compiled to run at and the - * address I am actually running at. - */ - leaq _text(%rip), %rbp - subq $_text - __START_KERNEL_map, %rbp - - /* Is the address not 2M aligned? */ - testl $~PMD_PAGE_MASK, %ebp - jnz bad_address - - /* - * Is the address too large? - */ - leaq _text(%rip), %rax - shrq $MAX_PHYSMEM_BITS, %rax - jnz bad_address - - /* - * Fixup the physical addresses in the page table - */ - addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip) - - addq %rbp, level3_kernel_pgt + (510*8)(%rip) - addq %rbp, level3_kernel_pgt + (511*8)(%rip) - - addq %rbp, level2_fixmap_pgt + (506*8)(%rip) - - /* - * Set up the identity mapping for the switchover. These - * entries should *NOT* have the global bit set! This also - * creates a bunch of nonsense entries but that is fine -- - * it avoids problems around wraparound. - */ leaq _text(%rip), %rdi - leaq early_level4_pgt(%rip), %rbx - - movq %rdi, %rax - shrq $PGDIR_SHIFT, %rax - - leaq (PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx - movq %rdx, 0(%rbx,%rax,8) - movq %rdx, 8(%rbx,%rax,8) - - addq $PAGE_SIZE, %rdx - movq %rdi, %rax - shrq $PUD_SHIFT, %rax - andl $(PTRS_PER_PUD-1), %eax - movq %rdx, PAGE_SIZE(%rbx,%rax,8) - incl %eax - andl $(PTRS_PER_PUD-1), %eax - movq %rdx, PAGE_SIZE(%rbx,%rax,8) - - addq $PAGE_SIZE * 2, %rbx - movq %rdi, %rax - shrq $PMD_SHIFT, %rdi - addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax - leaq (_end - 1)(%rip), %rcx - shrq $PMD_SHIFT, %rcx - subq %rdi, %rcx - incl %ecx - -1: - andq $(PTRS_PER_PMD - 1), %rdi - movq %rax, (%rbx,%rdi,8) - incq %rdi - addq $PMD_SIZE, %rax - decl %ecx - jnz 1b - - test %rbp, %rbp - jz .Lskip_fixup + pushq %rsi + call __startup_64 + popq %rsi - /* - * Fixup the kernel text+data virtual addresses. Note that - * we might write invalid pmds, when the kernel is relocated - * cleanup_highmap() fixes this up along with the mappings - * beyond _end. - */ - leaq level2_kernel_pgt(%rip), %rdi - leaq PAGE_SIZE(%rdi), %r8 - /* See if it is a valid page table entry */ -1: testb $_PAGE_PRESENT, 0(%rdi) - jz 2f - addq %rbp, 0(%rdi) - /* Go to the next page */ -2: addq $8, %rdi - cmp %r8, %rdi - jne 1b - - /* Fixup phys_base */ - addq %rbp, phys_base(%rip) - -.Lskip_fixup: - movq $(early_level4_pgt - __START_KERNEL_map), %rax + movq $(early_top_pgt - __START_KERNEL_map), %rax jmp 1f ENTRY(secondary_startup_64) /* @@ -186,14 +98,17 @@ ENTRY(secondary_startup_64) /* Sanitize CPU configuration */ call verify_cpu - movq $(init_level4_pgt - __START_KERNEL_map), %rax + movq $(init_top_pgt - __START_KERNEL_map), %rax 1: - /* Enable PAE mode and PGE */ + /* Enable PAE mode, PGE and LA57 */ movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx +#ifdef CONFIG_X86_5LEVEL + orl $X86_CR4_LA57, %ecx +#endif movq %rcx, %cr4 - /* Setup early boot stage 4 level pagetables. */ + /* Setup early boot stage 4-/5-level pagetables. */ addq phys_base(%rip), %rax movq %rax, %cr3 @@ -417,9 +332,13 @@ GLOBAL(name) .endr __INITDATA -NEXT_PAGE(early_level4_pgt) +NEXT_PAGE(early_top_pgt) .fill 511,8,0 +#ifdef CONFIG_X86_5LEVEL + .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE +#else .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE +#endif NEXT_PAGE(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 @@ -427,14 +346,14 @@ NEXT_PAGE(early_dynamic_pgts) .data #ifndef CONFIG_XEN -NEXT_PAGE(init_level4_pgt) +NEXT_PAGE(init_top_pgt) .fill 512,8,0 #else -NEXT_PAGE(init_level4_pgt) +NEXT_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 + .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .org init_level4_pgt + L4_START_KERNEL*8, 0 + .org init_top_pgt + PGD_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE @@ -448,6 +367,12 @@ NEXT_PAGE(level2_ident_pgt) PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #endif +#ifdef CONFIG_X86_5LEVEL +NEXT_PAGE(level4_kernel_pgt) + .fill 511,8,0 + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE +#endif + NEXT_PAGE(level3_kernel_pgt) .fill L3_START_KERNEL,8,0 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 89ff7af2de50..16f82a3aaec7 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -285,7 +285,7 @@ static void hpet_legacy_clockevent_register(void) * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ - hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); + hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index); clockevents_config_and_register(&hpet_clockevent, hpet_freq, HPET_MIN_PROG_DELTA, 0x7FFFFFFF); global_clock_event = &hpet_clockevent; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index f34fe7444836..4aa03c5a14c9 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -432,84 +432,12 @@ int check_irq_vectors_for_cpu_disable(void) /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { - unsigned int irq, vector; - static int warned; + unsigned int irr, vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; - int ret; - for_each_irq_desc(irq, desc) { - int break_affinity = 0; - int set_affinity = 1; - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - /* interrupt's are disabled at this point */ - raw_spin_lock(&desc->lock); - - data = irq_desc_get_irq_data(desc); - affinity = irq_data_get_affinity_mask(data); - if (!irq_has_action(irq) || irqd_is_per_cpu(data) || - cpumask_subset(affinity, cpu_online_mask)) { - raw_spin_unlock(&desc->lock); - continue; - } - - /* - * Complete the irq move. This cpu is going down and for - * non intr-remapping case, we can't wait till this interrupt - * arrives at this cpu before completing the irq move. - */ - irq_force_complete_move(desc); - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; - affinity = cpu_online_mask; - } - - chip = irq_data_get_irq_chip(data); - /* - * The interrupt descriptor might have been cleaned up - * already, but it is not yet removed from the radix tree - */ - if (!chip) { - raw_spin_unlock(&desc->lock); - continue; - } - - if (!irqd_can_move_in_process_context(data) && chip->irq_mask) - chip->irq_mask(data); - - if (chip->irq_set_affinity) { - ret = chip->irq_set_affinity(data, affinity, true); - if (ret == -ENOSPC) - pr_crit("IRQ %d set affinity failed because there are no available vectors. The device assigned to this IRQ is unstable.\n", irq); - } else { - if (!(warned++)) - set_affinity = 0; - } - - /* - * We unmask if the irq was not marked masked by the - * core code. That respects the lazy irq disable - * behaviour. - */ - if (!irqd_can_move_in_process_context(data) && - !irqd_irq_masked(data) && chip->irq_unmask) - chip->irq_unmask(data); - - raw_spin_unlock(&desc->lock); - - if (break_affinity && set_affinity) - pr_notice("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - pr_notice("Cannot set affinity for irq %i\n", irq); - } + irq_migrate_all_off_this_cpu(); /* * We can remove mdelay() and then send spuriuous interrupts to @@ -528,8 +456,6 @@ void fixup_irqs(void) * nothing else will touch it. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irr; - if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 5b2bbfbb3712..6b877807598b 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include <linux/ftrace.h> #include <linux/frame.h> #include <linux/kasan.h> +#include <linux/moduleloader.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -417,6 +418,14 @@ static void prepare_boost(struct kprobe *p, struct insn *insn) } } +/* Recover page to RW mode before releasing it */ +void free_insn_page(void *page) +{ + set_memory_nx((unsigned long)page & PAGE_MASK, 1); + set_memory_rw((unsigned long)page & PAGE_MASK, 1); + module_memfree(page); +} + static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 901c640d152f..69ea0bc1cfa3 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -28,6 +28,7 @@ #include <linux/kdebug.h> #include <linux/kallsyms.h> #include <linux/ftrace.h> +#include <linux/frame.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -94,6 +95,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) } asm ( + "optprobe_template_func:\n" ".global optprobe_template_entry\n" "optprobe_template_entry:\n" #ifdef CONFIG_X86_64 @@ -131,7 +133,12 @@ asm ( " popf\n" #endif ".global optprobe_template_end\n" - "optprobe_template_end:\n"); + "optprobe_template_end:\n" + ".type optprobe_template_func, @function\n" + ".size optprobe_template_func, .-optprobe_template_func\n"); + +void optprobe_template_func(void); +STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_MOVE_IDX \ ((long)&optprobe_template_val - (long)&optprobe_template_entry) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index da5c09789984..43e10d6fdbed 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -161,8 +161,8 @@ void kvm_async_pf_task_wait(u32 token) */ rcu_irq_exit(); native_safe_halt(); - rcu_irq_enter(); local_irq_disable(); + rcu_irq_enter(); } } if (!n.halted) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index d4a15831ac58..a870910c8565 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -22,24 +22,25 @@ #include <asm/syscalls.h> /* context.lock is held for us, so we don't need any locking. */ -static void flush_ldt(void *current_mm) +static void flush_ldt(void *__mm) { + struct mm_struct *mm = __mm; mm_context_t *pc; - if (current->active_mm != current_mm) + if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) return; - pc = ¤t->active_mm->context; - set_ldt(pc->ldt->entries, pc->ldt->size); + pc = &mm->context; + set_ldt(pc->ldt->entries, pc->ldt->nr_entries); } /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ -static struct ldt_struct *alloc_ldt_struct(unsigned int size) +static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) { struct ldt_struct *new_ldt; unsigned int alloc_size; - if (size > LDT_ENTRIES) + if (num_entries > LDT_ENTRIES) return NULL; new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); @@ -47,7 +48,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size) return NULL; BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); - alloc_size = size * LDT_ENTRY_SIZE; + alloc_size = num_entries * LDT_ENTRY_SIZE; /* * Xen is very picky: it requires a page-aligned LDT that has no @@ -65,14 +66,14 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size) return NULL; } - new_ldt->size = size; + new_ldt->nr_entries = num_entries; return new_ldt; } /* After calling this, the LDT is immutable. */ static void finalize_ldt_struct(struct ldt_struct *ldt) { - paravirt_alloc_ldt(ldt->entries, ldt->size); + paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); } /* context.lock is held */ @@ -91,8 +92,8 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (likely(!ldt)) return; - paravirt_free_ldt(ldt->entries, ldt->size); - if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + paravirt_free_ldt(ldt->entries, ldt->nr_entries); + if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE) vfree_atomic(ldt->entries); else free_page((unsigned long)ldt->entries); @@ -122,14 +123,14 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) goto out_unlock; } - new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); + new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); if (!new_ldt) { retval = -ENOMEM; goto out_unlock; } memcpy(new_ldt->entries, old_mm->context.ldt->entries, - new_ldt->size * LDT_ENTRY_SIZE); + new_ldt->nr_entries * LDT_ENTRY_SIZE); finalize_ldt_struct(new_ldt); mm->context.ldt = new_ldt; @@ -152,9 +153,9 @@ void destroy_context_ldt(struct mm_struct *mm) static int read_ldt(void __user *ptr, unsigned long bytecount) { - int retval; - unsigned long size; struct mm_struct *mm = current->mm; + unsigned long entries_size; + int retval; mutex_lock(&mm->context.lock); @@ -166,18 +167,18 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - size = mm->context.ldt->size * LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; + entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE; + if (entries_size > bytecount) + entries_size = bytecount; - if (copy_to_user(ptr, mm->context.ldt->entries, size)) { + if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) { retval = -EFAULT; goto out_unlock; } - if (size != bytecount) { + if (entries_size != bytecount) { /* Zero-fill the rest and pretend we read bytecount bytes. */ - if (clear_user(ptr + size, bytecount - size)) { + if (clear_user(ptr + entries_size, bytecount - entries_size)) { retval = -EFAULT; goto out_unlock; } @@ -208,7 +209,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { struct mm_struct *mm = current->mm; struct ldt_struct *new_ldt, *old_ldt; - unsigned int oldsize, newsize; + unsigned int old_nr_entries, new_nr_entries; struct user_desc ldt_info; struct desc_struct ldt; int error; @@ -247,17 +248,18 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) mutex_lock(&mm->context.lock); - old_ldt = mm->context.ldt; - oldsize = old_ldt ? old_ldt->size : 0; - newsize = max(ldt_info.entry_number + 1, oldsize); + old_ldt = mm->context.ldt; + old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; + new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries); error = -ENOMEM; - new_ldt = alloc_ldt_struct(newsize); + new_ldt = alloc_ldt_struct(new_nr_entries); if (!new_ldt) goto out_unlock; if (old_ldt) - memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); + memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE); + new_ldt->entries[ldt_info.entry_number] = ldt; finalize_ldt_struct(new_ldt); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 6f5ca4ebe6e5..cb0a30473c23 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -347,7 +347,7 @@ void machine_kexec(struct kimage *image) void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_NUMBER(phys_base); - VMCOREINFO_SYMBOL(init_level4_pgt); + VMCOREINFO_SYMBOL(init_top_pgt); #ifdef CONFIG_NUMA VMCOREINFO_SYMBOL(node_data); diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 6d9582ec0324..d27f8d84c4ff 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -78,7 +78,7 @@ static void __init test_nmi_ipi(struct cpumask *mask) /* Don't wait longer than a second */ timeout = USEC_PER_SEC; - while (!cpumask_empty(mask) && timeout--) + while (!cpumask_empty(mask) && --timeout) udelay(1); /* What happens if we timeout, do we still unregister?? */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 3586996fc50d..bc0a849589bb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -391,7 +391,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .read_cr2 = native_read_cr2, .write_cr2 = native_write_cr2, - .read_cr3 = native_read_cr3, + .read_cr3 = __native_read_cr3, .write_cr3 = native_write_cr3, .flush_tlb_user = native_flush_tlb, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0bb88428cbf2..3ca198080ea9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } /* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct inactive_task_frame *frame = - (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); - return READ_ONCE_NOCHECK(frame->ret_addr); -} - -/* * Called from fs/proc with a reference on @p to find the function * which called into schedule(). This needs to be done carefully * because the task might wake up and we might look at a stack diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index ff40e74c9181..c6d6dc5f8bb2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip); printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags, - smp_processor_id()); + raw_smp_processor_id()); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); @@ -92,7 +92,7 @@ void __show_regs(struct pt_regs *regs, int all) cr0 = read_cr0(); cr2 = read_cr2(); - cr3 = read_cr3(); + cr3 = __read_cr3(); cr4 = __read_cr4(); printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b6840bf3940b..c3169be4c596 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -104,7 +104,7 @@ void __show_regs(struct pt_regs *regs, int all) cr0 = read_cr0(); cr2 = read_cr2(); - cr3 = read_cr3(); + cr3 = __read_cr3(); cr4 = __read_cr4(); printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", @@ -142,7 +142,7 @@ void release_thread(struct task_struct *dead_task) pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt->entries, - dead_task->mm->context.ldt->size); + dead_task->mm->context.ldt->nr_entries); BUG(); } #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2544700a2a87..67393fc88353 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/tboot.h> #include <linux/delay.h> +#include <linux/frame.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> @@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type) #ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(machine_real_restart); #endif +STACK_FRAME_NON_STANDARD(machine_real_restart); /* * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0b4d3c686b1e..65622f07e633 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -503,7 +503,7 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN); + low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); @@ -980,8 +980,6 @@ void __init setup_arch(char **cmdline_p) */ x86_configure_nx(); - simple_udelay_calibration(); - parse_early_param(); #ifdef CONFIG_MEMORY_HOTPLUG @@ -1041,6 +1039,8 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); + simple_udelay_calibration(); + x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f04479a8f74f..b474c8de7fba 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -863,7 +863,7 @@ static void announce_cpu(int cpu, int apicid) if (cpu == 1) printk(KERN_INFO "x86: Booting SMP configuration:\n"); - if (system_state == SYSTEM_BOOTING) { + if (system_state < SYSTEM_RUNNING) { if (node != current_node) { if (current_node > (-1)) pr_cont("\n"); @@ -1589,7 +1589,6 @@ void native_cpu_die(unsigned int cpu) void play_dead_common(void) { idle_task_exit(); - reset_lazy_tlbstate(); /* Ack it */ (void)cpu_report_death(); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index f07f83b3611b..5f25cfbd952e 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -34,7 +34,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re mutex_lock(&child->mm->context.lock); if (unlikely(!child->mm->context.ldt || - seg >= child->mm->context.ldt->size)) + seg >= child->mm->context.ldt->nr_entries)) addr = -1L; /* bogus selector, access would fault */ else { desc = &child->mm->context.ldt->entries[seg]; diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 207b8f2582c7..213ddf3e937d 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (end - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -187,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 4b1724059909..a4eb27918ceb 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,7 +514,7 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; - if (!intel_iommu_tboot_noforce) + if (intel_iommu_tboot_noforce) return 1; if (no_iommu || swiotlb || dmar_disabled) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index d39c09119db6..e0754cdbad37 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -66,7 +66,7 @@ static struct irqaction irq0 = { .name = "timer" }; -void __init setup_default_timer_irq(void) +static void __init setup_default_timer_irq(void) { if (!nr_legacy_irqs()) return; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3995d3a777d4..bf54309b85da 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -182,7 +182,7 @@ int is_valid_bugaddr(unsigned long addr) return ud == INSN_UD0 || ud == INSN_UD2; } -static int fixup_bug(struct pt_regs *regs, int trapnr) +int fixup_bug(struct pt_regs *regs, int trapnr) { if (trapnr != X86_TRAP_UD) return 0; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 714dfba6a1e7..796d96bb0821 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -51,115 +51,34 @@ static u32 art_to_tsc_denominator; static u64 art_to_tsc_offset; struct clocksource *art_related_clocksource; -/* - * Use a ring-buffer like data structure, where a writer advances the head by - * writing a new data entry and a reader advances the tail when it observes a - * new entry. - * - * Writers are made to wait on readers until there's space to write a new - * entry. - * - * This means that we can always use an {offset, mul} pair to compute a ns - * value that is 'roughly' in the right direction, even if we're writing a new - * {offset, mul} pair during the clock read. - * - * The down-side is that we can no longer guarantee strict monotonicity anymore - * (assuming the TSC was that to begin with), because while we compute the - * intersection point of the two clock slopes and make sure the time is - * continuous at the point of switching; we can no longer guarantee a reader is - * strictly before or after the switch point. - * - * It does mean a reader no longer needs to disable IRQs in order to avoid - * CPU-Freq updates messing with his times, and similarly an NMI reader will - * no longer run the risk of hitting half-written state. - */ - struct cyc2ns { - struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ - struct cyc2ns_data *head; /* 48 + 8 = 56 */ - struct cyc2ns_data *tail; /* 56 + 8 = 64 */ -}; /* exactly fits one cacheline */ - -static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); - -struct cyc2ns_data *cyc2ns_read_begin(void) -{ - struct cyc2ns_data *head; - - preempt_disable(); + struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */ + seqcount_t seq; /* 32 + 4 = 36 */ - head = this_cpu_read(cyc2ns.head); - /* - * Ensure we observe the entry when we observe the pointer to it. - * matches the wmb from cyc2ns_write_end(). - */ - smp_read_barrier_depends(); - head->__count++; - barrier(); - - return head; -} +}; /* fits one cacheline */ -void cyc2ns_read_end(struct cyc2ns_data *head) -{ - barrier(); - /* - * If we're the outer most nested read; update the tail pointer - * when we're done. This notifies possible pending writers - * that we've observed the head pointer and that the other - * entry is now free. - */ - if (!--head->__count) { - /* - * x86-TSO does not reorder writes with older reads; - * therefore once this write becomes visible to another - * cpu, we must be finished reading the cyc2ns_data. - * - * matches with cyc2ns_write_begin(). - */ - this_cpu_write(cyc2ns.tail, head); - } - preempt_enable(); -} +static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -/* - * Begin writing a new @data entry for @cpu. - * - * Assumes some sort of write side lock; currently 'provided' by the assumption - * that cpufreq will call its notifiers sequentially. - */ -static struct cyc2ns_data *cyc2ns_write_begin(int cpu) +void cyc2ns_read_begin(struct cyc2ns_data *data) { - struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); - struct cyc2ns_data *data = c2n->data; + int seq, idx; - if (data == c2n->head) - data++; + preempt_disable_notrace(); - /* XXX send an IPI to @cpu in order to guarantee a read? */ + do { + seq = this_cpu_read(cyc2ns.seq.sequence); + idx = seq & 1; - /* - * When we observe the tail write from cyc2ns_read_end(), - * the cpu must be done with that entry and its safe - * to start writing to it. - */ - while (c2n->tail == data) - cpu_relax(); + data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset); + data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul); + data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift); - return data; + } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) +void cyc2ns_read_end(void) { - struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); - - /* - * Ensure the @data writes are visible before we publish the - * entry. Matches the data-depencency in cyc2ns_read_begin(). - */ - smp_wmb(); - - ACCESS_ONCE(c2n->head) = data; + preempt_enable_notrace(); } /* @@ -191,7 +110,6 @@ static void cyc2ns_data_init(struct cyc2ns_data *data) data->cyc2ns_mul = 0; data->cyc2ns_shift = 0; data->cyc2ns_offset = 0; - data->__count = 0; } static void cyc2ns_init(int cpu) @@ -201,51 +119,29 @@ static void cyc2ns_init(int cpu) cyc2ns_data_init(&c2n->data[0]); cyc2ns_data_init(&c2n->data[1]); - c2n->head = c2n->data; - c2n->tail = c2n->data; + seqcount_init(&c2n->seq); } static inline unsigned long long cycles_2_ns(unsigned long long cyc) { - struct cyc2ns_data *data, *tail; + struct cyc2ns_data data; unsigned long long ns; - /* - * See cyc2ns_read_*() for details; replicated in order to avoid - * an extra few instructions that came with the abstraction. - * Notable, it allows us to only do the __count and tail update - * dance when its actually needed. - */ - - preempt_disable_notrace(); - data = this_cpu_read(cyc2ns.head); - tail = this_cpu_read(cyc2ns.tail); - - if (likely(data == tail)) { - ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); - } else { - data->__count++; - - barrier(); + cyc2ns_read_begin(&data); - ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); + ns = data.cyc2ns_offset; + ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift); - barrier(); - - if (!--data->__count) - this_cpu_write(cyc2ns.tail, data); - } - preempt_enable_notrace(); + cyc2ns_read_end(); return ns; } -static void set_cyc2ns_scale(unsigned long khz, int cpu) +static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) { - unsigned long long tsc_now, ns_now; - struct cyc2ns_data *data; + unsigned long long ns_now; + struct cyc2ns_data data; + struct cyc2ns *c2n; unsigned long flags; local_irq_save(flags); @@ -254,9 +150,6 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu) if (!khz) goto done; - data = cyc2ns_write_begin(cpu); - - tsc_now = rdtsc(); ns_now = cycles_2_ns(tsc_now); /* @@ -264,7 +157,7 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz, + clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz, NSEC_PER_MSEC, 0); /* @@ -273,20 +166,26 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu) * conversion algorithm shifting a 32-bit value (now specifies a 64-bit * value) - refer perf_event_mmap_page documentation in perf_event.h. */ - if (data->cyc2ns_shift == 32) { - data->cyc2ns_shift = 31; - data->cyc2ns_mul >>= 1; + if (data.cyc2ns_shift == 32) { + data.cyc2ns_shift = 31; + data.cyc2ns_mul >>= 1; } - data->cyc2ns_offset = ns_now - - mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift); + data.cyc2ns_offset = ns_now - + mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift); - cyc2ns_write_end(cpu, data); + c2n = per_cpu_ptr(&cyc2ns, cpu); + + raw_write_seqcount_latch(&c2n->seq); + c2n->data[0] = data; + raw_write_seqcount_latch(&c2n->seq); + c2n->data[1] = data; done: - sched_clock_idle_wakeup_event(0); + sched_clock_idle_wakeup_event(); local_irq_restore(flags); } + /* * Scheduler clock - returns current time in nanosec units. */ @@ -374,6 +273,8 @@ static int __init tsc_setup(char *str) tsc_clocksource_reliable = 1; if (!strncmp(str, "noirqtime", 9)) no_sched_irq_time = 1; + if (!strcmp(str, "unstable")) + mark_tsc_unstable("boot parameter"); return 1; } @@ -986,7 +887,6 @@ void tsc_restore_sched_clock_state(void) } #ifdef CONFIG_CPU_FREQ - /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency * changes. * @@ -1027,7 +927,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); - set_cyc2ns_scale(tsc_khz, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); } return 0; @@ -1127,6 +1027,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) pr_info("Marking TSC unstable due to clocksource watchdog\n"); } +static void tsc_cs_tick_stable(struct clocksource *cs) +{ + if (tsc_unstable) + return; + + if (using_native_sched_clock()) + sched_clock_tick_stable(); +} + /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ @@ -1140,6 +1049,7 @@ static struct clocksource clocksource_tsc = { .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, + .tick_stable = tsc_cs_tick_stable, }; void mark_tsc_unstable(char *reason) @@ -1255,6 +1165,7 @@ static void tsc_refine_calibration_work(struct work_struct *work) static int hpet; u64 tsc_stop, ref_stop, delta; unsigned long freq; + int cpu; /* Don't bother refining TSC on unstable systems */ if (check_tsc_unstable()) @@ -1305,6 +1216,10 @@ static void tsc_refine_calibration_work(struct work_struct *work) /* Inform the TSC deadline clockevent devices about the recalibration */ lapic_update_tsc_freq(); + /* Update the sched_clock() rate to match the clocksource one */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(tsc_khz, cpu, tsc_stop); + out: if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; @@ -1350,7 +1265,7 @@ device_initcall(init_tsc_clocksource); void __init tsc_init(void) { - u64 lpj; + u64 lpj, cyc; int cpu; if (!boot_cpu_has(X86_FEATURE_TSC)) { @@ -1390,9 +1305,10 @@ void __init tsc_init(void) * speed as the bootup CPU. (cpufreq notifiers will fix this * up if their speed diverges) */ + cyc = rdtsc(); for_each_possible_cpu(cpu) { cyc2ns_init(cpu); - set_cyc2ns_scale(tsc_khz, cpu); + set_cyc2ns_scale(tsc_khz, cpu, cyc); } if (tsc_disabled > 0) @@ -1412,11 +1328,11 @@ void __init tsc_init(void) use_tsc_delay(); + check_system_tsc_reliable(); + if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); - check_system_tsc_reliable(); - detect_art(); } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 728f75378475..7842371bc9e4 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -71,13 +71,8 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, * non zero. We don't do that on non boot cpus because physical * hotplug should have set the ADJUST register to a value > 0 so * the TSC is in sync with the already running cpus. - * - * But we always force positive ADJUST values. Otherwise the TSC - * deadline timer creates an interrupt storm. We also have to - * prevent values > 0x7FFFFFFF as those wreckage the timer as well. */ - if ((bootcpu && bootval != 0) || (!bootcpu && bootval < 0) || - (bootval > 0x7FFFFFFF)) { + if (bootcpu && bootval != 0) { pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu, bootval); wrmsrl(MSR_IA32_TSC_ADJUST, 0); @@ -451,20 +446,6 @@ retry: */ cur->adjusted += cur_max_warp; - /* - * TSC deadline timer stops working or creates an interrupt storm - * with adjust values < 0 and > x07ffffff. - * - * To allow adjust values > 0x7FFFFFFF we need to disable the - * deadline timer and use the local APIC timer, but that requires - * more intrusive changes and we do not have any useful information - * from Intel about the underlying HW wreckage yet. - */ - if (cur->adjusted < 0) - cur->adjusted = 0; - if (cur->adjusted > 0x7FFFFFFF) - cur->adjusted = 0x7FFFFFFF; - pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n", cpu, cur_max_warp, cur->adjusted); diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 82c6d7f1fd73..b9389d72b2f7 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -104,6 +104,11 @@ static inline unsigned long *last_frame(struct unwind_state *state) return (unsigned long *)task_pt_regs(state->task) - 2; } +static bool is_last_frame(struct unwind_state *state) +{ + return state->bp == last_frame(state); +} + #ifdef CONFIG_X86_32 #define GCC_REALIGN_WORDS 3 #else @@ -115,16 +120,15 @@ static inline unsigned long *last_aligned_frame(struct unwind_state *state) return last_frame(state) - GCC_REALIGN_WORDS; } -static bool is_last_task_frame(struct unwind_state *state) +static bool is_last_aligned_frame(struct unwind_state *state) { unsigned long *last_bp = last_frame(state); unsigned long *aligned_bp = last_aligned_frame(state); /* - * We have to check for the last task frame at two different locations - * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame in the prologue of a function - * called by head/entry code. Examples: + * GCC can occasionally decide to realign the stack pointer and change + * the offset of the stack frame in the prologue of a function called + * by head/entry code. Examples: * * <start_secondary>: * push %edi @@ -141,11 +145,38 @@ static bool is_last_task_frame(struct unwind_state *state) * push %rbp * mov %rsp,%rbp * - * Note that after aligning the stack, it pushes a duplicate copy of - * the return address before pushing the frame pointer. + * After aligning the stack, it pushes a duplicate copy of the return + * address before pushing the frame pointer. + */ + return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); +} + +static bool is_last_ftrace_frame(struct unwind_state *state) +{ + unsigned long *last_bp = last_frame(state); + unsigned long *last_ftrace_bp = last_bp - 3; + + /* + * When unwinding from an ftrace handler of a function called by entry + * code, the stack layout of the last frame is: + * + * bp + * parent ret addr + * bp + * function ret addr + * parent ret addr + * pt_regs + * ----------------- */ - return (state->bp == last_bp || - (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); + return (state->bp == last_ftrace_bp && + *state->bp == *(state->bp + 2) && + *(state->bp + 1) == *(state->bp + 4)); +} + +static bool is_last_task_frame(struct unwind_state *state) +{ + return is_last_frame(state) || is_last_aligned_frame(state) || + is_last_ftrace_frame(state); } /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a181ae76c71c..59ca2eea522c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -780,18 +780,20 @@ out: static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) { struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; - int j, nent = vcpu->arch.cpuid_nent; + struct kvm_cpuid_entry2 *ej; + int j = i; + int nent = vcpu->arch.cpuid_nent; e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; /* when no next entry is found, the current entry[i] is reselected */ - for (j = i + 1; ; j = (j + 1) % nent) { - struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; - if (ej->function == e->function) { - ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; - return j; - } - } - return 0; /* silence gcc, even though control never reaches here */ + do { + j = (j + 1) % nent; + ej = &vcpu->arch.cpuid_entries[j]; + } while (ej->function != e->function); + + ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; + + return j; } /* find an entry with matching function, matching index (if needed), and that diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0816ab2e8adc..80890dee66ce 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c329d2894905..d24c8742d9b0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; + preempt_enable(); } static bool start_hv_timer(struct kvm_lapic *apic) @@ -1934,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) for (i = 0; i < KVM_APIC_LVT_NUM; i++) kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); apic_update_lvtt(apic); - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) + if (kvm_vcpu_is_reset_bsp(vcpu) && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) kvm_lapic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5d3376f67794..cb8225969255 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3698,12 +3698,15 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } -static bool can_do_async_pf(struct kvm_vcpu *vcpu) +bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) { if (unlikely(!lapic_in_kernel(vcpu) || kvm_event_needs_reinjection(vcpu))) return false; + if (is_guest_mode(vcpu)) + return false; + return kvm_x86_ops->interrupt_allowed(vcpu); } @@ -3719,7 +3722,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, if (!async) return false; /* *pfn has correct page already */ - if (!prefault && can_do_async_pf(vcpu)) { + if (!prefault && kvm_can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(gva, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { trace_kvm_async_pf_doublefault(gva, gfn); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 27975807cc64..330bf3a811fb 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -76,6 +76,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty); +bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 183ddb235fb4..33460fcdeef9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -36,6 +36,7 @@ #include <linux/slab.h> #include <linux/amd-iommu.h> #include <linux/hashtable.h> +#include <linux/frame.h> #include <asm/apic.h> #include <asm/perf_event.h> @@ -1807,7 +1808,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, * AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present" */ - var->unusable = !var->present || (var->type == 0); + var->unusable = !var->present; switch (seg) { case VCPU_SREG_TR: @@ -1840,6 +1841,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + /* This is symmetric with svm_set_segment() */ var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } @@ -1980,18 +1982,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->base = var->base; s->limit = var->limit; s->selector = var->selector; - if (var->unusable) - s->attrib = 0; - else { - s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); - s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; - s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; - s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; - s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; - s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; - s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; - s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; - } + s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); + s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; + s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; + s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; + s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; + s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; + s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; + s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; /* * This is always accurate, except if SYSRET returned to a segment @@ -2000,7 +1998,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, * would entail passing the CPL to userspace and back. */ if (seg == VCPU_SREG_SS) - svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + /* This is symmetric with svm_get_segment() */ + svm->vmcb->save.cpl = (var->dpl & 3); mark_dirty(svm->vmcb, VMCB_SEG); } @@ -4908,6 +4907,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } +STACK_FRAME_NON_STANDARD(svm_vcpu_run); static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 72f78396bc09..6dcc4873e435 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/tboot.h> #include <linux/hrtimer.h> +#include <linux/frame.h> #include "kvm_cache_regs.h" #include "x86.h" @@ -48,6 +49,7 @@ #include <asm/kexec.h> #include <asm/apic.h> #include <asm/irq_remapping.h> +#include <asm/mmu_context.h> #include "trace.h" #include "pmu.h" @@ -596,6 +598,7 @@ struct vcpu_vmx { int gs_ldt_reload_needed; int fs_reload_needed; u64 msr_host_bndcfgs; + unsigned long vmcs_host_cr3; /* May not match real cr3 */ unsigned long vmcs_host_cr4; /* May not match real cr4 */ } host_state; struct { @@ -2425,7 +2428,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) if (!(vmcs12->exception_bitmap & (1u << nr))) return 0; - nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, vmcs_read32(VM_EXIT_INTR_INFO), vmcs_readl(EXIT_QUALIFICATION)); return 1; @@ -5012,12 +5015,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) u32 low32, high32; unsigned long tmpl; struct desc_ptr dt; - unsigned long cr0, cr4; + unsigned long cr0, cr3, cr4; cr0 = read_cr0(); WARN_ON(cr0 & X86_CR0_TS); vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ - vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ + + /* + * Save the most likely value for this task's CR3 in the VMCS. + * We can't use __get_current_cr3_fast() because we're not atomic. + */ + cr3 = __read_cr3(); + vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ + vmx->host_state.vmcs_host_cr3 = cr3; /* Save the most likely value for this task's CR4 in the VMCS. */ cr4 = cr4_read_shadow(); @@ -6914,97 +6924,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, return 0; } -/* - * This function performs the various checks including - * - if it's 4KB aligned - * - No bits beyond the physical address width are set - * - Returns 0 on success or else 1 - * (Intel SDM Section 30.3) - */ -static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, - gpa_t *vmpointer) +static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) { gva_t gva; - gpa_t vmptr; struct x86_exception e; - struct page *page; - struct vcpu_vmx *vmx = to_vmx(vcpu); - int maxphyaddr = cpuid_maxphyaddr(vcpu); if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) return 1; - if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, - sizeof(vmptr), &e)) { + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer, + sizeof(*vmpointer), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } - switch (exit_reason) { - case EXIT_REASON_VMON: - /* - * SDM 3: 24.11.5 - * The first 4 bytes of VMXON region contain the supported - * VMCS revision identifier - * - * Note - IA32_VMX_BASIC[48] will never be 1 - * for the nested case; - * which replaces physical address width with 32 - * - */ - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - if (*(u32 *)kmap(page) != VMCS12_REVISION) { - kunmap(page); - nested_release_page_clean(page); - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - kunmap(page); - nested_release_page_clean(page); - vmx->nested.vmxon_ptr = vmptr; - break; - case EXIT_REASON_VMCLEAR: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - case EXIT_REASON_VMPTRLD: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - default: - return 1; /* shouldn't happen */ - } - - if (vmpointer) - *vmpointer = vmptr; return 0; } @@ -7066,6 +7000,8 @@ out_msr_bitmap: static int handle_vmon(struct kvm_vcpu *vcpu) { int ret; + gpa_t vmptr; + struct page *page; struct vcpu_vmx *vmx = to_vmx(vcpu); const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -7095,9 +7031,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - + + /* + * SDM 3: 24.11.5 + * The first 4 bytes of VMXON region contain the supported + * VMCS revision identifier + * + * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; + * which replaces physical address width with 32 + */ + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + + page = nested_get_page(vcpu, vmptr); + if (page == NULL) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + if (*(u32 *)kmap(page) != VMCS12_REVISION) { + kunmap(page); + nested_release_page_clean(page); + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + kunmap(page); + nested_release_page_clean(page); + + vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); if (ret) return ret; @@ -7213,9 +7177,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); @@ -7545,9 +7519,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmx->nested.current_vmptr != vmptr) { struct vmcs12 *new_vmcs12; struct page *page; @@ -7913,11 +7897,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; - int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_readl(vcpu, reg); + int reg; + unsigned long val; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + reg = (exit_qualification >> 8) & 15; + val = kvm_register_readl(vcpu, reg); switch (cr) { case 0: if (vmcs12->cr0_guest_host_mask & @@ -7972,6 +7958,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * lmsw can change bits 1..3 of cr0, and only set bit 0 of * cr0. Other attempted changes are ignored, with no exit. */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) return true; @@ -8675,6 +8662,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) ); } } +STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); static bool vmx_has_high_real_mode_segbase(void) { @@ -8843,7 +8831,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long debugctlmsr, cr4; + unsigned long debugctlmsr, cr3, cr4; /* Don't enter VMX if guest state is invalid, let the exit handler start emulation until we arrive back to a valid state */ @@ -8865,6 +8853,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->host_state.vmcs_host_cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { vmcs_writel(HOST_CR4, cr4); @@ -9051,6 +9045,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); } +STACK_FRAME_NON_STANDARD(vmx_vcpu_run); static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02363e37d4a6..0e846f0cb83b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; + ctxt->eip = kvm_rip_read(vcpu); ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : @@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) { struct kvm_run *kvm_run = vcpu->run; - /* - * rflags is the old, "raw" value of the flags. The new value has - * not been saved yet. - * - * This is correct even for TF set by the guest, because "the - * processor will not generate this exception after the instruction - * that sets the TF flag". - */ - if (unlikely(rflags & X86_EFLAGS_TF)) { - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | - DR6_RTM; - kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - /* - * "Certain debug exceptions may clear bit 0-3. The - * remaining contents of the DR6 register are never - * cleared by the processor". - */ - vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - } + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + *r = EMULATE_USER_EXIT; + } else { + /* + * "Certain debug exceptions may clear bit 0-3. The + * remaining contents of the DR6 register are never + * cleared by the processor". + */ + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= DR6_BS | DR6_RTM; + kvm_queue_exception(vcpu, DB_VECTOR); } } @@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) int r = EMULATE_DONE; kvm_x86_ops->skip_emulated_instruction(vcpu); - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + + /* + * rflags is the old, "raw" value of the flags. The new value has + * not been saved yet. + * + * This is correct even for TF set by the guest, because "the + * processor will not generate this exception after the instruction + * that sets the TF flag". + */ + if (unlikely(rflags & X86_EFLAGS_TF)) + kvm_vcpu_do_singlestep(vcpu, &r); return r == EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -5726,8 +5727,9 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE) - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + if (r == EMULATE_DONE && + (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) __kvm_set_rflags(vcpu, ctxt->eflags); @@ -8394,10 +8396,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (vcpu->arch.pv.pv_unhalted) return true; - if (atomic_read(&vcpu->arch.nmi_queued)) + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + (vcpu->arch.nmi_pending && + kvm_x86_ops->nmi_allowed(vcpu))) return true; - if (kvm_test_request(KVM_REQ_SMI, vcpu)) + if (kvm_test_request(KVM_REQ_SMI, vcpu) || + (vcpu->arch.smi_pending && !is_smm(vcpu))) return true; if (kvm_arch_interrupt_allowed(vcpu) && @@ -8604,8 +8609,7 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)) return true; else - return !kvm_event_needs_reinjection(vcpu) && - kvm_x86_ops->interrupt_allowed(vcpu); + return kvm_can_do_async_pf(vcpu); } void kvm_arch_start_assignment(struct kvm *kvm) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index c5959576c315..020f75cc8cf6 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled) movl %edx,%ecx andl $63,%edx shrl $6,%ecx - jz 17f + jz .L_copy_short_string 1: movq (%rsi),%r8 2: movq 1*8(%rsi),%r9 3: movq 2*8(%rsi),%r10 @@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled) leaq 64(%rdi),%rdi decl %ecx jnz 1b -17: movl %edx,%ecx +.L_copy_short_string: + movl %edx,%ecx andl $7,%edx shrl $3,%ecx jz 20f @@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string) */ ENTRY(copy_user_enhanced_fast_string) ASM_STAC + cmpl $64,%edx + jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */ movl %edx,%ecx 1: rep movsb diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index c81556409bbb..10ffa7e8519f 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -13,14 +13,14 @@ .macro op_safe_regs op ENTRY(\op\()_safe_regs) pushq %rbx - pushq %rbp + pushq %r12 movq %rdi, %r10 /* Save pointer */ xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax movl 4(%rdi), %ecx movl 8(%rdi), %edx movl 12(%rdi), %ebx - movl 20(%rdi), %ebp + movl 20(%rdi), %r12d movl 24(%rdi), %esi movl 28(%rdi), %edi 1: \op @@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs) movl %ecx, 4(%r10) movl %edx, 8(%r10) movl %ebx, 12(%r10) - movl %ebp, 20(%r10) + movl %r12d, 20(%r10) movl %esi, 24(%r10) movl %edi, 28(%r10) - popq %rbp + popq %r12 popq %rbx ret 3: diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 767be7c76034..12e377184ee4 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -1009,7 +1009,7 @@ GrpTable: Grp15 1: fxstor | RDGSBASE Ry (F3),(11B) 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) -4: XSAVE +4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) 7: clflush | clflushopt (66) | sfence (11B) diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 5e044d506b7a..a179254a5122 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -27,7 +27,7 @@ static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) #ifdef CONFIG_MODIFY_LDT_SYSCALL seg >>= 3; mutex_lock(¤t->mm->context.lock); - if (current->mm->context.ldt && seg < current->mm->context.ldt->size) + if (current->mm->context.ldt && seg < current->mm->context.ldt->nr_entries) ret = current->mm->context.ldt->entries[seg]; mutex_unlock(¤t->mm->context.lock); #endif diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 96d2b847e09e..0fbdcb64f9f8 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -2,7 +2,7 @@ KCOV_INSTRUMENT_tlb.o := n obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index bce6990b1d81..0470826d2bdc 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -431,7 +431,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, bool checkwx) { #ifdef CONFIG_X86_64 - pgd_t *start = (pgd_t *) &init_level4_pgt; + pgd_t *start = (pgd_t *) &init_top_pgt; #else pgd_t *start = swapper_pg_dir; #endif diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 35ea061010a1..0ea8afcb929c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -162,6 +162,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) if (fixup_exception(regs, trapnr)) return; + if (fixup_bug(regs, trapnr)) + return; + fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8ad91a01cbc8..2a1fa10c6a98 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -346,7 +346,7 @@ static noinline int vmalloc_fault(unsigned long address) * Do _not_ use "current" here. We might be inside * an interrupt in the middle of a task switch.. */ - pgd_paddr = read_cr3(); + pgd_paddr = read_cr3_pa(); pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); if (!pmd_k) return -1; @@ -388,7 +388,7 @@ static bool low_pfn(unsigned long pfn) static void dump_pagetable(unsigned long address) { - pgd_t *base = __va(read_cr3()); + pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = &base[pgd_index(address)]; p4d_t *p4d; pud_t *pud; @@ -451,7 +451,7 @@ static noinline int vmalloc_fault(unsigned long address) * happen within a race in page table update. In the later * case just flush: */ - pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address); + pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; @@ -555,7 +555,7 @@ static int bad_address(void *p) static void dump_pagetable(unsigned long address) { - pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = base + pgd_index(address); p4d_t *p4d; pud_t *pud; @@ -700,7 +700,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, pgd_t *pgd; pte_t *pte; - pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd = __va(read_cr3_pa()); pgd += pgd_index(address); pte = lookup_address_in_pgd(pgd, address, &level); diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c deleted file mode 100644 index 456dfdfd2249..000000000000 --- a/arch/x86/mm/gup.c +++ /dev/null @@ -1,496 +0,0 @@ -/* - * Lockless get_user_pages_fast for x86 - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmstat.h> -#include <linux/highmem.h> -#include <linux/swap.h> -#include <linux/memremap.h> - -#include <asm/mmu_context.h> -#include <asm/pgtable.h> - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#ifndef CONFIG_X86_PAE - return READ_ONCE(*ptep); -#else - /* - * With get_user_pages_fast, we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atomically, - * but that is not possible (without expensive cmpxchg8b) on PAE. What - * we do have is the guarantee that a pte will only either go from not - * present to present, or present to not present or both -- it will not - * switch to a completely different present page without a TLB flush in - * between; something that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees l iff pte_high - * sees h. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have got rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. And get_user_pages_fast only operates on present ptes, so - * we're safe. - * - * gup_get_pte should not be used or copied outside gup.c without being - * very careful -- it does not atomically load the pte or anything that - * is likely to be useful for you. - */ - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#endif -} - -static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) -{ - while ((*nr) - nr_start) { - struct page *page = pages[--(*nr)]; - - ClearPageReferenced(page); - put_page(page); - } -} - -/* - * 'pteval' can come from a pte, pmd, pud or p4d. We only check - * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the - * same value on all 4 types. - */ -static inline int pte_allows_gup(unsigned long pteval, int write) -{ - unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; - - if (write) - need_pte_bits |= _PAGE_RW; - - if ((pteval & need_pte_bits) != need_pte_bits) - return 0; - - /* Check memory protection keys permissions. */ - if (!__pkru_allows_pkey(pte_flags_pkey(pteval), write)) - return 0; - - return 1; -} - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct dev_pagemap *pgmap = NULL; - int nr_start = *nr, ret = 0; - pte_t *ptep, *ptem; - - /* - * Keep the original mapped PTE value (ptem) around since we - * might increment ptep off the end of the page when finishing - * our loop iteration. - */ - ptem = ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) - break; - - if (!pte_allows_gup(pte_val(pte), write)) - break; - - if (pte_devmap(pte)) { - pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); - if (unlikely(!pgmap)) { - undo_dev_pagemap(nr, nr_start, pages); - break; - } - } else if (pte_special(pte)) - break; - - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - put_dev_pagemap(pgmap); - SetPageReferenced(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - if (addr == end) - ret = 1; - pte_unmap(ptem); - - return ret; -} - -static inline void get_head_page_multiple(struct page *page, int nr) -{ - VM_BUG_ON_PAGE(page != compound_head(page), page); - VM_BUG_ON_PAGE(page_count(page) == 0, page); - page_ref_add(page, nr); - SetPageReferenced(page); -} - -static int __gup_device_huge(unsigned long pfn, unsigned long addr, - unsigned long end, struct page **pages, int *nr) -{ - int nr_start = *nr; - struct dev_pagemap *pgmap = NULL; - - do { - struct page *page = pfn_to_page(pfn); - - pgmap = get_dev_pagemap(pfn, pgmap); - if (unlikely(!pgmap)) { - undo_dev_pagemap(nr, nr_start, pages); - return 0; - } - SetPageReferenced(page); - pages[*nr] = page; - get_page(page); - put_dev_pagemap(pgmap); - (*nr)++; - pfn++; - } while (addr += PAGE_SIZE, addr != end); - return 1; -} - -static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, - unsigned long end, struct page **pages, int *nr) -{ - unsigned long fault_pfn; - - fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - return __gup_device_huge(fault_pfn, addr, end, pages, nr); -} - -static int __gup_device_huge_pud(pud_t pud, unsigned long addr, - unsigned long end, struct page **pages, int *nr) -{ - unsigned long fault_pfn; - - fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - return __gup_device_huge(fault_pfn, addr, end, pages, nr); -} - -static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - int refs; - - if (!pte_allows_gup(pmd_val(pmd), write)) - return 0; - - VM_BUG_ON(!pfn_valid(pmd_pfn(pmd))); - if (pmd_devmap(pmd)) - return __gup_device_huge_pmd(pmd, addr, end, pages, nr); - - /* hugepages are never "special" */ - VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL); - - refs = 0; - head = pmd_page(pmd); - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON_PAGE(compound_head(page) != head, page); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - get_head_page_multiple(head, refs); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_protnone(pmd)) - return 0; - if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) - return 0; - } else { - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static noinline int gup_huge_pud(pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - struct page *head, *page; - int refs; - - if (!pte_allows_gup(pud_val(pud), write)) - return 0; - - VM_BUG_ON(!pfn_valid(pud_pfn(pud))); - if (pud_devmap(pud)) - return __gup_device_huge_pud(pud, addr, end, pages, nr); - - /* hugepages are never "special" */ - VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL); - - refs = 0; - head = pud_page(pud); - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON_PAGE(compound_head(page) != head, page); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - get_head_page_multiple(head, refs); - - return 1; -} - -static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&p4d, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_large(pud))) { - if (!gup_huge_pud(pud, addr, next, write, pages, nr)) - return 0; - } else { - if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } - } while (pudp++, addr = next, addr != end); - - return 1; -} - -static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - p4d_t *p4dp; - - p4dp = p4d_offset(&pgd, addr); - do { - p4d_t p4d = *p4dp; - - next = p4d_addr_end(addr, end); - if (p4d_none(p4d)) - return 0; - BUILD_BUG_ON(p4d_large(p4d)); - if (!gup_pud_range(p4d, addr, next, write, pages, nr)) - return 0; - } while (p4dp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - (void __user *)start, len))) - return 0; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed on x86. - * - * So long as we atomically load page table pointers versus teardown - * (which we do on x86, with the above PAE exception), we can follow the - * address down to the the page and take a ref on it. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - - end = start + len; - if (end < start) - goto slow_irqon; - -#ifdef CONFIG_X86_64 - if (end >> __VIRTUAL_MASK_SHIFT) - goto slow_irqon; -#endif - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed on x86. - * - * So long as we atomically load page table pointers versus teardown - * (which we do on x86, with the above PAE exception), we can follow the - * address down to the the page and take a ref on it. - */ - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; - - { - int ret; - -slow: - local_irq_enable(); -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, - pages, write ? FOLL_WRITE : 0); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - - return ret; - } -} diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 302f43fd9c28..adad702b39cd 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -148,7 +148,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index cbc87ea98751..673541eb3b3f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -161,16 +161,16 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { -#if !defined(CONFIG_KMEMCHECK) /* * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will * use small pages. * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled()) + if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK)) page_size_mask |= 1 << PG_LEVEL_2M; -#endif + else + direct_gbpages = 0; /* Enable PSE if available */ if (boot_cpu_has(X86_FEATURE_PSE)) @@ -811,10 +811,8 @@ void __init zone_sizes_init(void) } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { -#ifdef CONFIG_SMP - .active_mm = &init_mm, + .loaded_mm = &init_mm, .state = 0, -#endif .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; EXPORT_SYMBOL_GPL(cpu_tlbstate); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 95651dc58e09..dae6a5e5ad4a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -92,6 +92,44 @@ __setup("noexec32=", nonx32_setup); * When memory was added make sure all the processes MM have * suitable PGD entries in the local PGD level page. */ +#ifdef CONFIG_X86_5LEVEL +void sync_global_pgds(unsigned long start, unsigned long end) +{ + unsigned long addr; + + for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) { + const pgd_t *pgd_ref = pgd_offset_k(addr); + struct page *page; + + /* Check for overflow */ + if (addr < start) + break; + + if (pgd_none(*pgd_ref)) + continue; + + spin_lock(&pgd_lock); + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; + spinlock_t *pgt_lock; + + pgd = (pgd_t *)page_address(page) + pgd_index(addr); + /* the pgt_lock only for Xen */ + pgt_lock = &pgd_page_get_mm(page)->page_table_lock; + spin_lock(pgt_lock); + + if (!pgd_none(*pgd_ref) && !pgd_none(*pgd)) + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + + spin_unlock(pgt_lock); + } + spin_unlock(&pgd_lock); + } +} +#else void sync_global_pgds(unsigned long start, unsigned long end) { unsigned long addr; @@ -135,6 +173,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) spin_unlock(&pgd_lock); } } +#endif /* * NOTE: This function is marked __ref because it calls __init function @@ -585,6 +624,57 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, return paddr_last; } +static unsigned long __meminit +phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, + unsigned long page_size_mask) +{ + unsigned long paddr_next, paddr_last = paddr_end; + unsigned long vaddr = (unsigned long)__va(paddr); + int i = p4d_index(vaddr); + + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); + + for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { + p4d_t *p4d; + pud_t *pud; + + vaddr = (unsigned long)__va(paddr); + p4d = p4d_page + p4d_index(vaddr); + paddr_next = (paddr & P4D_MASK) + P4D_SIZE; + + if (paddr >= paddr_end) { + if (!after_bootmem && + !e820__mapped_any(paddr & P4D_MASK, paddr_next, + E820_TYPE_RAM) && + !e820__mapped_any(paddr & P4D_MASK, paddr_next, + E820_TYPE_RESERVED_KERN)) + set_p4d(p4d, __p4d(0)); + continue; + } + + if (!p4d_none(*p4d)) { + pud = pud_offset(p4d, 0); + paddr_last = phys_pud_init(pud, paddr, + paddr_end, + page_size_mask); + __flush_tlb_all(); + continue; + } + + pud = alloc_low_page(); + paddr_last = phys_pud_init(pud, paddr, paddr_end, + page_size_mask); + + spin_lock(&init_mm.page_table_lock); + p4d_populate(&init_mm, p4d, pud); + spin_unlock(&init_mm.page_table_lock); + } + __flush_tlb_all(); + + return paddr_last; +} + /* * Create page table mapping for the physical memory for specific physical * addresses. The virtual and physical addresses have to be aligned on PMD level @@ -606,26 +696,26 @@ kernel_physical_mapping_init(unsigned long paddr_start, for (; vaddr < vaddr_end; vaddr = vaddr_next) { pgd_t *pgd = pgd_offset_k(vaddr); p4d_t *p4d; - pud_t *pud; vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; - BUILD_BUG_ON(pgd_none(*pgd)); - p4d = p4d_offset(pgd, vaddr); - if (p4d_val(*p4d)) { - pud = (pud_t *)p4d_page_vaddr(*p4d); - paddr_last = phys_pud_init(pud, __pa(vaddr), + if (pgd_val(*pgd)) { + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), page_size_mask); continue; } - pud = alloc_low_page(); - paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), + p4d = alloc_low_page(); + paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), page_size_mask); spin_lock(&init_mm.page_table_lock); - p4d_populate(&init_mm, p4d, pud); + if (IS_ENABLED(CONFIG_X86_5LEVEL)) + pgd_populate(&init_mm, pgd, p4d); + else + p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); spin_unlock(&init_mm.page_table_lock); pgd_changed = true; } @@ -990,7 +1080,13 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, pud_base = pud_offset(p4d, 0); remove_pud_table(pud_base, addr, next, direct); - free_pud_table(pud_base, p4d); + /* + * For 4-level page tables we do not want to free PUDs, but in the + * 5-level case we should free them. This code will have to change + * to adapt for boot-time switching between 4 and 5 level page tables. + */ + if (CONFIG_PGTABLE_LEVELS == 5) + free_pud_table(pud_base, p4d); } if (direct) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index bbc558b88a88..4c1b5fd0c7ad 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -424,7 +424,7 @@ static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { /* Don't assume we're using swapper_pg_dir at this point */ - pgd_t *base = __va(read_cr3()); + pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = &base[pgd_index(addr)]; p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0c7d8129bed6..88215ac16b24 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -12,7 +12,7 @@ #include <asm/tlbflush.h> #include <asm/sections.h> -extern pgd_t early_level4_pgt[PTRS_PER_PGD]; +extern pgd_t early_top_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_MAX_ENTRIES]; static int __init map_range(struct range *range) @@ -109,8 +109,8 @@ void __init kasan_early_init(void) for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++) kasan_zero_p4d[i] = __p4d(p4d_val); - kasan_map_early_shadow(early_level4_pgt); - kasan_map_early_shadow(init_level4_pgt); + kasan_map_early_shadow(early_top_pgt); + kasan_map_early_shadow(init_top_pgt); } void __init kasan_init(void) @@ -121,8 +121,8 @@ void __init kasan_init(void) register_die_notifier(&kasan_die_notifier); #endif - memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); - load_cr3(early_level4_pgt); + memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); + load_cr3(early_top_pgt); __flush_tlb_all(); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -148,7 +148,7 @@ void __init kasan_init(void) kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); - load_cr3(init_level4_pgt); + load_cr3(init_top_pgt); __flush_tlb_all(); /* diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index aed206475aa7..af599167fe3c 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -6,12 +6,12 @@ * * Entropy is generated using the KASLR early boot functions now shared in * the lib directory (originally written by Kees Cook). Randomization is - * done on PGD & PUD page table levels to increase possible addresses. The - * physical memory mapping code was adapted to support PUD level virtual - * addresses. This implementation on the best configuration provides 30,000 - * possible virtual addresses in average for each memory region. An additional - * low memory page is used to ensure each CPU can start with a PGD aligned - * virtual address (for realmode). + * done on PGD & P4D/PUD page table levels to increase possible addresses. + * The physical memory mapping code was adapted to support P4D/PUD level + * virtual addresses. This implementation on the best configuration provides + * 30,000 possible virtual addresses in average for each memory region. + * An additional low memory page is used to ensure each CPU can start with + * a PGD aligned virtual address (for realmode). * * The order of each memory region is not changed. The feature looks at * the available space for the regions based on different configuration @@ -70,7 +70,7 @@ static __initdata struct kaslr_memory_region { unsigned long *base; unsigned long size_tb; } kaslr_regions[] = { - { &page_offset_base, 64/* Maximum */ }, + { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ }, { &vmalloc_base, VMALLOC_SIZE_TB }, { &vmemmap_base, 1 }, }; @@ -142,7 +142,10 @@ void __init kernel_randomize_memory(void) */ entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); prandom_bytes_state(&rand_state, &rand, sizeof(rand)); - entropy = (rand % (entropy + 1)) & PUD_MASK; + if (IS_ENABLED(CONFIG_X86_5LEVEL)) + entropy = (rand % (entropy + 1)) & P4D_MASK; + else + entropy = (rand % (entropy + 1)) & PUD_MASK; vaddr += entropy; *kaslr_regions[i].base = vaddr; @@ -151,27 +154,21 @@ void __init kernel_randomize_memory(void) * randomization alignment. */ vaddr += get_padding(&kaslr_regions[i]); - vaddr = round_up(vaddr + 1, PUD_SIZE); + if (IS_ENABLED(CONFIG_X86_5LEVEL)) + vaddr = round_up(vaddr + 1, P4D_SIZE); + else + vaddr = round_up(vaddr + 1, PUD_SIZE); remain_entropy -= entropy; } } -/* - * Create PGD aligned trampoline table to allow real mode initialization - * of additional CPUs. Consume only 1 low memory page. - */ -void __meminit init_trampoline(void) +static void __meminit init_trampoline_pud(void) { unsigned long paddr, paddr_next; pgd_t *pgd; pud_t *pud_page, *pud_page_tramp; int i; - if (!kaslr_memory_enabled()) { - init_trampoline_default(); - return; - } - pud_page_tramp = alloc_low_page(); paddr = 0; @@ -192,3 +189,49 @@ void __meminit init_trampoline(void) set_pgd(&trampoline_pgd_entry, __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); } + +static void __meminit init_trampoline_p4d(void) +{ + unsigned long paddr, paddr_next; + pgd_t *pgd; + p4d_t *p4d_page, *p4d_page_tramp; + int i; + + p4d_page_tramp = alloc_low_page(); + + paddr = 0; + pgd = pgd_offset_k((unsigned long)__va(paddr)); + p4d_page = (p4d_t *) pgd_page_vaddr(*pgd); + + for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) { + p4d_t *p4d, *p4d_tramp; + unsigned long vaddr = (unsigned long)__va(paddr); + + p4d_tramp = p4d_page_tramp + p4d_index(paddr); + p4d = p4d_page + p4d_index(vaddr); + paddr_next = (paddr & P4D_MASK) + P4D_SIZE; + + *p4d_tramp = *p4d; + } + + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); +} + +/* + * Create PGD aligned trampoline table to allow real mode initialization + * of additional CPUs. Consume only 1 low memory page. + */ +void __meminit init_trampoline(void) +{ + + if (!kaslr_memory_enabled()) { + init_trampoline_default(); + return; + } + + if (IS_ENABLED(CONFIG_X86_5LEVEL)) + init_trampoline_p4d(); + else + init_trampoline_pud(); +} diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 19ad095b41df..797295e792b2 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -74,9 +74,6 @@ static int mmap_is_legacy(void) if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) - return 1; - return sysctl_legacy_va_layout; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1dcd2be4cce4..c8520b2c62d2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -186,7 +186,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) unsigned int i, level; unsigned long addr; - BUG_ON(irqs_disabled()); + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); WARN_ON(PAGE_ALIGN(start) != start); on_each_cpu(__cpa_flush_range, NULL, 1); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6e7bedf69af7..014d07a80053 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -15,7 +15,7 @@ #include <linux/debugfs.h> /* - * Smarter SMP flushing macros. + * TLB flushing, formerly SMP-only * c/o Linus Torvalds. * * These mean you can really definitely utterly forget about @@ -28,39 +28,28 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ -#ifdef CONFIG_SMP - -struct flush_tlb_info { - struct mm_struct *flush_mm; - unsigned long flush_start; - unsigned long flush_end; -}; - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - */ void leave_mm(int cpu) { - struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm); + struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); + + /* + * It's plausible that we're in lazy TLB mode while our mm is init_mm. + * If so, our callers still expect us to flush the TLB, but there + * aren't any user TLB entries in init_mm to worry about. + * + * This needs to happen before any other sanity checks due to + * intel_idle's shenanigans. + */ + if (loaded_mm == &init_mm) + return; + if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { - cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); - load_cr3(swapper_pg_dir); - /* - * This gets called in the idle path where RCU - * functions differently. Tracing normally - * uses RCU, so we have to call the tracepoint - * specially here. - */ - trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - } + + switch_mm(NULL, &init_mm, NULL); } EXPORT_SYMBOL_GPL(leave_mm); -#endif /* CONFIG_SMP */ - void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -75,216 +64,167 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { unsigned cpu = smp_processor_id(); + struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); - if (likely(prev != next)) { - if (IS_ENABLED(CONFIG_VMAP_STACK)) { - /* - * If our current stack is in vmalloc space and isn't - * mapped in the new pgd, we'll double-fault. Forcibly - * map it. - */ - unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); - - pgd_t *pgd = next->pgd + stack_pgd_index; - - if (unlikely(pgd_none(*pgd))) - set_pgd(pgd, init_mm.pgd[stack_pgd_index]); - } + /* + * NB: The scheduler will call us with prev == next when + * switching from lazy TLB mode to normal mode if active_mm + * isn't changing. When this happens, there is no guarantee + * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next. + * + * NB: leave_mm() calls us with prev == NULL and tsk == NULL. + */ -#ifdef CONFIG_SMP - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - this_cpu_write(cpu_tlbstate.active_mm, next); -#endif + this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - cpumask_set_cpu(cpu, mm_cpumask(next)); + if (real_prev == next) { + /* + * There's nothing to do: we always keep the per-mm control + * regs in sync with cpu_tlbstate.loaded_mm. Just + * sanity-check mm_cpumask. + */ + if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next)))) + cpumask_set_cpu(cpu, mm_cpumask(next)); + return; + } + if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* - * Re-load page tables. - * - * This logic has an ordering constraint: - * - * CPU 0: Write to a PTE for 'next' - * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. - * CPU 1: set bit 1 in next's mm_cpumask - * CPU 1: load from the PTE that CPU 0 writes (implicit) - * - * We need to prevent an outcome in which CPU 1 observes - * the new PTE value and CPU 0 observes bit 1 clear in - * mm_cpumask. (If that occurs, then the IPI will never - * be sent, and CPU 0's TLB will contain a stale entry.) - * - * The bad outcome can occur if either CPU's load is - * reordered before that CPU's store, so both CPUs must - * execute full barriers to prevent this from happening. - * - * Thus, switch_mm needs a full barrier between the - * store to mm_cpumask and any operation that could load - * from next->pgd. TLB fills are special and can happen - * due to instruction fetches or for no reason at all, - * and neither LOCK nor MFENCE orders them. - * Fortunately, load_cr3() is serializing and gives the - * ordering guarantee we need. - * + * If our current stack is in vmalloc space and isn't + * mapped in the new pgd, we'll double-fault. Forcibly + * map it. */ - load_cr3(next->pgd); + unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + pgd_t *pgd = next->pgd + stack_pgd_index; - /* Stop flush ipis for the previous mm */ - cpumask_clear_cpu(cpu, mm_cpumask(prev)); + if (unlikely(pgd_none(*pgd))) + set_pgd(pgd, init_mm.pgd[stack_pgd_index]); + } - /* Load per-mm CR4 state */ - load_mm_cr4(next); + this_cpu_write(cpu_tlbstate.loaded_mm, next); -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT, if the LDT is different. - * - * It's possible that prev->context.ldt doesn't match - * the LDT register. This can happen if leave_mm(prev) - * was called and then modify_ldt changed - * prev->context.ldt but suppressed an IPI to this CPU. - * In this case, prev->context.ldt != NULL, because we - * never set context.ldt to NULL while the mm still - * exists. That means that next->context.ldt != - * prev->context.ldt, because mms never share an LDT. - */ - if (unlikely(prev->context.ldt != next->context.ldt)) - load_mm_ldt(next); -#endif + WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next))); + cpumask_set_cpu(cpu, mm_cpumask(next)); + + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs must + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. + */ + load_cr3(next->pgd); + + /* + * This gets called via leave_mm() in the idle path where RCU + * functions differently. Tracing normally uses RCU, so we have to + * call the tracepoint specially here. + */ + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + + /* Stop flush ipis for the previous mm */ + WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) && + real_prev != &init_mm); + cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); + + /* Load per-mm CR4 and LDTR state */ + load_mm_cr4(next); + switch_ldt(real_prev, next); +} + +static void flush_tlb_func_common(const struct flush_tlb_info *f, + bool local, enum tlb_flush_reason reason) +{ + /* This code cannot presently handle being reentered. */ + VM_WARN_ON(!irqs_disabled()); + + if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) { + leave_mm(smp_processor_id()); + return; } -#ifdef CONFIG_SMP - else { - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); - - if (!cpumask_test_cpu(cpu, mm_cpumask(next))) { - /* - * On established mms, the mm_cpumask is only changed - * from irq context, from ptep_clear_flush() while in - * lazy tlb mode, and here. Irqs are blocked during - * schedule, protecting us from simultaneous changes. - */ - cpumask_set_cpu(cpu, mm_cpumask(next)); - /* - * We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload CR3 - * to make sure to use no freed page tables. - * - * As above, load_cr3() is serializing and orders TLB - * fills with respect to the mm_cpumask write. - */ - load_cr3(next->pgd); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); - load_mm_cr4(next); - load_mm_ldt(next); + if (f->end == TLB_FLUSH_ALL) { + local_flush_tlb(); + if (local) + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + trace_tlb_flush(reason, TLB_FLUSH_ALL); + } else { + unsigned long addr; + unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT; + addr = f->start; + while (addr < f->end) { + __flush_tlb_single(addr); + addr += PAGE_SIZE; } + if (local) + count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages); + trace_tlb_flush(reason, nr_pages); } -#endif } -#ifdef CONFIG_SMP +static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason) +{ + const struct flush_tlb_info *f = info; -/* - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) set cpu_tlbstate to TLBSTATE_OK - * Now the tlb flush NMI handler flush_tlb_func won't call leave_mm - * if cpu0 was in lazy tlb mode. - * 1a2) update cpu active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but flush_tlb_func ignore flush ipis for the wrong - * mm, and in the worst case we perform a superfluous tlb flush. - * 1b) thread switch without mm change - * cpu active_mm is correct, cpu0 already handles flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - */ + flush_tlb_func_common(f, true, reason); +} -/* - * TLB flush funcation: - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - */ -static void flush_tlb_func(void *info) +static void flush_tlb_func_remote(void *info) { - struct flush_tlb_info *f = info; + const struct flush_tlb_info *f = info; inc_irq_stat(irq_tlb_count); - if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) + if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm)) return; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); - if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (f->flush_end == TLB_FLUSH_ALL) { - local_flush_tlb(); - trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL); - } else { - unsigned long addr; - unsigned long nr_pages = - (f->flush_end - f->flush_start) / PAGE_SIZE; - addr = f->flush_start; - while (addr < f->flush_end) { - __flush_tlb_single(addr); - addr += PAGE_SIZE; - } - trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages); - } - } else - leave_mm(smp_processor_id()); - + flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN); } void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long start, - unsigned long end) + const struct flush_tlb_info *info) { - struct flush_tlb_info info; - - info.flush_mm = mm; - info.flush_start = start; - info.flush_end = end; - count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); - if (end == TLB_FLUSH_ALL) + if (info->end == TLB_FLUSH_ALL) trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); else trace_tlb_flush(TLB_REMOTE_SEND_IPI, - (end - start) >> PAGE_SHIFT); + (info->end - info->start) >> PAGE_SHIFT); if (is_uv_system()) { unsigned int cpu; cpu = smp_processor_id(); - cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu); + cpumask = uv_flush_tlb_others(cpumask, info); if (cpumask) - smp_call_function_many(cpumask, flush_tlb_func, - &info, 1); + smp_call_function_many(cpumask, flush_tlb_func_remote, + (void *)info, 1); return; } - smp_call_function_many(cpumask, flush_tlb_func, &info, 1); + smp_call_function_many(cpumask, flush_tlb_func_remote, + (void *)info, 1); } /* @@ -302,85 +242,41 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { - unsigned long addr; - /* do a global flush by default */ - unsigned long base_pages_to_flush = TLB_FLUSH_ALL; - - preempt_disable(); + int cpu; - if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) - base_pages_to_flush = (end - start) >> PAGE_SHIFT; - if (base_pages_to_flush > tlb_single_page_flush_ceiling) - base_pages_to_flush = TLB_FLUSH_ALL; + struct flush_tlb_info info = { + .mm = mm, + }; - if (current->active_mm != mm) { - /* Synchronize with switch_mm. */ - smp_mb(); + cpu = get_cpu(); - goto out; - } - - if (!current->mm) { - leave_mm(smp_processor_id()); + /* Synchronize with switch_mm. */ + smp_mb(); - /* Synchronize with switch_mm. */ - smp_mb(); - - goto out; - } - - /* - * Both branches below are implicit full barriers (MOV to CR or - * INVLPG) that synchronize with switch_mm. - */ - if (base_pages_to_flush == TLB_FLUSH_ALL) { - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - local_flush_tlb(); + /* Should we flush just the requested range? */ + if ((end != TLB_FLUSH_ALL) && + !(vmflag & VM_HUGETLB) && + ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) { + info.start = start; + info.end = end; } else { - /* flush range by one by one 'invlpg' */ - for (addr = start; addr < end; addr += PAGE_SIZE) { - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - __flush_tlb_single(addr); - } - } - trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush); -out: - if (base_pages_to_flush == TLB_FLUSH_ALL) { - start = 0UL; - end = TLB_FLUSH_ALL; + info.start = 0UL; + info.end = TLB_FLUSH_ALL; } - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, start, end); - preempt_enable(); -} -void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) { - /* - * Implicit full barrier (INVLPG) that synchronizes - * with switch_mm. - */ - __flush_tlb_one(start); - } else { - leave_mm(smp_processor_id()); - - /* Synchronize with switch_mm. */ - smp_mb(); - } + if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { + VM_WARN_ON(irqs_disabled()); + local_irq_disable(); + flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN); + local_irq_enable(); } - if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE); - - preempt_enable(); + if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) + flush_tlb_others(mm_cpumask(mm), &info); + put_cpu(); } + static void do_flush_tlb_all(void *info) { count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); @@ -401,7 +297,7 @@ static void do_kernel_range_flush(void *info) unsigned long addr; /* flush range by one by one 'invlpg' */ - for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE) + for (addr = f->start; addr < f->end; addr += PAGE_SIZE) __flush_tlb_single(addr); } @@ -410,16 +306,40 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) /* Balance as user space task's flush, a bit conservative */ if (end == TLB_FLUSH_ALL || - (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) { + (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { on_each_cpu(do_flush_tlb_all, NULL, 1); } else { struct flush_tlb_info info; - info.flush_start = start; - info.flush_end = end; + info.start = start; + info.end = end; on_each_cpu(do_kernel_range_flush, &info, 1); } } +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + struct flush_tlb_info info = { + .mm = NULL, + .start = 0UL, + .end = TLB_FLUSH_ALL, + }; + + int cpu = get_cpu(); + + if (cpumask_test_cpu(cpu, &batch->cpumask)) { + VM_WARN_ON(irqs_disabled()); + local_irq_disable(); + flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN); + local_irq_enable(); + } + + if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) + flush_tlb_others(&batch->cpumask, &info); + cpumask_clear(&batch->cpumask); + + put_cpu(); +} + static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -465,5 +385,3 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); - -#endif /* CONFIG_SMP */ diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile index 90568c33ddb0..fefb4b619598 100644 --- a/arch/x86/net/Makefile +++ b/arch/x86/net/Makefile @@ -1,4 +1,6 @@ # # Arch-specific network modules # +OBJECT_FILES_NON_STANDARD_bpf_jit.o += y + obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index b914e20b5a00..3353b76dcff0 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c @@ -65,6 +65,9 @@ struct sim_reg_op { { PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\ {0, SIZE_TO_MASK(size)} }, +/* + * All read/write functions are called with pci_config_lock held. + */ static void reg_init(struct sim_dev_reg *reg) { pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4, @@ -73,21 +76,13 @@ static void reg_init(struct sim_dev_reg *reg) static void reg_read(struct sim_dev_reg *reg, u32 *value) { - unsigned long flags; - - raw_spin_lock_irqsave(&pci_config_lock, flags); *value = reg->sim_reg.value; - raw_spin_unlock_irqrestore(&pci_config_lock, flags); } static void reg_write(struct sim_dev_reg *reg, u32 value) { - unsigned long flags; - - raw_spin_lock_irqsave(&pci_config_lock, flags); reg->sim_reg.value = (value & reg->sim_reg.mask) | (reg->sim_reg.value & ~reg->sim_reg.mask); - raw_spin_unlock_irqrestore(&pci_config_lock, flags); } static void sata_reg_init(struct sim_dev_reg *reg) @@ -117,12 +112,8 @@ static void sata_revid_read(struct sim_dev_reg *reg, u32 *value) static void reg_noirq_read(struct sim_dev_reg *reg, u32 *value) { - unsigned long flags; - - raw_spin_lock_irqsave(&pci_config_lock, flags); /* force interrupt pin value to 0 */ *value = reg->sim_reg.value & 0xfff00ff; - raw_spin_unlock_irqrestore(&pci_config_lock, flags); } static struct sim_dev_reg bus1_fixups[] = { @@ -265,24 +256,33 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value) return retval; } -static int ce4100_conf_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) +static int ce4100_bus1_read(unsigned int devfn, int reg, int len, u32 *value) { + unsigned long flags; int i; - WARN_ON(seg); - if (bus == 1) { - for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { - if (bus1_fixups[i].dev_func == devfn && - bus1_fixups[i].reg == (reg & ~3) && - bus1_fixups[i].read) { - bus1_fixups[i].read(&(bus1_fixups[i]), - value); - extract_bytes(value, reg, len); - return 0; - } + for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { + if (bus1_fixups[i].dev_func == devfn && + bus1_fixups[i].reg == (reg & ~3) && + bus1_fixups[i].read) { + + raw_spin_lock_irqsave(&pci_config_lock, flags); + bus1_fixups[i].read(&(bus1_fixups[i]), value); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + extract_bytes(value, reg, len); + return 0; } } + return -1; +} + +static int ce4100_conf_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + WARN_ON(seg); + + if (bus == 1 && !ce4100_bus1_read(devfn, reg, len, value)) + return 0; if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) && !bridge_read(devfn, reg, len, value)) @@ -291,23 +291,32 @@ static int ce4100_conf_read(unsigned int seg, unsigned int bus, return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); } -static int ce4100_conf_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) +static int ce4100_bus1_write(unsigned int devfn, int reg, int len, u32 value) { + unsigned long flags; int i; - WARN_ON(seg); - if (bus == 1) { - for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { - if (bus1_fixups[i].dev_func == devfn && - bus1_fixups[i].reg == (reg & ~3) && - bus1_fixups[i].write) { - bus1_fixups[i].write(&(bus1_fixups[i]), - value); - return 0; - } + for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { + if (bus1_fixups[i].dev_func == devfn && + bus1_fixups[i].reg == (reg & ~3) && + bus1_fixups[i].write) { + + raw_spin_lock_irqsave(&pci_config_lock, flags); + bus1_fixups[i].write(&(bus1_fixups[i]), value); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + return 0; } } + return -1; +} + +static int ce4100_conf_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + WARN_ON(seg); + + if (bus == 1 && !ce4100_bus1_write(devfn, reg, len, value)) + return 0; /* Discard writes to A/V bridge BAR. */ if (bus == 0 && PCI_DEVFN(1, 0) == devfn && @@ -318,8 +327,8 @@ static int ce4100_conf_write(unsigned int seg, unsigned int bus, } static const struct pci_raw_ops ce4100_pci_conf = { - .read = ce4100_conf_read, - .write = ce4100_conf_write, + .read = ce4100_conf_read, + .write = ce4100_conf_write, }; int __init ce4100_pci_init(void) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 190e718694b1..cfd1a89fd04e 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -75,8 +75,8 @@ struct pci_ops pci_root_ops = { }; /* - * This interrupt-safe spinlock protects all accesses to PCI - * configuration space. + * This interrupt-safe spinlock protects all accesses to PCI configuration + * space, except for the mmconfig (ECAM) based operations. */ DEFINE_RAW_SPINLOCK(pci_config_lock); diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index ea6f3802c17b..1cb01abcb1be 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -24,12 +24,10 @@ static void pcibios_fixup_peer_bridges(void) int __init pci_legacy_init(void) { - if (!raw_pci_ops) { - printk("PCI: System does not support PCI\n"); - return 0; - } + if (!raw_pci_ops) + return 1; - printk("PCI: Probing PCI hardware\n"); + pr_info("PCI: Probing PCI hardware\n"); pcibios_scan_root(0); return 0; } @@ -46,7 +44,7 @@ void pcibios_scan_specific_bus(int busn) if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); + pr_info("PCI: Discovered peer bus %02x\n", busn); pcibios_scan_root(busn); return; } @@ -60,8 +58,12 @@ static int __init pci_subsys_init(void) * The init function returns an non zero value when * pci_legacy_init should be invoked. */ - if (x86_init.pci.init()) - pci_legacy_init(); + if (x86_init.pci.init()) { + if (pci_legacy_init()) { + pr_info("PCI: System does not support PCI\n"); + return -ENODEV; + } + } pcibios_fixup_peer_bridges(); x86_init.pci.init_irq(); diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index f1d83b34c329..2f56e1ed61c3 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,4 +1,5 @@ OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7e76a4d8304b..f084d8718ac4 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI + * non-native EFI. With efi=old_map, we don't do runtime services in + * kexec kernel because in the initial boot something else might + * have been mapped at these virtual addresses. */ - if (!efi_is_native()) { + if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; @@ -1012,7 +1014,6 @@ static void __init __efi_enter_virtual_mode(void) * necessary relocation fixups for the new virtual addresses. */ efi_runtime_update_mappings(); - efi_dump_pagetable(); /* clean DUMMY object */ efi_delete_dummy_variable(); @@ -1027,6 +1028,8 @@ void __init efi_enter_virtual_mode(void) kexec_enter_virtual_mode(); else __efi_enter_virtual_mode(); + + efi_dump_pagetable(); } /* diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 3481268da3d0..52f7faa1538f 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -44,7 +44,14 @@ int __init efi_alloc_page_tables(void) } void efi_sync_low_kernel_mappings(void) {} -void __init efi_dump_pagetable(void) {} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + ptdump_walk_pgd_level(NULL, swapper_pg_dir); +#endif +} + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { return 0; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c488625c9712..9bf72f5bfedb 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -71,14 +71,16 @@ static void __init early_code_mapping_set_exec(int executable) pgd_t * __init efi_call_phys_prolog(void) { - unsigned long vaddress; - pgd_t *save_pgd; + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; int pgd; - int n_pgds; + int n_pgds, i, j; if (!efi_enabled(EFI_OLD_MEMMAP)) { - save_pgd = (pgd_t *)read_cr3(); + save_pgd = (pgd_t *)__read_cr3(); write_cr3((unsigned long)efi_scratch.efi_pgt); goto out; } @@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void) n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + /* + * Build 1:1 identity mapping for efi=old_map usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ for (pgd = 0; pgd < n_pgds; pgd++) { - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } } out: __flush_tlb_all(); @@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) /* * After the lock is released, the original page table is restored. */ - int pgd_idx; + int pgd_idx, i; int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; if (!efi_enabled(EFI_OLD_MEMMAP)) { write_cr3((unsigned long)save_pgd); @@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + if (!(pgd_val(*pgd) & _PAGE_PRESENT)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!(p4d_val(*p4d) & _PAGE_PRESENT)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + kfree(save_pgd); __flush_tlb_all(); @@ -526,7 +589,10 @@ void __init efi_runtime_update_mappings(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - ptdump_walk_pgd_level(NULL, efi_pgd); + if (efi_enabled(EFI_OLD_MEMMAP)) + ptdump_walk_pgd_level(NULL, swapper_pg_dir); + else + ptdump_walk_pgd_level(NULL, efi_pgd); #endif } @@ -583,7 +649,7 @@ efi_status_t efi_thunk_set_virtual_address_map( efi_sync_low_kernel_mappings(); local_irq_save(flags); - efi_scratch.prev_cr3 = read_cr3(); + efi_scratch.prev_cr3 = __read_cr3(); write_cr3((unsigned long)efi_scratch.efi_pgt); __flush_tlb_all(); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 26615991d69c..8a99a2e96537 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -15,12 +15,66 @@ #include <asm/e820/api.h> #include <asm/efi.h> #include <asm/uv/uv.h> +#include <asm/cpu_device_id.h> #define EFI_MIN_RESERVE 5120 #define EFI_DUMMY_GUID \ EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) +#define QUARK_CSH_SIGNATURE 0x5f435348 /* _CSH */ +#define QUARK_SECURITY_HEADER_SIZE 0x400 + +/* + * Header prepended to the standard EFI capsule on Quark systems the are based + * on Intel firmware BSP. + * @csh_signature: Unique identifier to sanity check signed module + * presence ("_CSH"). + * @version: Current version of CSH used. Should be one for Quark A0. + * @modulesize: Size of the entire module including the module header + * and payload. + * @security_version_number_index: Index of SVN to use for validation of signed + * module. + * @security_version_number: Used to prevent against roll back of modules. + * @rsvd_module_id: Currently unused for Clanton (Quark). + * @rsvd_module_vendor: Vendor Identifier. For Intel products value is + * 0x00008086. + * @rsvd_date: BCD representation of build date as yyyymmdd, where + * yyyy=4 digit year, mm=1-12, dd=1-31. + * @headersize: Total length of the header including including any + * padding optionally added by the signing tool. + * @hash_algo: What Hash is used in the module signing. + * @cryp_algo: What Crypto is used in the module signing. + * @keysize: Total length of the key data including including any + * padding optionally added by the signing tool. + * @signaturesize: Total length of the signature including including any + * padding optionally added by the signing tool. + * @rsvd_next_header: 32-bit pointer to the next Secure Boot Module in the + * chain, if there is a next header. + * @rsvd: Reserved, padding structure to required size. + * + * See also QuartSecurityHeader_t in + * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h + * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP + */ +struct quark_security_header { + u32 csh_signature; + u32 version; + u32 modulesize; + u32 security_version_number_index; + u32 security_version_number; + u32 rsvd_module_id; + u32 rsvd_module_vendor; + u32 rsvd_date; + u32 headersize; + u32 hash_algo; + u32 cryp_algo; + u32 keysize; + u32 signaturesize; + u32 rsvd_next_header; + u32 rsvd[2]; +}; + static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; static bool efi_no_storage_paranoia; @@ -360,6 +414,9 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + if (!num_entries) + return; + new_size = efi.memmap.desc_size * num_entries; new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { @@ -501,3 +558,86 @@ bool efi_poweroff_required(void) { return acpi_gbl_reduced_hardware || acpi_no_s5; } + +#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH + +static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, + size_t hdr_bytes) +{ + struct quark_security_header *csh = *pkbuff; + + /* Only process data block that is larger than the security header */ + if (hdr_bytes < sizeof(struct quark_security_header)) + return 0; + + if (csh->csh_signature != QUARK_CSH_SIGNATURE || + csh->headersize != QUARK_SECURITY_HEADER_SIZE) + return 1; + + /* Only process data block if EFI header is included */ + if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE + + sizeof(efi_capsule_header_t)) + return 0; + + pr_debug("Quark security header detected\n"); + + if (csh->rsvd_next_header != 0) { + pr_err("multiple Quark security headers not supported\n"); + return -EINVAL; + } + + *pkbuff += csh->headersize; + cap_info->total_size = csh->headersize; + + /* + * Update the first page pointer to skip over the CSH header. + */ + cap_info->pages[0] += csh->headersize; + + return 1; +} + +#define ICPU(family, model, quirk_handler) \ + { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \ + (unsigned long)&quirk_handler } + +static const struct x86_cpu_id efi_capsule_quirk_ids[] = { + ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */ + { } +}; + +int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, + size_t hdr_bytes) +{ + int (*quirk_handler)(struct capsule_info *, void **, size_t); + const struct x86_cpu_id *id; + int ret; + + if (hdr_bytes < sizeof(efi_capsule_header_t)) + return 0; + + cap_info->total_size = 0; + + id = x86_match_cpu(efi_capsule_quirk_ids); + if (id) { + /* + * The quirk handler is supposed to return + * - a value > 0 if the setup should continue, after advancing + * kbuff as needed + * - 0 if not enough hdr_bytes are available yet + * - a negative error code otherwise + */ + quirk_handler = (typeof(quirk_handler))id->driver_data; + ret = quirk_handler(cap_info, &kbuff, hdr_bytes); + if (ret <= 0) + return ret; + } + + memcpy(&cap_info->header, kbuff, sizeof(cap_info->header)); + + cap_info->total_size += cap_info->header.imagesize; + + return __efi_capsule_setup_info(cap_info); +} + +#endif diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c index c5350fd27d70..0668aaff8bfe 100644 --- a/arch/x86/platform/olpc/olpc-xo1-pm.c +++ b/arch/x86/platform/olpc/olpc-xo1-pm.c @@ -77,7 +77,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state) asmlinkage __visible int xo1_do_sleep(u8 sleep_state) { - void *pgd_addr = __va(read_cr3()); + void *pgd_addr = __va(read_cr3_pa()); /* Program wakeup mask (using dword access to CS5536_PM1_EN) */ outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 42e65fee5673..2983faab5b18 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -456,12 +456,13 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) */ static inline unsigned long long cycles_2_ns(unsigned long long cyc) { - struct cyc2ns_data *data = cyc2ns_read_begin(); + struct cyc2ns_data data; unsigned long long ns; - ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); + cyc2ns_read_begin(&data); + ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift); + cyc2ns_read_end(); - cyc2ns_read_end(data); return ns; } @@ -470,12 +471,13 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) */ static inline unsigned long long ns_2_cycles(unsigned long long ns) { - struct cyc2ns_data *data = cyc2ns_read_begin(); + struct cyc2ns_data data; unsigned long long cyc; - cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul; + cyc2ns_read_begin(&data); + cyc = (ns << data.cyc2ns_shift) / data.cyc2ns_mul; + cyc2ns_read_end(); - cyc2ns_read_end(data); return cyc; } @@ -1121,11 +1123,9 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, * done. The returned pointer is valid till preemption is re-enabled. */ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - unsigned int cpu) + const struct flush_tlb_info *info) { + unsigned int cpu = smp_processor_id(); int locals = 0, remotes = 0, hubs = 0; struct bau_desc *bau_desc; struct cpumask *flush_mask; @@ -1179,8 +1179,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, record_send_statistics(stat, locals, hubs, remotes, bau_desc); - if (!end || (end - start) <= PAGE_SIZE) - address = start; + if (!info->end || (info->end - info->start) <= PAGE_SIZE) + address = info->start; else address = TLB_FLUSH_ALL; diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 776c6592136c..03fc397335b7 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -160,13 +160,21 @@ static struct irq_domain *uv_get_irq_domain(void) { static struct irq_domain *uv_domain; static DEFINE_MUTEX(uv_lock); + struct fwnode_handle *fn; mutex_lock(&uv_lock); - if (uv_domain == NULL) { - uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL); - if (uv_domain) - uv_domain->parent = x86_vector_domain; - } + if (uv_domain) + goto out; + + fn = irq_domain_alloc_named_fwnode("UV-CORE"); + if (!fn) + goto out; + + uv_domain = irq_domain_create_tree(fn, &uv_domain_ops, NULL); + irq_domain_free_fwnode(fn); + if (uv_domain) + uv_domain->parent = x86_vector_domain; +out: mutex_unlock(&uv_lock); return uv_domain; diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index a6a198c33623..05041871ac90 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile @@ -1,3 +1,5 @@ +OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y + # __restore_processor_state() restores %gs after S3 resume and so should not # itself be stack-protected nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 6b05a9219ea2..78459a6d455a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -129,7 +129,7 @@ static void __save_processor_state(struct saved_context *ctxt) */ ctxt->cr0 = read_cr0(); ctxt->cr2 = read_cr2(); - ctxt->cr3 = read_cr3(); + ctxt->cr3 = __read_cr3(); ctxt->cr4 = __read_cr4(); #ifdef CONFIG_X86_64 ctxt->cr8 = read_cr8(); diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index a6e21fee22ea..e3e62c8a8e70 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -150,7 +150,8 @@ static int relocate_restore_code(void) memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); /* Make the page containing the relocated code executable */ - pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code); + pgd = (pgd_t *)__va(read_cr3_pa()) + + pgd_index(relocated_restore_code); p4d = p4d_offset(pgd, relocated_restore_code); if (p4d_large(*p4d)) { set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX)); diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index a163a90af4aa..cd4be19c36dc 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -102,7 +102,7 @@ static void __init setup_real_mode(void) trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); trampoline_pgd[0] = trampoline_pgd_entry.pgd; - trampoline_pgd[511] = init_level4_pgt[511].pgd; + trampoline_pgd[511] = init_top_pgt[511].pgd; #endif } diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index fffb0a16f9e3..bced7a369a11 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,3 +1,6 @@ +OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_xen-pvh.o := y + ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_spinlock.o = -pg diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index bcea81f36fc5..b5e48da7fbff 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -178,7 +178,7 @@ static struct apic xen_pv_apic = { .get_apic_id = xen_get_apic_id, .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, #ifdef CONFIG_SMP .send_IPI_mask = xen_send_IPI_mask, diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 30bb2e80cfe7..a18703be9ead 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -54,38 +54,6 @@ static efi_system_table_t efi_systab_xen __initdata = { .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */ }; -static const struct efi efi_xen __initconst = { - .systab = NULL, /* Initialized later. */ - .runtime_version = 0, /* Initialized later. */ - .mps = EFI_INVALID_TABLE_ADDR, - .acpi = EFI_INVALID_TABLE_ADDR, - .acpi20 = EFI_INVALID_TABLE_ADDR, - .smbios = EFI_INVALID_TABLE_ADDR, - .smbios3 = EFI_INVALID_TABLE_ADDR, - .sal_systab = EFI_INVALID_TABLE_ADDR, - .boot_info = EFI_INVALID_TABLE_ADDR, - .hcdp = EFI_INVALID_TABLE_ADDR, - .uga = EFI_INVALID_TABLE_ADDR, - .uv_systab = EFI_INVALID_TABLE_ADDR, - .fw_vendor = EFI_INVALID_TABLE_ADDR, - .runtime = EFI_INVALID_TABLE_ADDR, - .config_table = EFI_INVALID_TABLE_ADDR, - .get_time = xen_efi_get_time, - .set_time = xen_efi_set_time, - .get_wakeup_time = xen_efi_get_wakeup_time, - .set_wakeup_time = xen_efi_set_wakeup_time, - .get_variable = xen_efi_get_variable, - .get_next_variable = xen_efi_get_next_variable, - .set_variable = xen_efi_set_variable, - .query_variable_info = xen_efi_query_variable_info, - .update_capsule = xen_efi_update_capsule, - .query_capsule_caps = xen_efi_query_capsule_caps, - .get_next_high_mono_count = xen_efi_get_next_high_mono_count, - .reset_system = xen_efi_reset_system, - .set_virtual_address_map = NULL, /* Not used under Xen. */ - .flags = 0 /* Initialized later. */ -}; - static efi_system_table_t __init *xen_efi_probe(void) { struct xen_platform_op op = { @@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void) /* Here we know that Xen runs on EFI platform. */ - efi = efi_xen; + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = xen_efi_reset_system; efi_systab_xen.tables = info->cfg.addr; efi_systab_xen.nr_tables = info->cfg.nent; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1f386d7fdf70..1d7a7213a310 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -975,37 +975,32 @@ static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } - -#ifdef CONFIG_SMP -/* Another cpu may still have their %cr3 pointing at the pagetable, so - we need to repoint it somewhere else before we can unpin it. */ -static void drop_other_mm_ref(void *info) +static void drop_mm_ref_this_cpu(void *info) { struct mm_struct *mm = info; - struct mm_struct *active_mm; - - active_mm = this_cpu_read(cpu_tlbstate.active_mm); - if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) + if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) leave_mm(smp_processor_id()); - /* If this cpu still has a stale cr3 reference, then make sure - it has been flushed. */ + /* + * If this cpu still has a stale cr3 reference, then make sure + * it has been flushed. + */ if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd)) - load_cr3(swapper_pg_dir); + xen_mc_flush(); } +#ifdef CONFIG_SMP +/* + * Another cpu may still have their %cr3 pointing at the pagetable, so + * we need to repoint it somewhere else before we can unpin it. + */ static void xen_drop_mm_ref(struct mm_struct *mm) { cpumask_var_t mask; unsigned cpu; - if (current->active_mm == mm) { - if (current->mm == mm) - load_cr3(swapper_pg_dir); - else - leave_mm(smp_processor_id()); - } + drop_mm_ref_this_cpu(mm); /* Get the "official" set of cpus referring to our pagetable. */ if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { @@ -1013,31 +1008,31 @@ static void xen_drop_mm_ref(struct mm_struct *mm) if (!cpumask_test_cpu(cpu, mm_cpumask(mm)) && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) continue; - smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); + smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1); } return; } cpumask_copy(mask, mm_cpumask(mm)); - /* It's possible that a vcpu may have a stale reference to our - cr3, because its in lazy mode, and it hasn't yet flushed - its set of pending hypercalls yet. In this case, we can - look at its actual current cr3 value, and force it to flush - if needed. */ + /* + * It's possible that a vcpu may have a stale reference to our + * cr3, because its in lazy mode, and it hasn't yet flushed + * its set of pending hypercalls yet. In this case, we can + * look at its actual current cr3 value, and force it to flush + * if needed. + */ for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) cpumask_set_cpu(cpu, mask); } - if (!cpumask_empty(mask)) - smp_call_function_many(mask, drop_other_mm_ref, mm, 1); + smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1); free_cpumask_var(mask); } #else static void xen_drop_mm_ref(struct mm_struct *mm) { - if (current->active_mm == mm) - load_cr3(swapper_pg_dir); + drop_mm_ref_this_cpu(mm); } #endif @@ -1366,8 +1361,7 @@ static void xen_flush_tlb_single(unsigned long addr) } static void xen_flush_tlb_others(const struct cpumask *cpus, - struct mm_struct *mm, unsigned long start, - unsigned long end) + const struct flush_tlb_info *info) { struct { struct mmuext_op op; @@ -1379,7 +1373,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, } *args; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_others(cpus, mm, start, end); + trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end); if (cpumask_empty(cpus)) return; /* nothing to do */ @@ -1393,9 +1387,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { + if (info->end != TLB_FLUSH_ALL && + (info->end - info->start) <= PAGE_SIZE) { args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = start; + args->op.arg1.linear_addr = info->start; } MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); @@ -1470,8 +1465,8 @@ static void xen_write_cr3(unsigned long cr3) * At the start of the day - when Xen launches a guest, it has already * built pagetables for the guest. We diligently look over them * in xen_setup_kernel_pagetable and graft as appropriate them in the - * init_level4_pgt and its friends. Then when we are happy we load - * the new init_level4_pgt - and continue on. + * init_top_pgt and its friends. Then when we are happy we load + * the new init_top_pgt - and continue on. * * The generic code starts (start_kernel) and 'init_mem_mapping' sets * up the rest of the pagetables. When it has completed it loads the cr3. @@ -1914,12 +1909,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) pt_end = pt_base + xen_start_info->nr_pt_frames; /* Zap identity mapping */ - init_level4_pgt[0] = __pgd(0); + init_top_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ /* L4[272] -> level3_ident_pgt */ /* L4[511] -> level3_kernel_pgt */ - convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(init_top_pgt); /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); @@ -1950,10 +1945,10 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Copy the initial P->M table mappings if necessary. */ i = pgd_index(xen_start_info->mfn_list); if (i && i < pgd_index(__START_KERNEL_map)) - init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; + init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(init_top_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); @@ -1964,7 +1959,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Pin down new L4 */ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); + PFN_DOWN(__pa_symbol(init_top_pgt))); /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); @@ -1974,7 +1969,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) * attach it to, so make sure we just set kernel pgd. */ xen_mc_batch(); - __xen_write_cr3(true, __pa(init_level4_pgt)); + __xen_write_cr3(true, __pa(init_top_pgt)); xen_mc_issue(PARAVIRT_LAZY_CPU); /* We can't that easily rip out L3 and L2, as the Xen pagetables are @@ -2022,7 +2017,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) pmd_t pmd; pte_t pte; - pa = read_cr3(); + pa = read_cr3_pa(); pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) * sizeof(pgd))); if (!pgd_present(pgd)) @@ -2102,7 +2097,7 @@ void __init xen_relocate_p2m(void) pt_phys = pmd_phys + PFN_PHYS(n_pmd); p2m_pfn = PFN_DOWN(pt_phys) + n_pt; - pgd = __va(read_cr3()); + pgd = __va(read_cr3_pa()); new_p2m = (unsigned long *)(2 * PGDIR_SIZE); idx_p4d = 0; save_pud = n_pud; @@ -2209,7 +2204,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) { unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); - BUG_ON(read_cr3() != __pa(initial_page_table)); + BUG_ON(read_cr3_pa() != __pa(initial_page_table)); BUG_ON(cr3 != __pa(swapper_pg_dir)); /* diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index 5e246716d58f..e1a5fbeae08d 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -87,7 +87,7 @@ ENTRY(pvh_start_xen) wrmsr /* Enable pre-constructed page tables. */ - mov $_pa(init_level4_pgt), %eax + mov $_pa(init_top_pgt), %eax mov %eax, %cr3 mov $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index cc23e9ecc6bb..30f6290109d4 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -25,7 +25,6 @@ generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h generic-y += sections.h -generic-y += siginfo.h generic-y += statfs.h generic-y += termios.h generic-y += topology.h diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h index f71f88ea7646..19707db966f1 100644 --- a/arch/xtensa/include/asm/irq.h +++ b/arch/xtensa/include/asm/irq.h @@ -29,7 +29,8 @@ static inline void variant_irq_disable(unsigned int irq) { } # define PLATFORM_NR_IRQS 0 #endif #define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS -#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS) +#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1) +#define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1) #if VARIANT_NR_IRQS == 0 static inline void variant_init_irq(void) { } diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 003eeee3fbc6..30ee8c608853 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -213,8 +213,6 @@ struct mm_struct; #define release_segments(mm) do { } while(0) #define forget_segments() do { } while (0) -#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc) - extern unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild index b15bf6bc0e94..4cb0d2f8868c 100644 --- a/arch/xtensa/include/uapi/asm/Kbuild +++ b/arch/xtensa/include/uapi/asm/Kbuild @@ -1,2 +1,3 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += siginfo.h diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index a265edd6ac37..99341028cc77 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -34,11 +34,6 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs) { int irq = irq_find_mapping(NULL, hwirq); - if (hwirq >= NR_IRQS) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __func__, hwirq); - } - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 394ef08300b6..33bfa5270d95 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -593,8 +593,7 @@ c_show(struct seq_file *f, void *slot) (ccount_freq/10000) % 100, loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); - - seq_printf(f,"flags\t\t: " + seq_puts(f, "flags\t\t: " #if XCHAL_HAVE_NMI "nmi " #endif diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 06937928cb72..74afbf02d07e 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, /* At this point: (!vmm || addr < vmm->vm_end). */ if (TASK_SIZE - len < addr) return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) + if (!vmm || addr + len <= vm_start_gap(vmm)) return addr; addr = vmm->vm_end; if (flags & MAP_SHARED) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 668c1056f9e4..fd524a54d2ab 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -187,7 +187,7 @@ void __init time_init(void) local_timer_setup(0); setup_irq(this_cpu_ptr(&ccount_timer)->evt.irq, &timer_irqaction); sched_clock_register(ccount_sched_clock_read, 32, ccount_freq); - clocksource_probe(); + timer_probe(); } /* diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 30d9fc21e076..162c77e53ca8 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -118,7 +118,7 @@ SECTIONS SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR) SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4) SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR) - SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 48) + SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 20) SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) #endif @@ -306,13 +306,13 @@ SECTIONS .UserExceptionVector.literal) SECTION_VECTOR (_DoubleExceptionVector_literal, .DoubleExceptionVector.literal, - DOUBLEEXC_VECTOR_VADDR - 48, + DOUBLEEXC_VECTOR_VADDR - 20, SIZEOF(.UserExceptionVector.text), .UserExceptionVector.text) SECTION_VECTOR (_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, - 48, + 20, .DoubleExceptionVector.literal) . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 02e94bb3ad3e..c45b90bb9339 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -317,8 +317,7 @@ static int __init simdisk_init(void) if (simdisk_count > MAX_SIMDISK_COUNT) simdisk_count = MAX_SIMDISK_COUNT; - sddev = kmalloc(simdisk_count * sizeof(struct simdisk), - GFP_KERNEL); + sddev = kmalloc_array(simdisk_count, sizeof(*sddev), GFP_KERNEL); if (sddev == NULL) goto out_unregister; diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h index dbeea2b440a1..1fda7e20dfcb 100644 --- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h +++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h @@ -24,16 +24,18 @@ /* Interrupt configuration. */ -#define PLATFORM_NR_IRQS 10 +#define PLATFORM_NR_IRQS 0 /* Default assignment of LX60 devices to external interrupts. */ #ifdef CONFIG_XTENSA_MX #define DUART16552_INTNUM XCHAL_EXTINT3_NUM #define OETH_IRQ XCHAL_EXTINT4_NUM +#define C67X00_IRQ XCHAL_EXTINT8_NUM #else #define DUART16552_INTNUM XCHAL_EXTINT0_NUM #define OETH_IRQ XCHAL_EXTINT1_NUM +#define C67X00_IRQ XCHAL_EXTINT5_NUM #endif /* @@ -63,5 +65,5 @@ #define C67X00_PADDR (XCHAL_KIO_PADDR + 0x0D0D0000) #define C67X00_SIZE 0x10 -#define C67X00_IRQ 5 + #endif /* __XTENSA_XTAVNET_HARDWARE_H */ diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 779be723eb2b..42285f35d313 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -175,8 +175,8 @@ static struct resource ethoc_res[] = { .flags = IORESOURCE_MEM, }, [2] = { /* IRQ number */ - .start = OETH_IRQ, - .end = OETH_IRQ, + .start = XTENSA_PIC_LINUX_IRQ(OETH_IRQ), + .end = XTENSA_PIC_LINUX_IRQ(OETH_IRQ), .flags = IORESOURCE_IRQ, }, }; @@ -213,8 +213,8 @@ static struct resource c67x00_res[] = { .flags = IORESOURCE_MEM, }, [1] = { /* IRQ number */ - .start = C67X00_IRQ, - .end = C67X00_IRQ, + .start = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ), + .end = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ), .flags = IORESOURCE_IRQ, }, }; @@ -247,7 +247,7 @@ static struct resource serial_resource = { static struct plat_serial8250_port serial_platform_data[] = { [0] = { .mapbase = DUART16552_PADDR, - .irq = DUART16552_INTNUM, + .irq = XTENSA_PIC_LINUX_IRQ(DUART16552_INTNUM), .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_IOREMAP, .iotype = XCHAL_HAVE_BE ? UPIO_MEM32BE : UPIO_MEM32, |