diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-05 22:39:30 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-05 22:39:30 +0300 |
commit | 7ae77150d94d3b535c7b85e6b3647113095e79bf (patch) | |
tree | 90fe894e7efd92898e813d88acfd4611d79be969 /arch | |
parent | 084623e468d535d98f883cc2ccf2c4fdf2108556 (diff) | |
parent | 1395375c592770fe5158a592944aaeed67fa94ff (diff) | |
download | linux-7ae77150d94d3b535c7b85e6b3647113095e79bf.tar.xz |
Merge tag 'powerpc-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Support for userspace to send requests directly to the on-chip GZIP
accelerator on Power9.
- Rework of our lockless page table walking (__find_linux_pte()) to
make it safe against parallel page table manipulations without
relying on an IPI for serialisation.
- A series of fixes & enhancements to make our machine check handling
more robust.
- Lots of plumbing to add support for "prefixed" (64-bit) instructions
on Power10.
- Support for using huge pages for the linear mapping on 8xx (32-bit).
- Remove obsolete Xilinx PPC405/PPC440 support, and an associated sound
driver.
- Removal of some obsolete 40x platforms and associated cruft.
- Initial support for booting on Power10.
- Lots of other small features, cleanups & fixes.
Thanks to: Alexey Kardashevskiy, Alistair Popple, Andrew Donnellan,
Andrey Abramov, Aneesh Kumar K.V, Balamuruhan S, Bharata B Rao, Bulent
Abali, Cédric Le Goater, Chen Zhou, Christian Zigotzky, Christophe
JAILLET, Christophe Leroy, Dmitry Torokhov, Emmanuel Nicolet, Erhard F.,
Gautham R. Shenoy, Geoff Levand, George Spelvin, Greg Kurz, Gustavo A.
R. Silva, Gustavo Walbon, Haren Myneni, Hari Bathini, Joel Stanley,
Jordan Niethe, Kajol Jain, Kees Cook, Leonardo Bras, Madhavan
Srinivasan, Mahesh Salgaonkar, Markus Elfring, Michael Neuling, Michal
Simek, Nathan Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin,
Oliver O'Halloran, Paul Mackerras, Pingfan Liu, Qian Cai, Ram Pai,
Raphael Moreira Zinsly, Ravi Bangoria, Sam Bobroff, Sandipan Das, Segher
Boessenkool, Stephen Rothwell, Sukadev Bhattiprolu, Tyrel Datwyler,
Wolfram Sang, Xiongfeng Wang.
* tag 'powerpc-5.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (299 commits)
powerpc/pseries: Make vio and ibmebus initcalls pseries specific
cxl: Remove dead Kconfig options
powerpc: Add POWER10 architected mode
powerpc/dt_cpu_ftrs: Add MMA feature
powerpc/dt_cpu_ftrs: Enable Prefixed Instructions
powerpc/dt_cpu_ftrs: Advertise support for ISA v3.1 if selected
powerpc: Add support for ISA v3.1
powerpc: Add new HWCAP bits
powerpc/64s: Don't set FSCR bits in INIT_THREAD
powerpc/64s: Save FSCR to init_task.thread.fscr after feature init
powerpc/64s: Don't let DT CPU features set FSCR_DSCR
powerpc/64s: Don't init FSCR_DSCR in __init_FSCR()
powerpc/32s: Fix another build failure with CONFIG_PPC_KUAP_DEBUG
powerpc/module_64: Use special stub for _mcount() with -mprofile-kernel
powerpc/module_64: Simplify check for -mprofile-kernel ftrace relocations
powerpc/module_64: Consolidate ftrace code
powerpc/32: Disable KASAN with pages bigger than 16k
powerpc/uaccess: Don't set KUEP by default on book3s/32
powerpc/uaccess: Don't set KUAP by default on book3s/32
powerpc/8xx: Reduce time spent in allow_user_access() and friends
...
Diffstat (limited to 'arch')
283 files changed, 6537 insertions, 6198 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c4f36a0b6b6e..9fa23eb320ff 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -171,8 +171,8 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_KASAN if PPC32 - select HAVE_ARCH_KASAN_VMALLOC if PPC32 + select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 + select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT @@ -477,7 +477,7 @@ config LD_HEAD_STUB_CATCH If unsure, say "N". config MPROFILE_KERNEL - depends on PPC64 && CPU_LITTLE_ENDIAN + depends on PPC64 && CPU_LITTLE_ENDIAN && FUNCTION_TRACER def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__) config HOTPLUG_CPU @@ -764,41 +764,18 @@ config THREAD_SHIFT range 13 15 default "15" if PPC_256K_PAGES default "14" if PPC64 + default "14" if KASAN default "13" help Used to define the stack size. The default is almost always what you want. Only change this if you know what you are doing. -config ETEXT_SHIFT_BOOL - bool "Set custom etext alignment" if STRICT_KERNEL_RWX && \ - (PPC_BOOK3S_32 || PPC_8xx) - depends on ADVANCED_OPTIONS - help - This option allows you to set the kernel end of text alignment. When - RAM is mapped by blocks, the alignment needs to fit the size and - number of possible blocks. The default should be OK for most configs. - - Say N here unless you know what you are doing. - -config ETEXT_SHIFT - int "_etext shift" if ETEXT_SHIFT_BOOL - range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - range 19 23 if STRICT_KERNEL_RWX && PPC_8xx - default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - default 19 if STRICT_KERNEL_RWX && PPC_8xx - default PPC_PAGE_SHIFT - help - On Book3S 32 (603+), IBATs are used to map kernel text. 
- Smaller is the alignment, greater is the number of necessary IBATs. - - On 8xx, large pages (512kb or 8M) are used to map kernel linear - memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. - config DATA_SHIFT_BOOL - bool "Set custom data alignment" if STRICT_KERNEL_RWX && \ - (PPC_BOOK3S_32 || PPC_8xx) + bool "Set custom data alignment" depends on ADVANCED_OPTIONS + depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \ + (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX)) help This option allows you to set the kernel data alignment. When RAM is mapped by blocks, the alignment needs to fit the size and @@ -809,10 +786,13 @@ config DATA_SHIFT_BOOL config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 - range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - range 19 23 if STRICT_KERNEL_RWX && PPC_8xx + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 + default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx + default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA + default 19 if DEBUG_PAGEALLOC && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. @@ -820,7 +800,8 @@ config DATA_SHIFT On 8xx, large pages (512kb or 8M) are used to map kernel linear memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. + in that case. If PIN_TLB is selected, it must be aligned to 8M as + 8M pages will be pinned. 
config FORCE_MAX_ZONEORDER int "Maximum zone order" @@ -1218,26 +1199,6 @@ config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx default "0xc0000000" - -config PIN_TLB - bool "Pinned Kernel TLBs (860 ONLY)" - depends on ADVANCED_OPTIONS && PPC_8xx && \ - !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX - -config PIN_TLB_DATA - bool "Pinned TLB for DATA" - depends on PIN_TLB - default y - -config PIN_TLB_IMMR - bool "Pinned TLB for IMMR" - depends on PIN_TLB || PPC_EARLY_DEBUG_CPM - default y - -config PIN_TLB_TEXT - bool "Pinned TLB for TEXT" - depends on PIN_TLB - default y endmenu if PPC64 diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 0b063830eea8..b88900f4832f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -230,7 +230,7 @@ config PPC_EARLY_DEBUG_40x help Select this to enable early debugging for IBM 40x chips via the inbuilt serial port. This works on chips with a 16550 compatible - UART. Xilinx chips with uartlite cannot use this option. + UART. config PPC_EARLY_DEBUG_CPM bool "Early serial debugging for Freescale CPM-based serial ports" diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index c53a1b8bba8b..63d7456b9518 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -75,11 +75,9 @@ $(obj)/cuboot-hotfoot.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405 -$(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-iss4xx.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-akebono.o: BOOTCFLAGS += -mcpu=405 -$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405 # The pre-boot decompressors pull in a lot of kernel headers and other source # files. 
This creates a bit of a dependency headache since we need to copy @@ -129,14 +127,12 @@ src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += ugecon.c fsl-soc.c -src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c src-wlib-$(CONFIG_CPM) += cpm-serial.c src-plat-y := of.c epapr.c -src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ - treeboot-walnut.c cuboot-acadia.c \ - cuboot-kilauea.c simpleboot.c \ - virtex405-head.S virtex.c +src-plat-$(CONFIG_40x) += fixed-head.S cuboot-hotfoot.c \ + cuboot-acadia.c \ + cuboot-kilauea.c simpleboot.c src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \ cuboot-bamboo.c cuboot-sam440ep.c \ cuboot-sequoia.c cuboot-rainier.c \ @@ -144,7 +140,7 @@ src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \ cuboot-warp.c cuboot-yosemite.c \ treeboot-iss4xx.c treeboot-currituck.c \ treeboot-akebono.c \ - simpleboot.c fixed-head.S virtex.c + simpleboot.c fixed-head.S src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c @@ -279,9 +275,7 @@ image-$(CONFIG_EPAPR_BOOT) += zImage.epapr # # Board ports in arch/powerpc/platform/40x/Kconfig -image-$(CONFIG_EP405) += dtbImage.ep405 image-$(CONFIG_HOTFOOT) += cuImage.hotfoot -image-$(CONFIG_WALNUT) += treeImage.walnut image-$(CONFIG_ACADIA) += cuImage.acadia image-$(CONFIG_OBS600) += uImage.obs600 diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile index 1cbc0e4ce857..fb335d05aae8 100644 --- a/arch/powerpc/boot/dts/Makefile +++ b/arch/powerpc/boot/dts/Makefile @@ -4,4 +4,3 @@ subdir-y += fsl dtstree := $(srctree)/$(src) dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) 
-dtb-$(CONFIG_XILINX_VIRTEX440_GENERIC_BOARD) += virtex440-ml507.dtb virtex440-ml510.dtb diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts deleted file mode 100644 index 4ac9c5ab6e6b..000000000000 --- a/arch/powerpc/boot/dts/ep405.dts +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Device Tree Source for EP405 - * - * Copyright 2007 IBM Corp. - * Benjamin Herrenschmidt <benh@kernel.crashing.org> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without - * any warranty of any kind, whether express or implied. - */ - -/dts-v1/; - -/ { - #address-cells = <1>; - #size-cells = <1>; - model = "ep405"; - compatible = "ep405"; - dcr-parent = <&{/cpus/cpu@0}>; - - aliases { - ethernet0 = &EMAC; - serial0 = &UART0; - serial1 = &UART1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - device_type = "cpu"; - model = "PowerPC,405GP"; - reg = <0x00000000>; - clock-frequency = <200000000>; /* Filled in by zImage */ - timebase-frequency = <0>; /* Filled in by zImage */ - i-cache-line-size = <32>; - d-cache-line-size = <32>; - i-cache-size = <16384>; - d-cache-size = <16384>; - dcr-controller; - dcr-access-method = "native"; - }; - }; - - memory { - device_type = "memory"; - reg = <0x00000000 0x00000000>; /* Filled in by zImage */ - }; - - UIC0: interrupt-controller { - compatible = "ibm,uic"; - interrupt-controller; - cell-index = <0>; - dcr-reg = <0x0c0 0x009>; - #address-cells = <0>; - #size-cells = <0>; - #interrupt-cells = <2>; - }; - - plb { - compatible = "ibm,plb3"; - #address-cells = <1>; - #size-cells = <1>; - ranges; - clock-frequency = <0>; /* Filled in by zImage */ - - SDRAM0: memory-controller { - compatible = "ibm,sdram-405gp"; - dcr-reg = <0x010 0x002>; - }; - - MAL: mcmal { - compatible = "ibm,mcmal-405gp", "ibm,mcmal"; - dcr-reg = <0x180 0x062>; - num-tx-chans = <1>; - num-rx-chans = <1>; - interrupt-parent = <&UIC0>; - interrupts = < - 0xb 
0x4 /* TXEOB */ - 0xc 0x4 /* RXEOB */ - 0xa 0x4 /* SERR */ - 0xd 0x4 /* TXDE */ - 0xe 0x4 /* RXDE */>; - }; - - POB0: opb { - compatible = "ibm,opb-405gp", "ibm,opb"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0xef600000 0xef600000 0x00a00000>; - dcr-reg = <0x0a0 0x005>; - clock-frequency = <0>; /* Filled in by zImage */ - - UART0: serial@ef600300 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600300 0x00000008>; - virtual-reg = <0xef600300>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x0 0x4>; - }; - - UART1: serial@ef600400 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600400 0x00000008>; - virtual-reg = <0xef600400>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x1 0x4>; - }; - - IIC: i2c@ef600500 { - compatible = "ibm,iic-405gp", "ibm,iic"; - reg = <0xef600500 0x00000011>; - interrupt-parent = <&UIC0>; - interrupts = <0x2 0x4>; - }; - - GPIO: gpio@ef600700 { - compatible = "ibm,gpio-405gp"; - reg = <0xef600700 0x00000020>; - }; - - EMAC: ethernet@ef600800 { - linux,network-index = <0x0>; - device_type = "network"; - compatible = "ibm,emac-405gp", "ibm,emac"; - interrupt-parent = <&UIC0>; - interrupts = < - 0xf 0x4 /* Ethernet */ - 0x9 0x4 /* Ethernet Wake Up */>; - local-mac-address = [000000000000]; /* Filled in by zImage */ - reg = <0xef600800 0x00000070>; - mal-device = <&MAL>; - mal-tx-channel = <0>; - mal-rx-channel = <0>; - cell-index = <0>; - max-frame-size = <1500>; - rx-fifo-size = <4096>; - tx-fifo-size = <2048>; - phy-mode = "rmii"; - phy-map = <0x00000000>; - }; - - }; - - EBC0: ebc { - compatible = "ibm,ebc-405gp", "ibm,ebc"; - dcr-reg = <0x012 0x002>; - #address-cells = <2>; - #size-cells = <1>; - - - /* The ranges property is supplied by the bootwrapper - * and is based on the firmware's configuration of the - * EBC bridge - */ - 
clock-frequency = <0>; /* Filled in by zImage */ - - /* NVRAM and RTC */ - nvrtc@4,200000 { - compatible = "ds1742"; - reg = <0x00000004 0x00200000 0x00000000>; /* size fixed up by zImage */ - }; - - /* "BCSR" CPLD contains a PCI irq controller */ - bcsr@4,0 { - compatible = "ep405-bcsr"; - reg = <0x00000004 0x00000000 0x00000010>; - interrupt-controller; - /* Routing table */ - irq-routing = [ 00 /* SYSERR */ - 01 /* STTM */ - 01 /* RTC */ - 01 /* FENET */ - 02 /* NB PCIIRQ mux ? */ - 03 /* SB Winbond 8259 ? */ - 04 /* Serial Ring */ - 05 /* USB (ep405pc) */ - 06 /* XIRQ 0 */ - 06 /* XIRQ 1 */ - 06 /* XIRQ 2 */ - 06 /* XIRQ 3 */ - 06 /* XIRQ 4 */ - 06 /* XIRQ 5 */ - 06 /* XIRQ 6 */ - 07]; /* Reserved */ - }; - }; - - PCI0: pci@ec000000 { - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - compatible = "ibm,plb405gp-pci", "ibm,plb-pci"; - primary; - reg = <0xeec00000 0x00000008 /* Config space access */ - 0xeed80000 0x00000004 /* IACK */ - 0xeed80000 0x00000004 /* Special cycle */ - 0xef480000 0x00000040>; /* Internal registers */ - - /* Outbound ranges, one memory and one IO, - * later cannot be changed. Chip supports a second - * IO range but we don't use it for now - */ - ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000 - 0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>; - - /* Inbound 2GB range starting at 0 */ - dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>; - - /* That's all I know about IRQs on that thing ... 
*/ - interrupt-map-mask = <0xf800 0x0 0x0 0x0>; - interrupt-map = < - /* USB */ - 0x7000 0x0 0x0 0x0 &UIC0 0x1e 0x8 /* IRQ5 */ - >; - }; - }; - - chosen { - stdout-path = "/plb/opb/serial@ef600300"; - }; -}; diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index c259c6b3ac5a..780e13d99e7b 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -3,9 +3,7 @@ * phyCORE-MPC5200B-IO (pcm032) board Device Tree Source * * Copyright (C) 2006-2009 Pengutronix - * Sascha Hauer <s.hauer@pengutronix.de> - * Juergen Beisert <j.beisert@pengutronix.de> - * Wolfram Sang <w.sang@pengutronix.de> + * Sascha Hauer, Juergen Beisert, Wolfram Sang <kernel@pengutronix.de> */ /include/ "mpc5200b.dtsi" diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts deleted file mode 100644 index 66f1c6312de6..000000000000 --- a/arch/powerpc/boot/dts/virtex440-ml507.dts +++ /dev/null @@ -1,406 +0,0 @@ -/* - * This file supports the Xilinx ML507 board with the 440 processor. - * A reference design for the FPGA is provided at http://git.xilinx.com. - * - * (C) Copyright 2008 Xilinx, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - * - * --- - * - * Device Tree Generator version: 1.1 - * - * CAUTION: This file is automatically generated by libgen. 
- * Version: Xilinx EDK 10.1.03 EDK_K_SP3.6 - * - * XPS project directory: ml507_ppc440_emb_ref - */ - -/dts-v1/; - -/ { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,virtex440"; - dcr-parent = <&ppc440_0>; - model = "testing"; - DDR2_SDRAM: memory@0 { - device_type = "memory"; - reg = < 0 0x10000000 >; - } ; - chosen { - bootargs = "console=ttyS0 root=/dev/ram"; - stdout-path = &RS232_Uart_1; - } ; - cpus { - #address-cells = <1>; - #cpus = <1>; - #size-cells = <0>; - ppc440_0: cpu@0 { - clock-frequency = <400000000>; - compatible = "PowerPC,440", "ibm,ppc440"; - d-cache-line-size = <0x20>; - d-cache-size = <0x8000>; - dcr-access-method = "native"; - dcr-controller ; - device_type = "cpu"; - i-cache-line-size = <0x20>; - i-cache-size = <0x8000>; - model = "PowerPC,440"; - reg = <0>; - timebase-frequency = <400000000>; - xlnx,apu-control = <1>; - xlnx,apu-udi-0 = <0>; - xlnx,apu-udi-1 = <0>; - xlnx,apu-udi-10 = <0>; - xlnx,apu-udi-11 = <0>; - xlnx,apu-udi-12 = <0>; - xlnx,apu-udi-13 = <0>; - xlnx,apu-udi-14 = <0>; - xlnx,apu-udi-15 = <0>; - xlnx,apu-udi-2 = <0>; - xlnx,apu-udi-3 = <0>; - xlnx,apu-udi-4 = <0>; - xlnx,apu-udi-5 = <0>; - xlnx,apu-udi-6 = <0>; - xlnx,apu-udi-7 = <0>; - xlnx,apu-udi-8 = <0>; - xlnx,apu-udi-9 = <0>; - xlnx,dcr-autolock-enable = <1>; - xlnx,dcu-rd-ld-cache-plb-prio = <0>; - xlnx,dcu-rd-noncache-plb-prio = <0>; - xlnx,dcu-rd-touch-plb-prio = <0>; - xlnx,dcu-rd-urgent-plb-prio = <0>; - xlnx,dcu-wr-flush-plb-prio = <0>; - xlnx,dcu-wr-store-plb-prio = <0>; - xlnx,dcu-wr-urgent-plb-prio = <0>; - xlnx,dma0-control = <0>; - xlnx,dma0-plb-prio = <0>; - xlnx,dma0-rxchannelctrl = <0x1010000>; - xlnx,dma0-rxirqtimer = <0x3ff>; - xlnx,dma0-txchannelctrl = <0x1010000>; - xlnx,dma0-txirqtimer = <0x3ff>; - xlnx,dma1-control = <0>; - xlnx,dma1-plb-prio = <0>; - xlnx,dma1-rxchannelctrl = <0x1010000>; - xlnx,dma1-rxirqtimer = <0x3ff>; - xlnx,dma1-txchannelctrl = <0x1010000>; - xlnx,dma1-txirqtimer = <0x3ff>; - xlnx,dma2-control = <0>; - 
xlnx,dma2-plb-prio = <0>; - xlnx,dma2-rxchannelctrl = <0x1010000>; - xlnx,dma2-rxirqtimer = <0x3ff>; - xlnx,dma2-txchannelctrl = <0x1010000>; - xlnx,dma2-txirqtimer = <0x3ff>; - xlnx,dma3-control = <0>; - xlnx,dma3-plb-prio = <0>; - xlnx,dma3-rxchannelctrl = <0x1010000>; - xlnx,dma3-rxirqtimer = <0x3ff>; - xlnx,dma3-txchannelctrl = <0x1010000>; - xlnx,dma3-txirqtimer = <0x3ff>; - xlnx,endian-reset = <0>; - xlnx,generate-plb-timespecs = <1>; - xlnx,icu-rd-fetch-plb-prio = <0>; - xlnx,icu-rd-spec-plb-prio = <0>; - xlnx,icu-rd-touch-plb-prio = <0>; - xlnx,interconnect-imask = <0xffffffff>; - xlnx,mplb-allow-lock-xfer = <1>; - xlnx,mplb-arb-mode = <0>; - xlnx,mplb-awidth = <0x20>; - xlnx,mplb-counter = <0x500>; - xlnx,mplb-dwidth = <0x80>; - xlnx,mplb-max-burst = <8>; - xlnx,mplb-native-dwidth = <0x80>; - xlnx,mplb-p2p = <0>; - xlnx,mplb-prio-dcur = <2>; - xlnx,mplb-prio-dcuw = <3>; - xlnx,mplb-prio-icu = <4>; - xlnx,mplb-prio-splb0 = <1>; - xlnx,mplb-prio-splb1 = <0>; - xlnx,mplb-read-pipe-enable = <1>; - xlnx,mplb-sync-tattribute = <0>; - xlnx,mplb-wdog-enable = <1>; - xlnx,mplb-write-pipe-enable = <1>; - xlnx,mplb-write-post-enable = <1>; - xlnx,num-dma = <1>; - xlnx,pir = <0xf>; - xlnx,ppc440mc-addr-base = <0>; - xlnx,ppc440mc-addr-high = <0xfffffff>; - xlnx,ppc440mc-arb-mode = <0>; - xlnx,ppc440mc-bank-conflict-mask = <0xc00000>; - xlnx,ppc440mc-control = <0xf810008f>; - xlnx,ppc440mc-max-burst = <8>; - xlnx,ppc440mc-prio-dcur = <2>; - xlnx,ppc440mc-prio-dcuw = <3>; - xlnx,ppc440mc-prio-icu = <4>; - xlnx,ppc440mc-prio-splb0 = <1>; - xlnx,ppc440mc-prio-splb1 = <0>; - xlnx,ppc440mc-row-conflict-mask = <0x3ffe00>; - xlnx,ppcdm-asyncmode = <0>; - xlnx,ppcds-asyncmode = <0>; - xlnx,user-reset = <0>; - DMA0: sdma@80 { - compatible = "xlnx,ll-dma-1.00.a"; - dcr-reg = < 0x80 0x11 >; - interrupt-parent = <&xps_intc_0>; - interrupts = < 10 2 11 2 >; - } ; - } ; - } ; - plb_v46_0: plb@0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,plb-v46-1.03.a", 
"simple-bus"; - ranges ; - DIP_Switches_8Bit: gpio@81460000 { - compatible = "xlnx,xps-gpio-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 7 2 >; - reg = < 0x81460000 0x10000 >; - xlnx,all-inputs = <1>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <8>; - xlnx,interrupt-present = <1>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - FLASH: flash@fc000000 { - bank-width = <2>; - compatible = "xlnx,xps-mch-emc-2.00.a", "cfi-flash"; - reg = < 0xfc000000 0x2000000 >; - xlnx,family = "virtex5"; - xlnx,include-datawidth-matching-0 = <0x1>; - xlnx,include-datawidth-matching-1 = <0x0>; - xlnx,include-datawidth-matching-2 = <0x0>; - xlnx,include-datawidth-matching-3 = <0x0>; - xlnx,include-negedge-ioregs = <0x0>; - xlnx,include-plb-ipif = <0x1>; - xlnx,include-wrbuf = <0x1>; - xlnx,max-mem-width = <0x10>; - xlnx,mch-native-dwidth = <0x20>; - xlnx,mch-plb-clk-period-ps = <0x2710>; - xlnx,mch-splb-awidth = <0x20>; - xlnx,mch0-accessbuf-depth = <0x10>; - xlnx,mch0-protocol = <0x0>; - xlnx,mch0-rddatabuf-depth = <0x10>; - xlnx,mch1-accessbuf-depth = <0x10>; - xlnx,mch1-protocol = <0x0>; - xlnx,mch1-rddatabuf-depth = <0x10>; - xlnx,mch2-accessbuf-depth = <0x10>; - xlnx,mch2-protocol = <0x0>; - xlnx,mch2-rddatabuf-depth = <0x10>; - xlnx,mch3-accessbuf-depth = <0x10>; - xlnx,mch3-protocol = <0x0>; - xlnx,mch3-rddatabuf-depth = <0x10>; - xlnx,mem0-width = <0x10>; - xlnx,mem1-width = <0x20>; - xlnx,mem2-width = <0x20>; - xlnx,mem3-width = <0x20>; - xlnx,num-banks-mem = <0x1>; - xlnx,num-channels = <0x2>; - xlnx,priority-mode = <0x0>; - xlnx,synch-mem-0 = <0x0>; - xlnx,synch-mem-1 = <0x0>; - xlnx,synch-mem-2 = <0x0>; - xlnx,synch-mem-3 = <0x0>; - xlnx,synch-pipedelay-0 = <0x2>; - xlnx,synch-pipedelay-1 = <0x2>; - xlnx,synch-pipedelay-2 = <0x2>; - xlnx,synch-pipedelay-3 = <0x2>; - 
xlnx,tavdv-ps-mem-0 = <0x1adb0>; - xlnx,tavdv-ps-mem-1 = <0x3a98>; - xlnx,tavdv-ps-mem-2 = <0x3a98>; - xlnx,tavdv-ps-mem-3 = <0x3a98>; - xlnx,tcedv-ps-mem-0 = <0x1adb0>; - xlnx,tcedv-ps-mem-1 = <0x3a98>; - xlnx,tcedv-ps-mem-2 = <0x3a98>; - xlnx,tcedv-ps-mem-3 = <0x3a98>; - xlnx,thzce-ps-mem-0 = <0x88b8>; - xlnx,thzce-ps-mem-1 = <0x1b58>; - xlnx,thzce-ps-mem-2 = <0x1b58>; - xlnx,thzce-ps-mem-3 = <0x1b58>; - xlnx,thzoe-ps-mem-0 = <0x1b58>; - xlnx,thzoe-ps-mem-1 = <0x1b58>; - xlnx,thzoe-ps-mem-2 = <0x1b58>; - xlnx,thzoe-ps-mem-3 = <0x1b58>; - xlnx,tlzwe-ps-mem-0 = <0x88b8>; - xlnx,tlzwe-ps-mem-1 = <0x0>; - xlnx,tlzwe-ps-mem-2 = <0x0>; - xlnx,tlzwe-ps-mem-3 = <0x0>; - xlnx,twc-ps-mem-0 = <0x2af8>; - xlnx,twc-ps-mem-1 = <0x3a98>; - xlnx,twc-ps-mem-2 = <0x3a98>; - xlnx,twc-ps-mem-3 = <0x3a98>; - xlnx,twp-ps-mem-0 = <0x11170>; - xlnx,twp-ps-mem-1 = <0x2ee0>; - xlnx,twp-ps-mem-2 = <0x2ee0>; - xlnx,twp-ps-mem-3 = <0x2ee0>; - xlnx,xcl0-linesize = <0x4>; - xlnx,xcl0-writexfer = <0x1>; - xlnx,xcl1-linesize = <0x4>; - xlnx,xcl1-writexfer = <0x1>; - xlnx,xcl2-linesize = <0x4>; - xlnx,xcl2-writexfer = <0x1>; - xlnx,xcl3-linesize = <0x4>; - xlnx,xcl3-writexfer = <0x1>; - } ; - Hard_Ethernet_MAC: xps-ll-temac@81c00000 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,compound"; - ethernet@81c00000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "xlnx,xps-ll-temac-1.01.b"; - device_type = "network"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 5 2 >; - llink-connected = <&DMA0>; - local-mac-address = [ 02 00 00 00 00 00 ]; - reg = < 0x81c00000 0x40 >; - xlnx,bus2core-clk-ratio = <1>; - xlnx,phy-type = <1>; - xlnx,phyaddr = <1>; - xlnx,rxcsum = <1>; - xlnx,rxfifo = <0x1000>; - xlnx,temac-type = <0>; - xlnx,txcsum = <1>; - xlnx,txfifo = <0x1000>; - phy-handle = <&phy7>; - clock-frequency = <100000000>; - phy7: phy@7 { - compatible = "marvell,88e1111"; - reg = <7>; - } ; - } ; - } ; - IIC_EEPROM: i2c@81600000 { - compatible = 
"xlnx,xps-iic-2.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 6 2 >; - reg = < 0x81600000 0x10000 >; - xlnx,clk-freq = <0x5f5e100>; - xlnx,family = "virtex5"; - xlnx,gpo-width = <0x1>; - xlnx,iic-freq = <0x186a0>; - xlnx,scl-inertial-delay = <0x0>; - xlnx,sda-inertial-delay = <0x0>; - xlnx,ten-bit-adr = <0x0>; - } ; - LEDs_8Bit: gpio@81400000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81400000 0x10000 >; - xlnx,all-inputs = <0>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <8>; - xlnx,interrupt-present = <0>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - LEDs_Positions: gpio@81420000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81420000 0x10000 >; - xlnx,all-inputs = <0>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <5>; - xlnx,interrupt-present = <0>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - Push_Buttons_5Bit: gpio@81440000 { - compatible = "xlnx,xps-gpio-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 8 2 >; - reg = < 0x81440000 0x10000 >; - xlnx,all-inputs = <1>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <5>; - xlnx,interrupt-present = <1>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - RS232_Uart_1: serial@83e00000 { - clock-frequency = <100000000>; - compatible = "xlnx,xps-uart16550-2.00.b", "ns16550"; - current-speed = <9600>; - device_type = "serial"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 9 2 >; - reg = < 0x83e00000 0x10000 >; - 
reg-offset = <0x1003>; - reg-shift = <2>; - xlnx,family = "virtex5"; - xlnx,has-external-rclk = <0>; - xlnx,has-external-xin = <0>; - xlnx,is-a-16550 = <1>; - } ; - SysACE_CompactFlash: sysace@83600000 { - compatible = "xlnx,xps-sysace-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 4 2 >; - reg = < 0x83600000 0x10000 >; - xlnx,family = "virtex5"; - xlnx,mem-width = <0x10>; - } ; - xps_bram_if_cntlr_1: xps-bram-if-cntlr@ffff0000 { - compatible = "xlnx,xps-bram-if-cntlr-1.00.a"; - reg = < 0xffff0000 0x10000 >; - xlnx,family = "virtex5"; - } ; - xps_intc_0: interrupt-controller@81800000 { - #interrupt-cells = <2>; - compatible = "xlnx,xps-intc-1.00.a"; - interrupt-controller ; - reg = < 0x81800000 0x10000 >; - xlnx,num-intr-inputs = <0xc>; - } ; - xps_timebase_wdt_1: xps-timebase-wdt@83a00000 { - compatible = "xlnx,xps-timebase-wdt-1.00.b"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 2 0 1 2 >; - reg = < 0x83a00000 0x10000 >; - xlnx,family = "virtex5"; - xlnx,wdt-enable-once = <0>; - xlnx,wdt-interval = <0x1e>; - } ; - xps_timer_1: timer@83c00000 { - compatible = "xlnx,xps-timer-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 3 2 >; - reg = < 0x83c00000 0x10000 >; - xlnx,count-width = <0x20>; - xlnx,family = "virtex5"; - xlnx,gen0-assert = <1>; - xlnx,gen1-assert = <1>; - xlnx,one-timer-only = <1>; - xlnx,trig0-assert = <1>; - xlnx,trig1-assert = <1>; - } ; - } ; -} ; diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts deleted file mode 100644 index 3b736ca26ddc..000000000000 --- a/arch/powerpc/boot/dts/virtex440-ml510.dts +++ /dev/null @@ -1,466 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Xilinx ML510 Reference Design support - * - * This DTS file was created for the ml510_bsb1_pcores_ppc440 reference design. - * The reference design contains a bug which prevent PCI DMA from working - * properly. A description of the bug is given in the plbv46_pci section. 
It - * needs to be fixed by the user until Xilinx updates their reference design. - * - * Copyright 2009, Roderick Colenbrander - */ - -/dts-v1/; -/ { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,ml510-ref-design", "xlnx,virtex440"; - dcr-parent = <&ppc440_0>; - DDR2_SDRAM_DIMM0: memory@0 { - device_type = "memory"; - reg = < 0x0 0x20000000 >; - } ; - alias { - ethernet0 = &Hard_Ethernet_MAC; - serial0 = &RS232_Uart_1; - } ; - chosen { - bootargs = "console=ttyS0 root=/dev/ram"; - stdout-path = "/plb@0/serial@83e00000"; - } ; - cpus { - #address-cells = <1>; - #cpus = <0x1>; - #size-cells = <0>; - ppc440_0: cpu@0 { - #address-cells = <1>; - #size-cells = <1>; - clock-frequency = <300000000>; - compatible = "PowerPC,440", "ibm,ppc440"; - d-cache-line-size = <0x20>; - d-cache-size = <0x8000>; - dcr-access-method = "native"; - dcr-controller ; - device_type = "cpu"; - i-cache-line-size = <0x20>; - i-cache-size = <0x8000>; - model = "PowerPC,440"; - reg = <0>; - timebase-frequency = <300000000>; - xlnx,apu-control = <0x2000>; - xlnx,apu-udi-0 = <0x0>; - xlnx,apu-udi-1 = <0x0>; - xlnx,apu-udi-10 = <0x0>; - xlnx,apu-udi-11 = <0x0>; - xlnx,apu-udi-12 = <0x0>; - xlnx,apu-udi-13 = <0x0>; - xlnx,apu-udi-14 = <0x0>; - xlnx,apu-udi-15 = <0x0>; - xlnx,apu-udi-2 = <0x0>; - xlnx,apu-udi-3 = <0x0>; - xlnx,apu-udi-4 = <0x0>; - xlnx,apu-udi-5 = <0x0>; - xlnx,apu-udi-6 = <0x0>; - xlnx,apu-udi-7 = <0x0>; - xlnx,apu-udi-8 = <0x0>; - xlnx,apu-udi-9 = <0x0>; - xlnx,dcr-autolock-enable = <0x1>; - xlnx,dcu-rd-ld-cache-plb-prio = <0x0>; - xlnx,dcu-rd-noncache-plb-prio = <0x0>; - xlnx,dcu-rd-touch-plb-prio = <0x0>; - xlnx,dcu-rd-urgent-plb-prio = <0x0>; - xlnx,dcu-wr-flush-plb-prio = <0x0>; - xlnx,dcu-wr-store-plb-prio = <0x0>; - xlnx,dcu-wr-urgent-plb-prio = <0x0>; - xlnx,dma0-control = <0x0>; - xlnx,dma0-plb-prio = <0x0>; - xlnx,dma0-rxchannelctrl = <0x1010000>; - xlnx,dma0-rxirqtimer = <0x3ff>; - xlnx,dma0-txchannelctrl = <0x1010000>; - xlnx,dma0-txirqtimer = <0x3ff>; 
- xlnx,dma1-control = <0x0>; - xlnx,dma1-plb-prio = <0x0>; - xlnx,dma1-rxchannelctrl = <0x1010000>; - xlnx,dma1-rxirqtimer = <0x3ff>; - xlnx,dma1-txchannelctrl = <0x1010000>; - xlnx,dma1-txirqtimer = <0x3ff>; - xlnx,dma2-control = <0x0>; - xlnx,dma2-plb-prio = <0x0>; - xlnx,dma2-rxchannelctrl = <0x1010000>; - xlnx,dma2-rxirqtimer = <0x3ff>; - xlnx,dma2-txchannelctrl = <0x1010000>; - xlnx,dma2-txirqtimer = <0x3ff>; - xlnx,dma3-control = <0x0>; - xlnx,dma3-plb-prio = <0x0>; - xlnx,dma3-rxchannelctrl = <0x1010000>; - xlnx,dma3-rxirqtimer = <0x3ff>; - xlnx,dma3-txchannelctrl = <0x1010000>; - xlnx,dma3-txirqtimer = <0x3ff>; - xlnx,endian-reset = <0x0>; - xlnx,generate-plb-timespecs = <0x1>; - xlnx,icu-rd-fetch-plb-prio = <0x0>; - xlnx,icu-rd-spec-plb-prio = <0x0>; - xlnx,icu-rd-touch-plb-prio = <0x0>; - xlnx,interconnect-imask = <0xffffffff>; - xlnx,mplb-allow-lock-xfer = <0x1>; - xlnx,mplb-arb-mode = <0x0>; - xlnx,mplb-awidth = <0x20>; - xlnx,mplb-counter = <0x500>; - xlnx,mplb-dwidth = <0x80>; - xlnx,mplb-max-burst = <0x8>; - xlnx,mplb-native-dwidth = <0x80>; - xlnx,mplb-p2p = <0x0>; - xlnx,mplb-prio-dcur = <0x2>; - xlnx,mplb-prio-dcuw = <0x3>; - xlnx,mplb-prio-icu = <0x4>; - xlnx,mplb-prio-splb0 = <0x1>; - xlnx,mplb-prio-splb1 = <0x0>; - xlnx,mplb-read-pipe-enable = <0x1>; - xlnx,mplb-sync-tattribute = <0x0>; - xlnx,mplb-wdog-enable = <0x1>; - xlnx,mplb-write-pipe-enable = <0x1>; - xlnx,mplb-write-post-enable = <0x1>; - xlnx,num-dma = <0x0>; - xlnx,pir = <0xf>; - xlnx,ppc440mc-addr-base = <0x0>; - xlnx,ppc440mc-addr-high = <0x1fffffff>; - xlnx,ppc440mc-arb-mode = <0x0>; - xlnx,ppc440mc-bank-conflict-mask = <0x1800000>; - xlnx,ppc440mc-control = <0xf810008f>; - xlnx,ppc440mc-max-burst = <0x8>; - xlnx,ppc440mc-prio-dcur = <0x2>; - xlnx,ppc440mc-prio-dcuw = <0x3>; - xlnx,ppc440mc-prio-icu = <0x4>; - xlnx,ppc440mc-prio-splb0 = <0x1>; - xlnx,ppc440mc-prio-splb1 = <0x0>; - xlnx,ppc440mc-row-conflict-mask = <0x7ffe00>; - xlnx,ppcdm-asyncmode = <0x0>; - xlnx,ppcds-asyncmode 
= <0x0>; - xlnx,user-reset = <0x0>; - } ; - } ; - plb_v46_0: plb@0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,plb-v46-1.03.a", "simple-bus"; - ranges ; - FLASH: flash@fc000000 { - bank-width = <2>; - compatible = "xlnx,xps-mch-emc-2.00.a", "cfi-flash"; - reg = < 0xfc000000 0x2000000 >; - xlnx,family = "virtex5"; - xlnx,include-datawidth-matching-0 = <0x1>; - xlnx,include-datawidth-matching-1 = <0x0>; - xlnx,include-datawidth-matching-2 = <0x0>; - xlnx,include-datawidth-matching-3 = <0x0>; - xlnx,include-negedge-ioregs = <0x0>; - xlnx,include-plb-ipif = <0x1>; - xlnx,include-wrbuf = <0x1>; - xlnx,max-mem-width = <0x10>; - xlnx,mch-native-dwidth = <0x20>; - xlnx,mch-plb-clk-period-ps = <0x2710>; - xlnx,mch-splb-awidth = <0x20>; - xlnx,mch0-accessbuf-depth = <0x10>; - xlnx,mch0-protocol = <0x0>; - xlnx,mch0-rddatabuf-depth = <0x10>; - xlnx,mch1-accessbuf-depth = <0x10>; - xlnx,mch1-protocol = <0x0>; - xlnx,mch1-rddatabuf-depth = <0x10>; - xlnx,mch2-accessbuf-depth = <0x10>; - xlnx,mch2-protocol = <0x0>; - xlnx,mch2-rddatabuf-depth = <0x10>; - xlnx,mch3-accessbuf-depth = <0x10>; - xlnx,mch3-protocol = <0x0>; - xlnx,mch3-rddatabuf-depth = <0x10>; - xlnx,mem0-width = <0x10>; - xlnx,mem1-width = <0x20>; - xlnx,mem2-width = <0x20>; - xlnx,mem3-width = <0x20>; - xlnx,num-banks-mem = <0x1>; - xlnx,num-channels = <0x2>; - xlnx,priority-mode = <0x0>; - xlnx,synch-mem-0 = <0x0>; - xlnx,synch-mem-1 = <0x0>; - xlnx,synch-mem-2 = <0x0>; - xlnx,synch-mem-3 = <0x0>; - xlnx,synch-pipedelay-0 = <0x2>; - xlnx,synch-pipedelay-1 = <0x2>; - xlnx,synch-pipedelay-2 = <0x2>; - xlnx,synch-pipedelay-3 = <0x2>; - xlnx,tavdv-ps-mem-0 = <0x1adb0>; - xlnx,tavdv-ps-mem-1 = <0x3a98>; - xlnx,tavdv-ps-mem-2 = <0x3a98>; - xlnx,tavdv-ps-mem-3 = <0x3a98>; - xlnx,tcedv-ps-mem-0 = <0x1adb0>; - xlnx,tcedv-ps-mem-1 = <0x3a98>; - xlnx,tcedv-ps-mem-2 = <0x3a98>; - xlnx,tcedv-ps-mem-3 = <0x3a98>; - xlnx,thzce-ps-mem-0 = <0x88b8>; - xlnx,thzce-ps-mem-1 = <0x1b58>; - xlnx,thzce-ps-mem-2 = 
<0x1b58>; - xlnx,thzce-ps-mem-3 = <0x1b58>; - xlnx,thzoe-ps-mem-0 = <0x1b58>; - xlnx,thzoe-ps-mem-1 = <0x1b58>; - xlnx,thzoe-ps-mem-2 = <0x1b58>; - xlnx,thzoe-ps-mem-3 = <0x1b58>; - xlnx,tlzwe-ps-mem-0 = <0x88b8>; - xlnx,tlzwe-ps-mem-1 = <0x0>; - xlnx,tlzwe-ps-mem-2 = <0x0>; - xlnx,tlzwe-ps-mem-3 = <0x0>; - xlnx,twc-ps-mem-0 = <0x1adb0>; - xlnx,twc-ps-mem-1 = <0x3a98>; - xlnx,twc-ps-mem-2 = <0x3a98>; - xlnx,twc-ps-mem-3 = <0x3a98>; - xlnx,twp-ps-mem-0 = <0x11170>; - xlnx,twp-ps-mem-1 = <0x2ee0>; - xlnx,twp-ps-mem-2 = <0x2ee0>; - xlnx,twp-ps-mem-3 = <0x2ee0>; - xlnx,xcl0-linesize = <0x4>; - xlnx,xcl0-writexfer = <0x1>; - xlnx,xcl1-linesize = <0x4>; - xlnx,xcl1-writexfer = <0x1>; - xlnx,xcl2-linesize = <0x4>; - xlnx,xcl2-writexfer = <0x1>; - xlnx,xcl3-linesize = <0x4>; - xlnx,xcl3-writexfer = <0x1>; - } ; - Hard_Ethernet_MAC: xps-ll-temac@81c00000 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,compound"; - ethernet@81c00000 { - compatible = "xlnx,xps-ll-temac-1.01.b"; - device_type = "network"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 8 2 >; - llink-connected = <&Hard_Ethernet_MAC_fifo>; - local-mac-address = [ 02 00 00 00 00 00 ]; - reg = < 0x81c00000 0x40 >; - xlnx,bus2core-clk-ratio = <0x1>; - xlnx,phy-type = <0x3>; - xlnx,phyaddr = <0x1>; - xlnx,rxcsum = <0x0>; - xlnx,rxfifo = <0x8000>; - xlnx,temac-type = <0x0>; - xlnx,txcsum = <0x0>; - xlnx,txfifo = <0x8000>; - } ; - } ; - Hard_Ethernet_MAC_fifo: xps-ll-fifo@81a00000 { - compatible = "xlnx,xps-ll-fifo-1.01.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 6 2 >; - reg = < 0x81a00000 0x10000 >; - xlnx,family = "virtex5"; - } ; - IIC_EEPROM: i2c@81600000 { - compatible = "xlnx,xps-iic-2.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 9 2 >; - reg = < 0x81600000 0x10000 >; - xlnx,clk-freq = <0x5f5e100>; - xlnx,family = "virtex5"; - xlnx,gpo-width = <0x1>; - xlnx,iic-freq = <0x186a0>; - xlnx,scl-inertial-delay = <0x5>; - xlnx,sda-inertial-delay = <0x5>; - 
xlnx,ten-bit-adr = <0x0>; - } ; - LCD_OPTIONAL: gpio@81420000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81420000 0x10000 >; - xlnx,all-inputs = <0x0>; - xlnx,all-inputs-2 = <0x0>; - xlnx,dout-default = <0x0>; - xlnx,dout-default-2 = <0x0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <0xb>; - xlnx,interrupt-present = <0x0>; - xlnx,is-bidir = <0x1>; - xlnx,is-bidir-2 = <0x1>; - xlnx,is-dual = <0x0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - LEDs_4Bit: gpio@81400000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81400000 0x10000 >; - xlnx,all-inputs = <0x0>; - xlnx,all-inputs-2 = <0x0>; - xlnx,dout-default = <0x0>; - xlnx,dout-default-2 = <0x0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <0x4>; - xlnx,interrupt-present = <0x0>; - xlnx,is-bidir = <0x1>; - xlnx,is-bidir-2 = <0x1>; - xlnx,is-dual = <0x0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - RS232_Uart_1: serial@83e00000 { - clock-frequency = <100000000>; - compatible = "xlnx,xps-uart16550-2.00.b", "ns16550"; - current-speed = <9600>; - device_type = "serial"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 11 2 >; - reg = < 0x83e00000 0x10000 >; - reg-offset = <0x1003>; - reg-shift = <2>; - xlnx,family = "virtex5"; - xlnx,has-external-rclk = <0x0>; - xlnx,has-external-xin = <0x0>; - xlnx,is-a-16550 = <0x1>; - } ; - SPI_EEPROM: xps-spi@feff8000 { - compatible = "xlnx,xps-spi-2.00.b"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 10 2 >; - reg = < 0xfeff8000 0x80 >; - xlnx,family = "virtex5"; - xlnx,fifo-exist = <0x1>; - xlnx,num-ss-bits = <0x1>; - xlnx,num-transfer-bits = <0x8>; - xlnx,sck-ratio = <0x80>; - } ; - SysACE_CompactFlash: sysace@83600000 { - compatible = "xlnx,xps-sysace-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 7 2 >; - reg = < 0x83600000 0x10000 >; - xlnx,family = "virtex5"; - xlnx,mem-width = <0x10>; - } ; - plbv46_pci_0: plbv46-pci@85e00000 { - #size-cells = <2>; - 
#address-cells = <3>; - compatible = "xlnx,plbv46-pci-1.03.a"; - device_type = "pci"; - reg = < 0x85e00000 0x10000 >; - - /* - * The default ML510 BSB has C_IPIFBAR2PCIBAR_0 set to - * 0 which means that a read/write to the memory mapped - * i/o region (which starts at 0xa0000000) for pci - * bar 0 on the plb side translates to 0. - * It is important to set this value to 0xa0000000, so - * that inbound and outbound pci transactions work - * properly including DMA. - */ - ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x20000000 - 0x01000000 0 0x00000000 0xf0000000 0 0x00010000>; - - #interrupt-cells = <1>; - interrupt-parent = <&xps_intc_0>; - interrupt-map-mask = <0xff00 0x0 0x0 0x7>; - interrupt-map = < - /* IRQ mapping for pci slots and ALI M1533 - * periperhals. In total there are 5 interrupt - * lines connected to a xps_intc controller. - * Four of them are PCI IRQ A, B, C, D and - * which correspond to respectively xpx_intc - * 5, 4, 3 and 2. The fifth interrupt line is - * connected to the south bridge and this one - * uses irq 1 and is active high instead of - * active low. - * - * The M1533 contains various peripherals - * including AC97 audio, a modem, USB, IDE and - * some power management stuff. The modem - * isn't connected on the ML510 and the power - * management core also isn't used. - */ - - /* IDSEL 0x16 / dev=6, bus=0 / PCI slot 3 */ - 0x3000 0 0 1 &xps_intc_0 3 2 - 0x3000 0 0 2 &xps_intc_0 2 2 - 0x3000 0 0 3 &xps_intc_0 5 2 - 0x3000 0 0 4 &xps_intc_0 4 2 - - /* IDSEL 0x13 / dev=3, bus=1 / PCI slot 4 */ - /* - 0x11800 0 0 1 &xps_intc_0 5 0 2 - 0x11800 0 0 2 &xps_intc_0 4 0 2 - 0x11800 0 0 3 &xps_intc_0 3 0 2 - 0x11800 0 0 4 &xps_intc_0 2 0 2 - */ - - /* According to the datasheet + schematic - * ABCD [FPGA] of slot 5 is mapped to DABC. - * Testing showed that at least A maps to B, - * the mapping of the other pins is a guess - * and for that reason the lines have been - * commented out. 
- */ - /* IDSEL 0x15 / dev=5, bus=0 / PCI slot 5 */ - 0x2800 0 0 1 &xps_intc_0 4 2 - /* - 0x2800 0 0 2 &xps_intc_0 3 2 - 0x2800 0 0 3 &xps_intc_0 2 2 - 0x2800 0 0 4 &xps_intc_0 5 2 - */ - - /* IDSEL 0x12 / dev=2, bus=1 / PCI slot 6 */ - /* - 0x11000 0 0 1 &xps_intc_0 4 0 2 - 0x11000 0 0 2 &xps_intc_0 3 0 2 - 0x11000 0 0 3 &xps_intc_0 2 0 2 - 0x11000 0 0 4 &xps_intc_0 5 0 2 - */ - - /* IDSEL 0x11 / dev=1, bus=0 / AC97 audio */ - 0x0800 0 0 1 &i8259 7 2 - - /* IDSEL 0x1b / dev=11, bus=0 / IDE */ - 0x5800 0 0 1 &i8259 14 2 - - /* IDSEL 0x1f / dev 15, bus=0 / 2x USB 1.1 */ - 0x7800 0 0 1 &i8259 7 2 - >; - ali_m1533 { - #size-cells = <1>; - #address-cells = <2>; - i8259: interrupt-controller@20 { - reg = <1 0x20 2 - 1 0xa0 2 - 1 0x4d0 2>; - interrupt-controller; - device_type = "interrupt-controller"; - #address-cells = <0>; - #interrupt-cells = <2>; - compatible = "chrp,iic"; - - /* south bridge irq is active high */ - interrupts = <1 3>; - interrupt-parent = <&xps_intc_0>; - }; - }; - } ; - xps_bram_if_cntlr_1: xps-bram-if-cntlr@ffff0000 { - compatible = "xlnx,xps-bram-if-cntlr-1.00.a"; - reg = < 0xffff0000 0x10000 >; - xlnx,family = "virtex5"; - } ; - xps_intc_0: interrupt-controller@81800000 { - #interrupt-cells = <0x2>; - compatible = "xlnx,xps-intc-1.00.a"; - interrupt-controller ; - reg = < 0x81800000 0x10000 >; - xlnx,num-intr-inputs = <0xc>; - } ; - xps_tft_0: tft@86e00000 { - compatible = "xlnx,xps-tft-1.00.a"; - reg = < 0x86e00000 0x10000 >; - xlnx,dcr-splb-slave-if = <0x1>; - xlnx,default-tft-base-addr = <0x0>; - xlnx,family = "virtex5"; - xlnx,i2c-slave-addr = <0x76>; - xlnx,mplb-awidth = <0x20>; - xlnx,mplb-dwidth = <0x80>; - xlnx,mplb-native-dwidth = <0x40>; - xlnx,mplb-smallest-slave = <0x20>; - xlnx,tft-interface = <0x1>; - } ; - } ; -} ; diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts deleted file mode 100644 index 0872862c9363..000000000000 --- a/arch/powerpc/boot/dts/walnut.dts +++ /dev/null @@ -1,246 +0,0 @@ -/* - * 
Device Tree Source for IBM Walnut - * - * Copyright 2007 IBM Corp. - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without - * any warranty of any kind, whether express or implied. - */ - -/dts-v1/; - -/ { - #address-cells = <1>; - #size-cells = <1>; - model = "ibm,walnut"; - compatible = "ibm,walnut"; - dcr-parent = <&{/cpus/cpu@0}>; - - aliases { - ethernet0 = &EMAC; - serial0 = &UART0; - serial1 = &UART1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - device_type = "cpu"; - model = "PowerPC,405GP"; - reg = <0x00000000>; - clock-frequency = <200000000>; /* Filled in by zImage */ - timebase-frequency = <0>; /* Filled in by zImage */ - i-cache-line-size = <32>; - d-cache-line-size = <32>; - i-cache-size = <16384>; - d-cache-size = <16384>; - dcr-controller; - dcr-access-method = "native"; - }; - }; - - memory { - device_type = "memory"; - reg = <0x00000000 0x00000000>; /* Filled in by zImage */ - }; - - UIC0: interrupt-controller { - compatible = "ibm,uic"; - interrupt-controller; - cell-index = <0>; - dcr-reg = <0x0c0 0x009>; - #address-cells = <0>; - #size-cells = <0>; - #interrupt-cells = <2>; - }; - - plb { - compatible = "ibm,plb3"; - #address-cells = <1>; - #size-cells = <1>; - ranges; - clock-frequency = <0>; /* Filled in by zImage */ - - SDRAM0: memory-controller { - compatible = "ibm,sdram-405gp"; - dcr-reg = <0x010 0x002>; - }; - - MAL: mcmal { - compatible = "ibm,mcmal-405gp", "ibm,mcmal"; - dcr-reg = <0x180 0x062>; - num-tx-chans = <1>; - num-rx-chans = <1>; - interrupt-parent = <&UIC0>; - interrupts = < - 0xb 0x4 /* TXEOB */ - 0xc 0x4 /* RXEOB */ - 0xa 0x4 /* SERR */ - 0xd 0x4 /* TXDE */ - 0xe 0x4 /* RXDE */>; - }; - - POB0: opb { - compatible = "ibm,opb-405gp", "ibm,opb"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0xef600000 0xef600000 0x00a00000>; - dcr-reg = <0x0a0 0x005>; - 
clock-frequency = <0>; /* Filled in by zImage */ - - UART0: serial@ef600300 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600300 0x00000008>; - virtual-reg = <0xef600300>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x0 0x4>; - }; - - UART1: serial@ef600400 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600400 0x00000008>; - virtual-reg = <0xef600400>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x1 0x4>; - }; - - IIC: i2c@ef600500 { - compatible = "ibm,iic-405gp", "ibm,iic"; - reg = <0xef600500 0x00000011>; - interrupt-parent = <&UIC0>; - interrupts = <0x2 0x4>; - }; - - GPIO: gpio@ef600700 { - compatible = "ibm,gpio-405gp"; - reg = <0xef600700 0x00000020>; - }; - - EMAC: ethernet@ef600800 { - device_type = "network"; - compatible = "ibm,emac-405gp", "ibm,emac"; - interrupt-parent = <&UIC0>; - interrupts = < - 0xf 0x4 /* Ethernet */ - 0x9 0x4 /* Ethernet Wake Up */>; - local-mac-address = [000000000000]; /* Filled in by zImage */ - reg = <0xef600800 0x00000070>; - mal-device = <&MAL>; - mal-tx-channel = <0>; - mal-rx-channel = <0>; - cell-index = <0>; - max-frame-size = <1500>; - rx-fifo-size = <4096>; - tx-fifo-size = <2048>; - phy-mode = "rmii"; - phy-map = <0x00000001>; - }; - - }; - - EBC0: ebc { - compatible = "ibm,ebc-405gp", "ibm,ebc"; - dcr-reg = <0x012 0x002>; - #address-cells = <2>; - #size-cells = <1>; - /* The ranges property is supplied by the bootwrapper - * and is based on the firmware's configuration of the - * EBC bridge - */ - clock-frequency = <0>; /* Filled in by zImage */ - - sram@0,0 { - reg = <0x00000000 0x00000000 0x00080000>; - }; - - flash@0,80000 { - compatible = "jedec-flash"; - bank-width = <1>; - reg = <0x00000000 0x00080000 0x00080000>; - #address-cells = <1>; - #size-cells = <1>; - partition@0 { - label = "OpenBIOS"; - reg = <0x00000000 
0x00080000>; - read-only; - }; - }; - - nvram@1,0 { - /* NVRAM and RTC */ - compatible = "ds1743-nvram"; - #bytes = <0x2000>; - reg = <0x00000001 0x00000000 0x00002000>; - }; - - keyboard@2,0 { - compatible = "intel,82C42PC"; - reg = <0x00000002 0x00000000 0x00000002>; - }; - - ir@3,0 { - compatible = "ti,TIR2000PAG"; - reg = <0x00000003 0x00000000 0x00000010>; - }; - - fpga@7,0 { - compatible = "Walnut-FPGA"; - reg = <0x00000007 0x00000000 0x00000010>; - virtual-reg = <0xf0300005>; - }; - }; - - PCI0: pci@ec000000 { - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - compatible = "ibm,plb405gp-pci", "ibm,plb-pci"; - primary; - reg = <0xeec00000 0x00000008 /* Config space access */ - 0xeed80000 0x00000004 /* IACK */ - 0xeed80000 0x00000004 /* Special cycle */ - 0xef480000 0x00000040>; /* Internal registers */ - - /* Outbound ranges, one memory and one IO, - * later cannot be changed. Chip supports a second - * IO range but we don't use it for now - */ - ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000 - 0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>; - - /* Inbound 2GB range starting at 0 */ - dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>; - - /* Walnut has all 4 IRQ pins tied together per slot */ - interrupt-map-mask = <0xf800 0x0 0x0 0x0>; - interrupt-map = < - /* IDSEL 1 */ - 0x800 0x0 0x0 0x0 &UIC0 0x1c 0x8 - - /* IDSEL 2 */ - 0x1000 0x0 0x0 0x0 &UIC0 0x1d 0x8 - - /* IDSEL 3 */ - 0x1800 0x0 0x0 0x0 &UIC0 0x1e 0x8 - - /* IDSEL 4 */ - 0x2000 0x0 0x0 0x0 &UIC0 0x1f 0x8 - >; - }; - }; - - chosen { - stdout-path = "/plb/opb/serial@ef600300"; - }; -}; diff --git a/arch/powerpc/boot/ep405.c b/arch/powerpc/boot/ep405.c deleted file mode 100644 index f9ad1e6a844e..000000000000 --- a/arch/powerpc/boot/ep405.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Embedded Planet EP405 with PlanetCore firmware - * - * (c) Benjamin Herrenschmidt 
<benh@kernel.crashing.org>, IBM Corp,\ - * - * Based on ep88xc.c by - * - * Scott Wood <scottwood@freescale.com> - * - * Copyright (c) 2007 Freescale Semiconductor, Inc. - */ - -#include "ops.h" -#include "stdio.h" -#include "planetcore.h" -#include "dcr.h" -#include "4xx.h" -#include "io.h" - -static char *table; -static u64 mem_size; - -static void platform_fixups(void) -{ - u64 val; - void *nvrtc; - - dt_fixup_memory(0, mem_size); - planetcore_set_mac_addrs(table); - - if (!planetcore_get_decimal(table, PLANETCORE_KEY_CRYSTAL_HZ, &val)) { - printf("No PlanetCore crystal frequency key.\r\n"); - return; - } - ibm405gp_fixup_clocks(val, 0xa8c000); - ibm4xx_quiesce_eth((u32 *)0xef600800, NULL); - ibm4xx_fixup_ebc_ranges("/plb/ebc"); - - if (!planetcore_get_decimal(table, PLANETCORE_KEY_KB_NVRAM, &val)) { - printf("No PlanetCore NVRAM size key.\r\n"); - return; - } - nvrtc = finddevice("/plb/ebc/nvrtc@4,200000"); - if (nvrtc != NULL) { - u32 reg[3] = { 4, 0x200000, 0}; - getprop(nvrtc, "reg", reg, 3); - reg[2] = (val << 10) & 0xffffffff; - setprop(nvrtc, "reg", reg, 3); - } -} - -void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - table = (char *)r3; - planetcore_prepare_table(table); - - if (!planetcore_get_decimal(table, PLANETCORE_KEY_MB_RAM, &mem_size)) - return; - - mem_size *= 1024 * 1024; - simple_alloc_init(_end, mem_size - (unsigned long)_end, 32, 64); - - fdt_init(_dtb_start); - - planetcore_set_stdout_path(table); - - serial_console_init(); - platform_ops.fixups = platform_fixups; -} diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index e0606766480f..6455fc9a244f 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -88,7 +88,6 @@ int serial_console_init(void); int ns16550_console_init(void *devp, struct serial_console_data *scdp); int cpm_console_init(void *devp, struct serial_console_data *scdp); int mpc5200_psc_console_init(void *devp, struct 
serial_console_data *scdp); -int uartlite_console_init(void *devp, struct serial_console_data *scdp); int opal_console_init(void *devp, struct serial_console_data *scdp); void *simple_alloc_init(char *base, unsigned long heap_size, unsigned long granularity, unsigned long max_allocs); diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 9457863147f9..0bfa7e87e546 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -132,11 +132,6 @@ int serial_console_init(void) else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_XILINX_VIRTEX - else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") || - dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a")) - rc = uartlite_console_init(devp, &serial_cd); -#endif #ifdef CONFIG_PPC64_BOOT_WRAPPER else if (dt_is_compatible(devp, "ibm,opal-console-raw")) rc = opal_console_init(devp, &serial_cd); diff --git a/arch/powerpc/boot/treeboot-walnut.c b/arch/powerpc/boot/treeboot-walnut.c deleted file mode 100644 index 623f58e7f7c9..000000000000 --- a/arch/powerpc/boot/treeboot-walnut.c +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Old U-boot compatibility for Walnut - * - * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * Copyright 2007 IBM Corporation - * Based on cuboot-83xx.c, which is: - * Copyright (c) 2007 Freescale Semiconductor, Inc. 
- */ - -#include "ops.h" -#include "stdio.h" -#include "dcr.h" -#include "4xx.h" -#include "io.h" - -BSS_STACK(4096); - -static void walnut_flashsel_fixup(void) -{ - void *devp, *sram; - u32 reg_flash[3] = {0x0, 0x0, 0x80000}; - u32 reg_sram[3] = {0x0, 0x0, 0x80000}; - u8 *fpga; - u8 fpga_brds1 = 0x0; - - devp = finddevice("/plb/ebc/fpga"); - if (!devp) - fatal("Couldn't locate FPGA node\n\r"); - - if (getprop(devp, "virtual-reg", &fpga, sizeof(fpga)) != sizeof(fpga)) - fatal("no virtual-reg property\n\r"); - - fpga_brds1 = in_8(fpga); - - devp = finddevice("/plb/ebc/flash"); - if (!devp) - fatal("Couldn't locate flash node\n\r"); - - if (getprop(devp, "reg", reg_flash, sizeof(reg_flash)) != sizeof(reg_flash)) - fatal("flash reg property has unexpected size\n\r"); - - sram = finddevice("/plb/ebc/sram"); - if (!sram) - fatal("Couldn't locate sram node\n\r"); - - if (getprop(sram, "reg", reg_sram, sizeof(reg_sram)) != sizeof(reg_sram)) - fatal("sram reg property has unexpected size\n\r"); - - if (fpga_brds1 & 0x1) { - reg_flash[1] ^= 0x80000; - reg_sram[1] ^= 0x80000; - } - - setprop(devp, "reg", reg_flash, sizeof(reg_flash)); - setprop(sram, "reg", reg_sram, sizeof(reg_sram)); -} - -#define WALNUT_OPENBIOS_MAC_OFF 0xfffffe0b -static void walnut_fixups(void) -{ - ibm4xx_sdram_fixup_memsize(); - ibm405gp_fixup_clocks(33330000, 0xa8c000); - ibm4xx_quiesce_eth((u32 *)0xef600800, NULL); - ibm4xx_fixup_ebc_ranges("/plb/ebc"); - walnut_flashsel_fixup(); - dt_fixup_mac_address_by_alias("ethernet0", (u8 *) WALNUT_OPENBIOS_MAC_OFF); -} - -void platform_init(void) -{ - unsigned long end_of_ram = 0x2000000; - unsigned long avail_ram = end_of_ram - (unsigned long) _end; - - simple_alloc_init(_end, avail_ram, 32, 32); - platform_ops.fixups = walnut_fixups; - platform_ops.exit = ibm40x_dbcr_reset; - fdt_init(_dtb_start); - serial_console_init(); -} diff --git a/arch/powerpc/boot/uartlite.c b/arch/powerpc/boot/uartlite.c deleted file mode 100644 index 46bed69b4169..000000000000 --- 
a/arch/powerpc/boot/uartlite.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Xilinx UARTLITE bootloader driver - * - * Copyright (C) 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <stdarg.h> -#include <stddef.h> -#include "types.h" -#include "string.h" -#include "stdio.h" -#include "io.h" -#include "ops.h" - -#define ULITE_RX 0x00 -#define ULITE_TX 0x04 -#define ULITE_STATUS 0x08 -#define ULITE_CONTROL 0x0c - -#define ULITE_STATUS_RXVALID 0x01 -#define ULITE_STATUS_TXFULL 0x08 - -#define ULITE_CONTROL_RST_RX 0x02 - -static void * reg_base; - -static int uartlite_open(void) -{ - /* Clear the RX FIFO */ - out_be32(reg_base + ULITE_CONTROL, ULITE_CONTROL_RST_RX); - return 0; -} - -static void uartlite_putc(unsigned char c) -{ - u32 reg = ULITE_STATUS_TXFULL; - while (reg & ULITE_STATUS_TXFULL) /* spin on TXFULL bit */ - reg = in_be32(reg_base + ULITE_STATUS); - out_be32(reg_base + ULITE_TX, c); -} - -static unsigned char uartlite_getc(void) -{ - u32 reg = 0; - while (!(reg & ULITE_STATUS_RXVALID)) /* spin waiting for RXVALID bit */ - reg = in_be32(reg_base + ULITE_STATUS); - return in_be32(reg_base + ULITE_RX); -} - -static u8 uartlite_tstc(void) -{ - u32 reg = in_be32(reg_base + ULITE_STATUS); - return reg & ULITE_STATUS_RXVALID; -} - -int uartlite_console_init(void *devp, struct serial_console_data *scdp) -{ - int n; - unsigned long reg_phys; - - n = getprop(devp, "virtual-reg", ®_base, sizeof(reg_base)); - if (n != sizeof(reg_base)) { - if (!dt_xlate_reg(devp, 0, ®_phys, NULL)) - return -1; - - reg_base = (void *)reg_phys; - } - - scdp->open = uartlite_open; - scdp->putc = uartlite_putc; - scdp->getc = uartlite_getc; - scdp->tstc = uartlite_tstc; - scdp->close = NULL; - return 0; -} diff --git a/arch/powerpc/boot/virtex.c b/arch/powerpc/boot/virtex.c deleted file mode 100644 
index f731cbb4bff0..000000000000 --- a/arch/powerpc/boot/virtex.c +++ /dev/null @@ -1,97 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * The platform specific code for virtex devices since a boot loader is not - * always used. - * - * (C) Copyright 2008 Xilinx, Inc. - */ - -#include "ops.h" -#include "io.h" -#include "stdio.h" - -#define UART_DLL 0 /* Out: Divisor Latch Low */ -#define UART_DLM 1 /* Out: Divisor Latch High */ -#define UART_FCR 2 /* Out: FIFO Control Register */ -#define UART_FCR_CLEAR_RCVR 0x02 /* Clear the RCVR FIFO */ -#define UART_FCR_CLEAR_XMIT 0x04 /* Clear the XMIT FIFO */ -#define UART_LCR 3 /* Out: Line Control Register */ -#define UART_MCR 4 /* Out: Modem Control Register */ -#define UART_MCR_RTS 0x02 /* RTS complement */ -#define UART_MCR_DTR 0x01 /* DTR complement */ -#define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ -#define UART_LCR_WLEN8 0x03 /* Wordlength: 8 bits */ - -static int virtex_ns16550_console_init(void *devp) -{ - unsigned char *reg_base; - u32 reg_shift, reg_offset, clk, spd; - u16 divisor; - int n; - - if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) - return -1; - - n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); - if (n == sizeof(reg_offset)) - reg_base += reg_offset; - - n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); - if (n != sizeof(reg_shift)) - reg_shift = 0; - - n = getprop(devp, "current-speed", (void *)&spd, sizeof(spd)); - if (n != sizeof(spd)) - spd = 9600; - - /* should there be a default clock rate?*/ - n = getprop(devp, "clock-frequency", (void *)&clk, sizeof(clk)); - if (n != sizeof(clk)) - return -1; - - divisor = clk / (16 * spd); - - /* Access baud rate */ - out_8(reg_base + (UART_LCR << reg_shift), UART_LCR_DLAB); - - /* Baud rate based on input clock */ - out_8(reg_base + (UART_DLL << reg_shift), divisor & 0xFF); - out_8(reg_base + (UART_DLM << reg_shift), divisor >> 8); - - /* 8 data, 1 stop, no parity */ - out_8(reg_base + (UART_LCR << reg_shift), 
UART_LCR_WLEN8); - - /* RTS/DTR */ - out_8(reg_base + (UART_MCR << reg_shift), UART_MCR_RTS | UART_MCR_DTR); - - /* Clear transmitter and receiver */ - out_8(reg_base + (UART_FCR << reg_shift), - UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); - return 0; -} - -/* For virtex, the kernel may be loaded without using a bootloader and if so - some UARTs need more setup than is provided in the normal console init -*/ -int platform_specific_init(void) -{ - void *devp; - char devtype[MAX_PROP_LEN]; - char path[MAX_PATH_LEN]; - - devp = finddevice("/chosen"); - if (devp == NULL) - return -1; - - if (getprop(devp, "linux,stdout-path", path, MAX_PATH_LEN) > 0) { - devp = finddevice(path); - if (devp == NULL) - return -1; - - if ((getprop(devp, "device_type", devtype, sizeof(devtype)) > 0) - && !strcmp(devtype, "serial") - && (dt_is_compatible(devp, "ns16550"))) - virtex_ns16550_console_init(devp); - } - return 0; -} diff --git a/arch/powerpc/boot/virtex405-head.S b/arch/powerpc/boot/virtex405-head.S deleted file mode 100644 index 00bab7d7c48c..000000000000 --- a/arch/powerpc/boot/virtex405-head.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "ppc_asm.h" - - .text - .global _zimage_start -_zimage_start: - - /* PPC errata 213: needed by Virtex-4 FX */ - mfccr0 0 - oris 0,0,0x50000000@h - mtccr0 0 - - /* - * Invalidate the data cache if the data cache is turned off. - * - The 405 core does not invalidate the data cache on power-up - * or reset but does turn off the data cache. We cannot assume - * that the cache contents are valid. - * - If the data cache is turned on this must have been done by - * a bootloader and we assume that the cache contents are - * valid. 
- */ - mfdccr r9 - cmplwi r9,0 - bne 2f - lis r9,0 - li r8,256 - mtctr r8 -1: dccci r0,r9 - addi r9,r9,0x20 - bdnz 1b -2: b _zimage_start_lib diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index ed6266367bc0..cd58a62e810d 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -29,6 +29,7 @@ set -e # Allow for verbose output if [ "$V" = 1 ]; then set -x + map="-Map wrapper.map" fi # defaults @@ -323,14 +324,6 @@ adder875-redboot) platformo="$object/fixed-head.o $object/redboot-8xx.o" binary=y ;; -simpleboot-virtex405-*) - platformo="$object/virtex405-head.o $object/simpleboot.o $object/virtex.o" - binary=y - ;; -simpleboot-virtex440-*) - platformo="$object/fixed-head.o $object/simpleboot.o $object/virtex.o" - binary=y - ;; simpleboot-*) platformo="$object/fixed-head.o $object/simpleboot.o" binary=y @@ -500,7 +493,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi @@ -570,7 +563,18 @@ ps3) count=$overlay_size bs=1 odir="$(dirname "$ofile.bin")" - rm -f "$odir/otheros.bld" - gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld" + + # The ps3's flash loader has a size limit of 16 MiB for the uncompressed + # image. If a compressed image that exceeded this limit is written to + # flash the loader will decompress that image until the 16 MiB limit is + # reached, then enter the system reset vector of the partially decompressed + # image. No warning is issued. 
+ rm -f "$odir"/{otheros,otheros-too-big}.bld + size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1) + bld="otheros.bld" + if [ $size -gt $((0x1000000)) ]; then + bld="otheros-too-big.bld" + fi + gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld" ;; esac diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index db93c117be36..25eed86ec528 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -9,7 +9,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ACADIA=y -# CONFIG_WALNUT is not set CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig deleted file mode 100644 index a3854cf65f8d..000000000000 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ /dev/null @@ -1,62 +0,0 @@ -CONFIG_40x=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -CONFIG_KALLSYMS_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_EP405=y -# CONFIG_WALNUT is not set -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IPV6 is not set -CONFIG_CONNECTOR=y -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=m -CONFIG_MTD_CFI=y -CONFIG_MTD_JEDECPROBE=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=35000 -CONFIG_NETDEVICES=y -CONFIG_IBM_EMAC=y -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_SERIAL_OF_PLATFORM=y -# CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set -CONFIG_THERMAL=y -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_OHCI_HCD_PPC_OF_BE=y 
-CONFIG_USB_OHCI_HCD_PPC_OF_LE=y -CONFIG_EXT2_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_DEBUG_FS=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_PCBC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_DES=y diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index edc22464dfb5..3549c9e950e8 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -11,7 +11,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_KILAUEA=y -# CONFIG_WALNUT is not set CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig index 579fa846839c..6a735ee75715 100644 --- a/arch/powerpc/configs/40x/klondike_defconfig +++ b/arch/powerpc/configs/40x/klondike_defconfig @@ -8,7 +8,6 @@ CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_WALNUT is not set CONFIG_APM8018X=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig index 188789b9aa4c..4563f88acf0c 100644 --- a/arch/powerpc/configs/40x/makalu_defconfig +++ b/arch/powerpc/configs/40x/makalu_defconfig @@ -9,7 +9,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_MAKALU=y -# CONFIG_WALNUT is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig index 5bf6af7ef093..2a2bb3f46847 100644 --- a/arch/powerpc/configs/40x/obs600_defconfig +++ b/arch/powerpc/configs/40x/obs600_defconfig @@ -10,7 +10,6 @@ CONFIG_KALLSYMS_ALL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_WALNUT is not set CONFIG_OBS600=y 
CONFIG_MATH_EMULATION=y CONFIG_NET=y diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig deleted file mode 100644 index 5e7c61d1d7d0..000000000000 --- a/arch/powerpc/configs/40x/virtex_defconfig +++ /dev/null @@ -1,75 +0,0 @@ -CONFIG_40x=y -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_WALNUT is not set -CONFIG_XILINX_VIRTEX_GENERIC_BOARD=y -CONFIG_PREEMPT=y -CONFIG_MATH_EMULATION=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" -CONFIG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_NETFILTER=y -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_MANGLE=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_XILINX_SYSACE=y -CONFIG_NETDEVICES=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_XILINX_XPS_PS2=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y -CONFIG_XILINX_HWICAP=y -CONFIG_GPIOLIB=y -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_XILINX=y -# CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FB_XILINX=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_AUTOFS4_FS=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_CRAMFS=y -CONFIG_ROMFS_FS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_KERNEL=y diff --git 
a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig deleted file mode 100644 index 1f74079e1703..000000000000 --- a/arch/powerpc/configs/44x/virtex5_defconfig +++ /dev/null @@ -1,74 +0,0 @@ -CONFIG_44x=y -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_EBONY is not set -CONFIG_XILINX_VIRTEX440_GENERIC_BOARD=y -CONFIG_PREEMPT=y -CONFIG_MATH_EMULATION=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_NETFILTER=y -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_MANGLE=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_XILINX_SYSACE=y -CONFIG_NETDEVICES=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_XILINX_XPS_PS2=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y -CONFIG_XILINX_HWICAP=y -CONFIG_GPIOLIB=y -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_XILINX=y -# CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FB_XILINX=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_AUTOFS4_FS=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_CRAMFS=y -CONFIG_ROMFS_FS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 
f55e23cb176c..5326bc739279 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -10,7 +10,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_ADDER875=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index 0e2e5e81a359..f5c3e72da719 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -12,7 +12,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_EP88XC=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 5320735395e7..5c56d36cdfc5 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -12,7 +12,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MPC86XADS=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 82a008c04eae..949ff9ccda5e 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -11,7 +11,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index df8bdbaa5d8f..2de9aadf0f50 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -347,3 +347,4 @@ CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m CONFIG_PRINTK_TIME=y +CONFIG_PRINTK_CALLER=y diff --git 
a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index a5f683aed328..25f6c91e843a 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -10,11 +10,9 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PPC4xx_GPIO=y CONFIG_ACADIA=y -CONFIG_EP405=y CONFIG_HOTFOOT=y CONFIG_KILAUEA=y CONFIG_MAKALU=y -CONFIG_XILINX_VIRTEX_GENERIC_BOARD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -37,33 +35,26 @@ CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -CONFIG_XILINX_SYSACE=m CONFIG_NETDEVICES=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set CONFIG_SERIO=m # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_XILINX_XPS_PS2=m # CONFIG_VT is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y # CONFIG_HW_RANDOM is not set -CONFIG_XILINX_HWICAP=m CONFIG_I2C=m CONFIG_I2C_CHARDEV=m CONFIG_I2C_GPIO=m CONFIG_I2C_IBM_IIC=m -CONFIG_GPIO_XILINX=y # CONFIG_HWMON is not set CONFIG_THERMAL=y CONFIG_FB=m -CONFIG_FB_XILINX=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=m CONFIG_VFAT_FS=m diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index a41eedfe0a5f..8b595f67068c 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -22,7 +22,6 @@ CONFIG_GLACIER=y CONFIG_REDWOOD=y CONFIG_EIGER=y CONFIG_YOSEMITE=y -CONFIG_XILINX_VIRTEX440_GENERIC_BOARD=y CONFIG_PPC4xx_GPIO=y CONFIG_MATH_EMULATION=y CONFIG_NET=y @@ -46,7 +45,6 @@ CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -CONFIG_XILINX_SYSACE=m CONFIG_SCSI=m CONFIG_BLK_DEV_SD=m # CONFIG_SCSI_LOWLEVEL is not set @@ -57,7 +55,6 @@ CONFIG_IBM_EMAC=y CONFIG_SERIO=m # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set 
-CONFIG_SERIO_XILINX_XPS_PS2=m # CONFIG_VT is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -65,18 +62,13 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y # CONFIG_HW_RANDOM is not set -CONFIG_XILINX_HWICAP=m CONFIG_I2C=m CONFIG_I2C_CHARDEV=m CONFIG_I2C_GPIO=m CONFIG_I2C_IBM_IIC=m -CONFIG_GPIO_XILINX=y # CONFIG_HWMON is not set CONFIG_FB=m -CONFIG_FB_XILINX=m CONFIG_USB=m CONFIG_USB_EHCI_HCD=m CONFIG_USB_OHCI_HCD=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index bae8170d7401..8d7e3e98856d 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -281,6 +281,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_LIBNVDIMM=y CONFIG_RAS=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y @@ -358,6 +359,7 @@ CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_CRYPTO_DEV_VMX=y CONFIG_PRINTK_TIME=y +CONFIG_PRINTK_CALLER=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 0bea4d3ffb85..dfa4a726333b 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -322,3 +322,4 @@ CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m CONFIG_PRINTK_TIME=y +CONFIG_PRINTK_CALLER=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index eda8bfb2d0a3..77857d513022 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -15,7 +15,6 @@ CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_TQM8XX=y -CONFIG_8xx_COPYBACK=y # CONFIG_8xx_CPU15 is not set CONFIG_GEN_RTC=y CONFIG_HZ_100=y diff --git 
a/arch/powerpc/include/asm/asm-405.h b/arch/powerpc/include/asm/asm-405.h deleted file mode 100644 index 7270d3ae7c8e..000000000000 --- a/arch/powerpc/include/asm/asm-405.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _ASM_POWERPC_ASM_405_H -#define _ASM_POWERPC_ASM_405_H - -#include <asm/asm-const.h> - -#ifdef __KERNEL__ -#ifdef CONFIG_IBM405_ERR77 -/* Erratum #77 on the 405 means we need a sync or dcbt before every - * stwcx. The old ATOMIC_SYNC_FIX covered some but not all of this. - */ -#define PPC405_ERR77(ra,rb) stringify_in_c(dcbt ra, rb;) -#define PPC405_ERR77_SYNC stringify_in_c(sync;) -#else -#define PPC405_ERR77(ra,rb) -#define PPC405_ERR77_SYNC -#endif -#endif - -#endif /* _ASM_POWERPC_ASM_405_H */ diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 31c231ea56b7..498785ffc25f 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -10,7 +10,6 @@ #include <linux/types.h> #include <asm/cmpxchg.h> #include <asm/barrier.h> -#include <asm/asm-405.h> #define ATOMIC_INIT(i) { (i) } @@ -47,7 +46,6 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "\n" \ #asm_op " %0,%2,%0\n" \ - PPC405_ERR77(0,%3) \ " stwcx. %0,0,%3 \n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ @@ -63,7 +61,6 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ #asm_op " %0,%2,%0\n" \ - PPC405_ERR77(0, %3) \ " stwcx. %0,0,%3\n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ @@ -81,7 +78,6 @@ static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ __asm__ __volatile__( \ "1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ #asm_op " %1,%3,%0\n" \ - PPC405_ERR77(0, %4) \ " stwcx. 
%1,0,%4\n" \ " bne- 1b\n" \ : "=&r" (res), "=&r" (t), "+m" (v->counter) \ @@ -130,7 +126,6 @@ static __inline__ void atomic_inc(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_inc\n\ addic %0,%0,1\n" - PPC405_ERR77(0,%2) " stwcx. %0,0,%2 \n\ bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -146,7 +141,6 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n" " addic %0,%0,1\n" - PPC405_ERR77(0, %2) " stwcx. %0,0,%2\n" " bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -163,7 +157,6 @@ static __inline__ void atomic_dec(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_dec\n\ addic %0,%0,-1\n" - PPC405_ERR77(0,%2)\ " stwcx. %0,0,%2\n\ bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -179,7 +172,6 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n" " addic %0,%0,-1\n" - PPC405_ERR77(0, %2) " stwcx. %0,0,%2\n" " bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -220,7 +212,6 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u) cmpw 0,%0,%3 \n\ beq 2f \n\ add %0,%2,%0 \n" - PPC405_ERR77(0,%2) " stwcx. %0,0,%1 \n\ bne- 1b \n" PPC_ATOMIC_EXIT_BARRIER @@ -251,7 +242,6 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v) cmpwi 0,%0,0\n\ beq- 2f\n\ addic %1,%0,1\n" - PPC405_ERR77(0,%2) " stwcx. %1,0,%2\n\ bne- 1b\n" PPC_ATOMIC_EXIT_BARRIER @@ -280,7 +270,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) cmpwi %0,1\n\ addi %0,%0,-1\n\ blt- 2f\n" - PPC405_ERR77(0,%1) " stwcx. 
%0,0,%1\n\ bne- 1b" PPC_ATOMIC_EXIT_BARRIER diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 28dcf8222943..4a4d3afd5340 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -41,7 +41,6 @@ #include <linux/compiler.h> #include <asm/asm-compat.h> #include <asm/synch.h> -#include <asm/asm-405.h> /* PPC bit number conversion */ #define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be)) @@ -73,7 +72,6 @@ static inline void fn(unsigned long mask, \ prefix \ "1:" PPC_LLARX(%0,0,%3,0) "\n" \ stringify_in_c(op) "%0,%0,%2\n" \ - PPC405_ERR77(0,%3) \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ : "=&r" (old), "+m" (*p) \ @@ -119,7 +117,6 @@ static inline unsigned long fn( \ prefix \ "1:" PPC_LLARX(%0,0,%3,eh) "\n" \ stringify_in_c(op) "%1,%0,%2\n" \ - PPC405_ERR77(0,%3) \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ @@ -175,7 +172,6 @@ clear_bit_unlock_return_word(int nr, volatile unsigned long *addr) PPC_RELEASE_BARRIER "1:" PPC_LLARX(%0,0,%3,0) "\n" "andc %1,%0,%2\n" - PPC405_ERR77(0,%3) PPC_STLCX "%1,0,%3\n" "bne- 1b\n" : "=&r" (old), "=&r" (t) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index db0a1c281587..32fd4452e960 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_KUP_H #define _ASM_POWERPC_BOOK3S_32_KUP_H +#include <asm/bug.h> #include <asm/book3s/32/mmu-hash.h> #ifdef __ASSEMBLY__ @@ -75,7 +76,7 @@ .macro kuap_check current, gpr #ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr, KUAP(thread) + lwz \gpr, THREAD + KUAP(\current) 999: twnei \gpr, 0 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) #endif @@ -108,7 +109,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user u32 addr, end; BUILD_BUG_ON(!__builtin_constant_p(dir)); - BUILD_BUG_ON(dir == KUAP_CURRENT); + BUILD_BUG_ON(dir & ~KUAP_READ_WRITE); if 
(!(dir & KUAP_WRITE)) return; @@ -131,7 +132,7 @@ static __always_inline void prevent_user_access(void __user *to, const void __us BUILD_BUG_ON(!__builtin_constant_p(dir)); - if (dir == KUAP_CURRENT) { + if (dir & KUAP_CURRENT_WRITE) { u32 kuap = current->thread.kuap; if (unlikely(!kuap)) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 6052b72216a6..d7978a5a79c3 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -187,14 +187,14 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); * memory shall not share segments. */ #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES) -#define VMALLOC_START ((_ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \ +#define VMALLOC_START ((ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \ ~(VMALLOC_OFFSET - 1)) #else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #endif #ifdef CONFIG_KASAN_VMALLOC -#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else #define VMALLOC_END ioremap_bot #endif @@ -217,7 +217,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #define pte_clear(mm, addr, ptep) \ - do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0) + do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) @@ -252,84 +252,68 @@ extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, * and the PTE may be either 32 or 64 bit wide. In the later case, * when using atomic updates, only the low part of the PTE is * accessed atomically. 
- * - * In addition, on 44x, we also maintain a global flag indicating - * that an executable user mapping was modified, which is needed - * to properly flush the virtually tagged instruction cache of - * those implementations. */ -#ifndef CONFIG_PTE_64BIT -static inline unsigned long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { - unsigned long old, tmp; - - __asm__ __volatile__("\ -1: lwarx %0,0,%3\n\ - andc %1,%0,%4\n\ - or %1,%1,%5\n" -" stwcx. %1,0,%3\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); - - return old; -} -#else /* CONFIG_PTE_64BIT */ -static inline unsigned long long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) -{ - unsigned long long old; + pte_basic_t old; unsigned long tmp; - __asm__ __volatile__("\ -1: lwarx %L0,0,%4\n\ - lwzx %0,0,%3\n\ - andc %1,%L0,%5\n\ - or %1,%1,%6\n" -" stwcx. %1,0,%4\n\ - bne- 1b" + __asm__ __volatile__( +#ifndef CONFIG_PTE_64BIT +"1: lwarx %0, 0, %3\n" +" andc %1, %0, %4\n" +#else +"1: lwarx %L0, 0, %3\n" +" lwz %0, -4(%3)\n" +" andc %1, %L0, %4\n" +#endif +" or %1, %1, %5\n" +" stwcx. %1, 0, %3\n" +" bne- 1b" : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p) +#ifndef CONFIG_PTE_64BIT + : "r" (p), +#else + : "b" ((unsigned long)(p) + 4), +#endif + "r" (clr), "r" (set), "m" (*p) : "cc" ); return old; } -#endif /* CONFIG_PTE_64BIT */ /* * 2.6 calls this without flushing the TLB entry; this is wrong * for our hash-based implementation, we fix that up here. 
*/ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; - old = pte_update(ptep, _PAGE_ACCESSED, 0); + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); if (old & _PAGE_HASHPTE) { unsigned long ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(context, addr, ptephys, 1); + flush_hash_pages(mm->context.id, addr, ptephys, 1); } return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep) + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0)); + return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(ptep, _PAGE_RW, 0); + pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -340,7 +324,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - pte_update(ptep, 0, set); + pte_update(vma->vm_mm, address, ptep, 0, set, 0); flush_tlb_page(vma, address); } @@ -538,7 +522,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) | (pte_val(pte) & ~_PAGE_HASHPTE)); else - pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); + pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0); #elif defined(CONFIG_PTE_64BIT) /* Second case is 32-bit with 64-bit PTE. 
In this case, we diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 3bcef989a35d..3ee1ec60be84 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -12,11 +12,17 @@ #ifdef __ASSEMBLY__ -.macro kuap_restore_amr gpr +.macro kuap_restore_amr gpr1, gpr2 #ifdef CONFIG_PPC_KUAP BEGIN_MMU_FTR_SECTION_NESTED(67) - ld \gpr, STACK_REGS_KUAP(r1) - mtspr SPRN_AMR, \gpr + mfspr \gpr1, SPRN_AMR + ld \gpr2, STACK_REGS_KUAP(r1) + cmpd \gpr1, \gpr2 + beq 998f + isync + mtspr SPRN_AMR, \gpr2 + /* No isync required, see kuap_restore_amr() */ +998: END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm @@ -60,10 +66,28 @@ #include <asm/mmu.h> #include <asm/ptrace.h> -static inline void kuap_restore_amr(struct pt_regs *regs) +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { - if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) + if (mmu_has_feature(MMU_FTR_RADIX_KUAP) && unlikely(regs->kuap != amr)) { + isync(); mtspr(SPRN_AMR, regs->kuap); + /* + * No isync required here because we are about to RFI back to + * previous context before any user accesses would be made, + * which is a CSI. + */ + } +} + +static inline unsigned long kuap_get_and_check_amr(void) +{ + if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) { + unsigned long amr = mfspr(SPRN_AMR); + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ + WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); + return amr; + } + return 0; } static inline void kuap_check_amr(void) @@ -142,13 +166,18 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); } #else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs) +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } static inline void kuap_check_amr(void) { } + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0; +} #endif /* CONFIG_PPC_KUAP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index bb3deb76c951..5393a535240c 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -116,6 +116,9 @@ typedef struct { /* Number of users of the external (Nest) MMU */ atomic_t copros; + /* Number of user space windows opened in process mm_context */ + atomic_t vas_windows; + struct hash_mm_context *hash_context; unsigned long vdso_base; @@ -208,7 +211,7 @@ void hash__early_init_devtree(void); void radix__early_init_devtree(void); extern void hash__early_init_mmu(void); extern void radix__early_init_mmu(void); -static inline void early_init_mmu(void) +static inline void __init early_init_mmu(void) { if (radix_enabled()) return radix__early_init_mmu(); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 87168eb9490c..f17442c3a092 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -553,6 +553,12 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) } #endif /* CONFIG_NUMA_BALANCING */ +static inline bool pte_hw_valid(pte_t pte) +{ + return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE)) == + cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE); +} + static inline int pte_present(pte_t pte) { /* @@ -561,12 +567,11 @@ static inline int pte_present(pte_t pte) * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID * if we find _PAGE_PRESENT cleared. 
*/ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)); -} -static inline bool pte_hw_valid(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); + if (pte_hw_valid(pte)) + return true; + return (pte_raw(pte) & cpu_to_be64(_PAGE_INVALID | _PAGE_PTE)) == + cpu_to_be64(_PAGE_INVALID | _PAGE_PTE); } #ifdef CONFIG_PPC_MEM_KEYS @@ -1004,10 +1009,25 @@ extern struct page *p4d_page(p4d_t p4d); #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) #define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) -#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) -#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) -#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) +static inline unsigned long pgd_index(unsigned long address) +{ + return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); +} + +static inline unsigned long pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +static inline unsigned long pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + +static inline unsigned long pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} /* * Find an entry in a page-table-directory. 
We combine the address region @@ -1145,8 +1165,11 @@ extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); -extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd); +static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmd) +{ +} + extern int hash__has_transparent_hugepage(void); static inline int has_transparent_hugepage(void) { @@ -1262,6 +1285,11 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, } #define pmdp_collapse_flush pmdp_collapse_flush +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL +pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, + unsigned long addr, + pmd_t *pmdp, int full); + #define __HAVE_ARCH_PGTABLE_DEPOSIT static inline void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 64d02a704bcb..3b95769739c7 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -113,8 +113,7 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start, struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); /* Private function for use by PCI IO mapping code */ -extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end); +extern void __flush_hash_table_range(unsigned long start, unsigned long end); extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */ diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 609cab1d58f2..2124b7090db9 100644 --- a/arch/powerpc/include/asm/cache.h +++ 
b/arch/powerpc/include/asm/cache.h @@ -6,7 +6,7 @@ /* bytes per L1 cache line */ -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX) +#if defined(CONFIG_PPC_8xx) #define L1_CACHE_SHIFT 4 #define MAX_COPY_PREFETCH 1 #define IFETCH_ALIGN_SHIFT 2 diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 27183871eb3b..cf091c4c22e5 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -6,7 +6,6 @@ #include <linux/compiler.h> #include <asm/synch.h> #include <linux/bug.h> -#include <asm/asm-405.h> #ifdef __BIG_ENDIAN #define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) @@ -29,7 +28,6 @@ static inline u32 __xchg_##type##sfx(volatile void *p, u32 val) \ "1: lwarx %0,0,%3\n" \ " andc %1,%0,%5\n" \ " or %1,%1,%4\n" \ - PPC405_ERR77(0,%3) \ " stwcx. %1,0,%3\n" \ " bne- 1b\n" \ : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ @@ -60,7 +58,6 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ " bne- 2f\n" \ " andc %1,%0,%6\n" \ " or %1,%1,%5\n" \ - PPC405_ERR77(0,%3) \ " stwcx. %1,0,%3\n" \ " bne- 1b\n" \ br2 \ @@ -92,7 +89,6 @@ __xchg_u32_local(volatile void *p, unsigned long val) __asm__ __volatile__( "1: lwarx %0,0,%2 \n" - PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne- 1b" : "=&r" (prev), "+m" (*(volatile unsigned int *)p) @@ -109,7 +105,6 @@ __xchg_u32_relaxed(u32 *p, unsigned long val) __asm__ __volatile__( "1: lwarx %0,0,%2\n" - PPC405_ERR77(0, %2) " stwcx. %3,0,%2\n" " bne- 1b" : "=&r" (prev), "+m" (*p) @@ -127,7 +122,6 @@ __xchg_u64_local(volatile void *p, unsigned long val) __asm__ __volatile__( "1: ldarx %0,0,%2 \n" - PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne- 1b" : "=&r" (prev), "+m" (*(volatile unsigned long *)p) @@ -144,7 +138,6 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) __asm__ __volatile__( "1: ldarx %0,0,%2\n" - PPC405_ERR77(0, %2) " stdcx. 
%3,0,%2\n" " bne- 1b" : "=&r" (prev), "+m" (*p) @@ -229,7 +222,6 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw 0,%0,%3\n\ bne- 2f\n" - PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" PPC_ATOMIC_EXIT_BARRIER @@ -252,7 +244,6 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw 0,%0,%3\n\ bne- 2f\n" - PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" "\n\ @@ -273,7 +264,6 @@ __cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new) "1: lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n" " cmpw 0,%0,%3\n" " bne- 2f\n" - PPC405_ERR77(0, %2) " stwcx. %4,0,%2\n" " bne- 1b\n" "2:" @@ -301,7 +291,6 @@ __cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) "1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n" " cmpw 0,%0,%3\n" " bne- 2f\n" - PPC405_ERR77(0, %2) " stwcx. %4,0,%2\n" " bne- 1b\n" PPC_ACQUIRE_BARRIER diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 898b54262881..eacc9102c251 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -11,6 +11,7 @@ #include <linux/string.h> #include <linux/kallsyms.h> #include <asm/asm-compat.h> +#include <asm/inst.h> /* Flags for create_branch: * "b" == create_branch(addr, target, 0); @@ -22,33 +23,33 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); -unsigned int create_branch(const unsigned int *addr, - unsigned long target, int flags); -unsigned int create_cond_branch(const unsigned int *addr, - unsigned long target, int flags); -int patch_branch(unsigned int *addr, unsigned long target, int flags); -int patch_instruction(unsigned int *addr, unsigned int instr); -int raw_patch_instruction(unsigned int *addr, unsigned int instr); +int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr, + unsigned long target, int flags); +int create_cond_branch(struct 
ppc_inst *instr, const struct ppc_inst *addr, + unsigned long target, int flags); +int patch_branch(struct ppc_inst *addr, unsigned long target, int flags); +int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); +int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); static inline unsigned long patch_site_addr(s32 *site) { return (unsigned long)site + *site; } -static inline int patch_instruction_site(s32 *site, unsigned int instr) +static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) { - return patch_instruction((unsigned int *)patch_site_addr(site), instr); + return patch_instruction((struct ppc_inst *)patch_site_addr(site), instr); } static inline int patch_branch_site(s32 *site, unsigned long target, int flags) { - return patch_branch((unsigned int *)patch_site_addr(site), target, flags); + return patch_branch((struct ppc_inst *)patch_site_addr(site), target, flags); } static inline int modify_instruction(unsigned int *addr, unsigned int clr, unsigned int set) { - return patch_instruction(addr, (*addr & ~clr) | set); + return patch_instruction((struct ppc_inst *)addr, ppc_inst((*addr & ~clr) | set)); } static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set) @@ -56,13 +57,13 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned return modify_instruction((unsigned int *)patch_site_addr(site), clr, set); } -int instr_is_relative_branch(unsigned int instr); -int instr_is_relative_link_branch(unsigned int instr); -int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); -unsigned long branch_target(const unsigned int *instr); -unsigned int translate_branch(const unsigned int *dest, - const unsigned int *src); -extern bool is_conditional_branch(unsigned int instr); +int instr_is_relative_branch(struct ppc_inst instr); +int instr_is_relative_link_branch(struct ppc_inst instr); +int instr_is_branch_to_addr(const struct ppc_inst *instr, 
unsigned long addr); +unsigned long branch_target(const struct ppc_inst *instr); +int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, + const struct ppc_inst *src); +extern bool is_conditional_branch(struct ppc_inst instr); #ifdef CONFIG_PPC_BOOK3E_64 void __patch_exception(int exc, unsigned long addr); #define patch_exception(exc, name) do { \ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 40a4d3c6fd99..bac2252c839e 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -213,6 +213,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000) #define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) #define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000) +#define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000) #ifndef __ASSEMBLY__ @@ -467,6 +468,17 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ CPU_FTR_P9_TM_HV_ASSIST | \ CPU_FTR_P9_TM_XER_SO_BUG) +#define CPU_FTRS_POWER10 (CPU_FTR_LWSYNC | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ + CPU_FTR_MMCRA | CPU_FTR_SMT | \ + CPU_FTR_COHERENT_ICACHE | \ + CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ + CPU_FTR_ARCH_31) #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -485,14 +497,14 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | 
CPU_FTRS_POWER9 | \ - CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2) + CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \ - CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2) + CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif #else @@ -614,7 +626,11 @@ enum { }; #endif /* __powerpc64__ */ -#define HBP_NUM 1 +/* + * Maximum number of hw breakpoint supported on powerpc. Number of + * breakpoints supported by actual hw might be less than this. + */ +#define HBP_NUM_MAX 1 #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 7756026b95ca..ec57daf87f40 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -45,7 +45,7 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; } static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif -void __set_breakpoint(struct arch_hw_breakpoint *brk); +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk); bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 28c3d936fdf3..414d209f45bb 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -65,6 +65,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_ASSIGNED 0x00000008 #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 +#define DRCONF_MEM_HOTREMOVABLE 0x00000100 static inline u32 drmem_lmb_size(void) { diff --git a/arch/powerpc/include/asm/fadump-internal.h 
b/arch/powerpc/include/asm/fadump-internal.h index c814a2b55389..8d61c8f3fec4 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -64,12 +64,14 @@ struct fadump_memory_range { }; /* fadump memory ranges info */ +#define RNG_NAME_SZ 16 struct fadump_mrange_info { - char name[16]; + char name[RNG_NAME_SZ]; struct fadump_memory_range *mem_ranges; u32 mem_ranges_sz; u32 mem_range_cnt; u32 max_mem_ranges; + bool is_static; }; /* Platform specific callback functions */ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index ca33f4ef6cb4..6003c2e533a0 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -128,6 +128,7 @@ extern void machine_check_fwnmi(void); /* This is true if we are using the firmware NMI handler (typically LPAR) */ extern int fwnmi_active; +extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 2ef155a3c821..ccbe2e83c950 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -86,6 +86,10 @@ enum fixed_addresses { #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +#define FIXMAP_ALIGNED_SIZE (ALIGN(FIXADDR_TOP, PGDIR_SIZE) - \ + ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE)) +#define FIXMAP_PTE_SIZE (FIXMAP_ALIGNED_SIZE / PGDIR_SIZE * PTE_TABLE_SIZE) + #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG #define FIXMAP_PAGE_IO PAGE_KERNEL_NCG diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index f54a08a2cd70..bc76970b6ee5 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -108,9 +108,23 @@ static inline void this_cpu_enable_ftrace(void) { get_paca()->ftrace_enabled = 1; } + +/* Disable ftrace on this CPU if possible (may not be 
implemented) */ +static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) +{ + get_paca()->ftrace_enabled = ftrace_enabled; +} + +static inline u8 this_cpu_get_ftrace_enabled(void) +{ + return get_paca()->ftrace_enabled; +} + #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } +static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { } +static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } #endif /* CONFIG_PPC64 */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index f187bb5e524e..e93ee3202e4c 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -8,14 +8,12 @@ #include <linux/uaccess.h> #include <asm/errno.h> #include <asm/synch.h> -#include <asm/asm-405.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ __asm__ __volatile ( \ PPC_ATOMIC_ENTRY_BARRIER \ "1: lwarx %0,0,%2\n" \ insn \ - PPC405_ERR77(0, %2) \ "2: stwcx. %1,0,%2\n" \ "bne- 1b\n" \ PPC_ATOMIC_EXIT_BARRIER \ @@ -82,7 +80,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ cmpw 0,%1,%4\n\ bne- 3f\n" - PPC405_ERR77(0,%3) "2: stwcx. 
%5,0,%3\n\ bne- 1b\n" PPC_ATOMIC_EXIT_BARRIER diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index e6dfa63da552..551a9d4d3958 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -41,11 +41,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PPC64 return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1)); -#else - return __pte(pte_update(ptep, ~0UL, 0)); -#endif } #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index f2f8d8aa8e3b..cb424799da0d 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -34,15 +34,21 @@ struct arch_hw_breakpoint { #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ HW_BRK_TYPE_HYP) +/* Minimum granularity */ #ifdef CONFIG_PPC_8xx -#define HW_BREAKPOINT_ALIGN 0x3 +#define HW_BREAKPOINT_SIZE 0x4 #else -#define HW_BREAKPOINT_ALIGN 0x7 +#define HW_BREAKPOINT_SIZE 0x8 #endif #define DABR_MAX_LEN 8 #define DAWR_MAX_LEN 512 +static inline int nr_wp_slots(void) +{ + return HBP_NUM_MAX; +} + #ifdef CONFIG_HAVE_HW_BREAKPOINT #include <linux/kdebug.h> #include <asm/reg.h> @@ -64,7 +70,6 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); int arch_install_hw_breakpoint(struct perf_event *bp); void arch_uninstall_hw_breakpoint(struct perf_event *bp); -void arch_unregister_hw_breakpoint(struct perf_event *bp); void hw_breakpoint_pmu_read(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); @@ -73,14 +78,14 @@ extern void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs); static inline void hw_breakpoint_disable(void) { - struct arch_hw_breakpoint brk; 
- - brk.address = 0; - brk.type = 0; - brk.len = 0; - brk.hw_len = 0; - if (ppc_breakpoint_available()) - __set_breakpoint(&brk); + int i; + struct arch_hw_breakpoint null_brk = {0}; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + __set_breakpoint(i, &null_brk); } extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); @@ -99,10 +104,10 @@ static inline bool dawr_enabled(void) { return dawr_force_enable; } -int set_dawr(struct arch_hw_breakpoint *brk); +int set_dawr(int nr, struct arch_hw_breakpoint *brk); #else static inline bool dawr_enabled(void) { return false; } -static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; } +static inline int set_dawr(int nr, struct arch_hw_breakpoint *brk) { return -1; } #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index 9872f85d356f..965b1f39b2a5 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -108,6 +108,17 @@ struct data_descriptor_entry { __be64 address; } __packed __aligned(DDE_ALIGN); +/* 4.3.2 NX-stamped Fault CRB */ + +#define NX_STAMP_ALIGN (0x10) + +struct nx_fault_stamp { + __be64 fault_storage_addr; + __be16 reserved; + __u8 flags; + __u8 fault_status; + __be32 pswid; +} __packed __aligned(NX_STAMP_ALIGN); /* Chapter 6.5.2 Coprocessor-Request Block (CRB) */ @@ -135,10 +146,15 @@ struct coprocessor_request_block { struct coprocessor_completion_block ccb; - u8 reserved[48]; + union { + struct nx_fault_stamp nx; + u8 reserved[16]; + } stamp; + + u8 reserved[32]; struct coprocessor_status_block csb; -} __packed __aligned(CRB_ALIGN); +} __packed; /* RFC02167 Initiate Coprocessor Instructions document diff --git a/arch/powerpc/include/asm/idle.h b/arch/powerpc/include/asm/idle.h new file mode 100644 index 000000000000..accd1f50085a --- /dev/null +++ b/arch/powerpc/include/asm/idle.h @@ -0,0 
+1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_IDLE_H +#define _ASM_POWERPC_IDLE_H +#include <asm/runlatch.h> +#include <asm/paca.h> + +#ifdef CONFIG_PPC_PSERIES +DECLARE_PER_CPU(u64, idle_spurr_cycles); +DECLARE_PER_CPU(u64, idle_entry_purr_snap); +DECLARE_PER_CPU(u64, idle_entry_spurr_snap); + +static inline void snapshot_purr_idle_entry(void) +{ + *this_cpu_ptr(&idle_entry_purr_snap) = mfspr(SPRN_PURR); +} + +static inline void snapshot_spurr_idle_entry(void) +{ + *this_cpu_ptr(&idle_entry_spurr_snap) = mfspr(SPRN_SPURR); +} + +static inline void update_idle_purr_accounting(void) +{ + u64 wait_cycles; + u64 in_purr = *this_cpu_ptr(&idle_entry_purr_snap); + + wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); + wait_cycles += mfspr(SPRN_PURR) - in_purr; + get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); +} + +static inline void update_idle_spurr_accounting(void) +{ + u64 *idle_spurr_cycles_ptr = this_cpu_ptr(&idle_spurr_cycles); + u64 in_spurr = *this_cpu_ptr(&idle_entry_spurr_snap); + + *idle_spurr_cycles_ptr += mfspr(SPRN_SPURR) - in_spurr; +} + +static inline void pseries_idle_prolog(void) +{ + ppc64_runlatch_off(); + snapshot_purr_idle_entry(); + snapshot_spurr_idle_entry(); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; +} + +static inline void pseries_idle_epilog(void) +{ + update_idle_purr_accounting(); + update_idle_spurr_accounting(); + get_lppaca()->idle = 0; + ppc64_runlatch_on(); +} + +static inline u64 read_this_idle_purr(void) +{ + /* + * If we are reading from an idle context, update the + * idle-purr cycles corresponding to the last idle period. + * Since the idle context is not yet over, take a fresh + * snapshot of the idle-purr. 
+ */ + if (unlikely(get_lppaca()->idle == 1)) { + update_idle_purr_accounting(); + snapshot_purr_idle_entry(); + } + + return be64_to_cpu(get_lppaca()->wait_state_cycles); +} + +static inline u64 read_this_idle_spurr(void) +{ + /* + * If we are reading from an idle context, update the + * idle-spurr cycles corresponding to the last idle period. + * Since the idle context is not yet over, take a fresh + * snapshot of the idle-spurr. + */ + if (get_lppaca()->idle == 1) { + update_idle_spurr_accounting(); + snapshot_spurr_idle_entry(); + } + + return *this_cpu_ptr(&idle_spurr_cycles); +} + +#endif /* CONFIG_PPC_PSERIES */ +#endif diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h new file mode 100644 index 000000000000..45f3ec868258 --- /dev/null +++ b/arch/powerpc/include/asm/inst.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_INST_H +#define _ASM_POWERPC_INST_H + +#include <asm/ppc-opcode.h> + +/* + * Instruction data type for POWER + */ + +struct ppc_inst { + u32 val; +#ifdef CONFIG_PPC64 + u32 suffix; +#endif +} __packed; + +static inline u32 ppc_inst_val(struct ppc_inst x) +{ + return x.val; +} + +static inline int ppc_inst_primary_opcode(struct ppc_inst x) +{ + return ppc_inst_val(x) >> 26; +} + +#ifdef CONFIG_PPC64 +#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) + +#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) + +static inline u32 ppc_inst_suffix(struct ppc_inst x) +{ + return x.suffix; +} + +static inline bool ppc_inst_prefixed(struct ppc_inst x) +{ + return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; +} + +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +{ + return ppc_inst_prefix(swab32(ppc_inst_val(x)), + swab32(ppc_inst_suffix(x))); +} + +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) +{ + u32 val, suffix; + + val = *(u32 *)ptr; + if ((val >> 26) == OP_PREFIX) { + suffix 
= *((u32 *)ptr + 1); + return ppc_inst_prefix(val, suffix); + } else { + return ppc_inst(val); + } +} + +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +{ + return *(u64 *)&x == *(u64 *)&y; +} + +#else + +#define ppc_inst(x) ((struct ppc_inst){ .val = x }) + +static inline bool ppc_inst_prefixed(struct ppc_inst x) +{ + return false; +} + +static inline u32 ppc_inst_suffix(struct ppc_inst x) +{ + return 0; +} + +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +{ + return ppc_inst(swab32(ppc_inst_val(x))); +} + +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) +{ + return *ptr; +} + +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +{ + return ppc_inst_val(x) == ppc_inst_val(y); +} + +#endif /* CONFIG_PPC64 */ + +static inline int ppc_inst_len(struct ppc_inst x) +{ + return ppc_inst_prefixed(x) ? 8 : 4; +} + +/* + * Return the address of the next instruction, if the instruction @value was + * located at @location. 
+ */ +static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *value) +{ + struct ppc_inst tmp; + + tmp = ppc_inst_read(value); + + return location + ppc_inst_len(tmp); +} + +static inline u64 ppc_inst_as_u64(struct ppc_inst x) +{ +#ifdef CONFIG_CPU_LITTLE_ENDIAN + return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); +#else + return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); +#endif +} + +int probe_user_read_inst(struct ppc_inst *inst, + struct ppc_inst __user *nip); + +int probe_kernel_read_inst(struct ppc_inst *inst, + struct ppc_inst *src); + +#endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 350101e11ddb..5032f1593299 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -22,11 +22,11 @@ #define IOMMU_PAGE_SHIFT_4K 12 #define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K) #define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1)) -#define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K) +#define IOMMU_PAGE_ALIGN_4K(addr) ALIGN(addr, IOMMU_PAGE_SIZE_4K) #define IOMMU_PAGE_SIZE(tblptr) (ASM_CONST(1) << (tblptr)->it_page_shift) #define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1)) -#define IOMMU_PAGE_ALIGN(addr, tblptr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE(tblptr)) +#define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr)) /* Boot time flags */ extern int iommu_is_off; diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index fbff9ff9032e..be85c7005fb1 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -23,20 +23,20 @@ #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) -#define KASAN_SHADOW_END 0UL - -#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) +#define KASAN_SHADOW_END (-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT)) #ifdef CONFIG_KASAN void kasan_early_init(void); 
-void kasan_mmu_init(void); void kasan_init(void); void kasan_late_init(void); #else static inline void kasan_init(void) { } -static inline void kasan_mmu_init(void) { } static inline void kasan_late_init(void) { } #endif +void kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte); +int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end); +int kasan_init_region(void *start, size_t size); + #endif /* __ASSEMBLY */ #endif diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 66b3f2983b22..4fc0e15e23a5 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[]; extern kprobe_opcode_t optprobe_template_end[]; /* Fixed instruction size for powerpc */ -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */ #define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry) #define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 92bcd1a26d73..c745ee41ad66 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -10,7 +10,9 @@ * Use the current saved situation instead of the to/from/size params. 
* Used on book3s/32 */ -#define KUAP_CURRENT 4 +#define KUAP_CURRENT_READ 4 +#define KUAP_CURRENT_WRITE 8 +#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC64 #include <asm/book3s/64/kup-radix.h> @@ -101,6 +103,16 @@ static inline void prevent_current_access_user(void) prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT); } +static inline void prevent_current_read_from_user(void) +{ + prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ); +} + +static inline void prevent_current_write_to_user(void) +{ + prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE); +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6e5d85ba588d..8dd24c7692a0 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -198,7 +198,7 @@ extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, unsigned int shift, const struct kvm_memory_slot *memslot, unsigned int lpid); -extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, +extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, unsigned long gpa, unsigned int lpid); extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 04b2b927bb5a..9bb9bb370b53 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -14,6 +14,7 @@ #include <asm/book3s/64/mmu-hash.h> #include <asm/cpu_has_feature.h> #include <asm/ppc-opcode.h> +#include <asm/pte-walk.h> #ifdef CONFIG_PPC_PSERIES static inline bool kvmhv_on_pseries(void) @@ -434,7 +435,7 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing) continue; } /* If pte is not present return None */ - if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT))) + if (unlikely(!pte_present(old_pte))) 
return __pte(0); new_pte = pte_mkyoung(old_pte); @@ -634,6 +635,47 @@ extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm, unsigned long gpa, unsigned long hpa, unsigned long nbytes); +static inline pte_t * +find_kvm_secondary_pte_unlocked(struct kvm *kvm, unsigned long ea, + unsigned *hshift) +{ + pte_t *pte; + + pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift); + return pte; +} + +static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea, + unsigned *hshift) +{ + pte_t *pte; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift); + + return pte; +} + +static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq, + unsigned long ea, unsigned *hshift) +{ + pte_t *pte; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + + if (mmu_notifier_retry(kvm, mmu_seq)) + return NULL; + + pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift); + + return pte; +} + +extern pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, + unsigned long ea, unsigned *hshift); + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 0699cfeeb8c9..f4ac25d4df05 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -122,6 +122,7 @@ #define MMU_FTRS_POWER7 MMU_FTRS_POWER6 #define MMU_FTRS_POWER8 MMU_FTRS_POWER6 #define MMU_FTRS_POWER9 MMU_FTRS_POWER6 +#define MMU_FTRS_POWER10 MMU_FTRS_POWER6 #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ @@ -291,15 +292,6 @@ static inline bool early_radix_enabled(void) } #endif -#ifdef CONFIG_PPC_MEM_KEYS -extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address); -#else -static inline u16 
get_mm_addr_key(struct mm_struct *mm, unsigned long address) -{ - return 0; -} -#endif /* CONFIG_PPC_MEM_KEYS */ - #ifdef CONFIG_STRICT_KERNEL_RWX static inline bool strict_kernel_rwx_enabled(void) { diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 360367c579de..1a474f6b1992 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -185,11 +185,41 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) dec_mm_active_cpus(mm); } } + +/* + * vas_windows counter shows number of open windows in the mm + * context. During context switch, use this counter to clear the + * foreign real address mapping (CP_ABORT) for the thread / process + * that intend to use COPY/PASTE. When a process closes all windows, + * disable CP_ABORT which is expensive to run. + * + * For user context, register a copro so that TLBIs are seen by the + * nest MMU. mm_context_add/remove_vas_window() are used only for user + * space windows. 
+ */ +static inline void mm_context_add_vas_window(struct mm_struct *mm) +{ + atomic_inc(&mm->context.vas_windows); + mm_context_add_copro(mm); +} + +static inline void mm_context_remove_vas_window(struct mm_struct *mm) +{ + int v; + + mm_context_remove_copro(mm); + v = atomic_dec_if_positive(&mm->context.vas_windows); + + /* Detect imbalance between add and remove */ + WARN_ON(v < 0); +} #else static inline void inc_mm_active_cpus(struct mm_struct *mm) { } static inline void dec_mm_active_cpus(struct mm_struct *mm) { } static inline void mm_context_add_copro(struct mm_struct *mm) { } static inline void mm_context_remove_copro(struct mm_struct *mm) { } +static inline void mm_context_add_vas_windows(struct mm_struct *mm) { } +static inline void mm_context_remove_vas_windows(struct mm_struct *mm) { } #endif diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 5398bfc465b4..857d9ff24295 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -72,12 +72,9 @@ struct mod_arch_specific { # ifdef MODULE asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous"); # endif /* MODULE */ -#endif int module_trampoline_target(struct module *mod, unsigned long trampoline, unsigned long *target); - -#ifdef CONFIG_DYNAMIC_FTRACE int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs); #else static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index a46616937d20..e752a5807a59 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -13,13 +13,13 @@ static inline pte_t *hugepd_page(hugepd_t hpd) static inline unsigned int hugepd_shift(hugepd_t hpd) { - return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; + return PAGE_SHIFT_8M; } static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned 
long addr, unsigned int pdshift) { - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; + unsigned long idx = (addr & (SZ_4M - 1)) >> PAGE_SHIFT; return hugepd_page(hpd) + idx; } @@ -32,8 +32,12 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) { - *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | - (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K)); + *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | _PMD_PAGE_8M); +} + +static inline void hugepd_populate_kernel(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | _PMD_PRESENT | _PMD_PAGE_8M); } static inline int check_and_get_huge_psize(int shift) @@ -41,4 +45,24 @@ static inline int check_and_get_huge_psize(int shift) return shift_to_mmu_psize(shift); } +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + +#define __HAVE_ARCH_HUGE_PTE_CLEAR +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + pte_update(mm, addr, ptep, ~0UL, 0, 1); +} + +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); + unsigned long set = pte_val(pte_wrprotect(__pte(0))); + + pte_update(mm, addr, ptep, clr, set, 1); +} + #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 76af5b0cb16e..1d9ac0f9c794 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -19,7 +19,6 @@ #define MI_RSV4I 0x08000000 /* Reserve 4 TLB entries */ #define MI_PPCS 0x02000000 /* Use MI_RPN prob/priv state */ #define MI_IDXMASK 0x00001f00 /* TLB 
index to be loaded */ -#define MI_RESETVAL 0x00000000 /* Value of register at reset */ /* These are the Ks and Kp from the PowerPC books. For proper operation, * Ks = 0, Kp = 1. @@ -37,16 +36,16 @@ * Therefore, we define 2 APG groups. lsb is _PMD_USER * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MI_APG_INIT 0x4fffffff +#define MI_APG_INIT 0x40000000 /* * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MI_APG_KUEP 0x6fffffff +#define MI_APG_KUEP 0x60000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -95,7 +94,6 @@ #define MD_TWAM 0x04000000 /* Use 4K page hardware assist */ #define MD_PPCS 0x02000000 /* Use MI_RPN prob/priv state */ #define MD_IDXMASK 0x00001f00 /* TLB index to be loaded */ -#define MD_RESETVAL 0x04000000 /* Value of register at reset */ #define SPRN_M_CASID 793 /* Address space ID (context) to match */ #define MC_ASIDMASK 0x0000000f /* Bits used for ASID value */ @@ -117,16 +115,16 @@ * Therefore, we define 2 APG groups. 
lsb is _PMD_USER * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MD_APG_INIT 0x4fffffff +#define MD_APG_INIT 0x40000000 /* * 0 => No user => 01 (all accesses performed according to page definition) * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MD_APG_KUAP 0x6fffffff +#define MD_APG_KUAP 0x60000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -178,12 +176,6 @@ */ #define SPRN_M_TW 799 -#ifdef CONFIG_PPC_MM_SLICES -#include <asm/nohash/32/slice.h> -#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) -#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE -#endif - #if defined(CONFIG_PPC_4K_PAGES) #define mmu_virtual_psize MMU_PAGE_4K #elif defined(CONFIG_PPC_16K_PAGES) @@ -201,71 +193,15 @@ #include <linux/mmdebug.h> -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, 0); -}; +void mmu_pin_tlb(unsigned long top, bool readonly); typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; -#ifdef CONFIG_PPC_MM_SLICES - u16 user_psize; /* page size index */ - unsigned char low_slices_psize[SLICE_ARRAY_SIZE]; - unsigned char high_slices_psize[0]; - unsigned long slb_addr_limit; - struct slice_mask mask_base_psize; /* 4k or 16k */ - struct slice_mask mask_512k; - struct slice_mask mask_8m; -#endif void *pte_frag; } mm_context_t; -#ifdef CONFIG_PPC_MM_SLICES -static inline u16 mm_ctx_user_psize(mm_context_t *ctx) -{ - return ctx->user_psize; -} - -static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) -{ - ctx->user_psize = user_psize; -} - -static inline unsigned char 
*mm_ctx_low_slices(mm_context_t *ctx) -{ - return ctx->low_slices_psize; -} - -static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) -{ - return ctx->high_slices_psize; -} - -static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) -{ - return ctx->slb_addr_limit; -} - -static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) -{ - ctx->slb_addr_limit = limit; -} - -static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) -{ - if (psize == MMU_PAGE_512K) - return &ctx->mask_512k; - if (psize == MMU_PAGE_8M) - return &ctx->mask_8m; - - BUG_ON(psize != mmu_virtual_psize); - - return &ctx->mask_base_psize; -} -#endif /* CONFIG_PPC_MM_SLICE */ - #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) @@ -304,13 +240,7 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) } /* patch sites */ -extern s32 patch__itlbmiss_linmem_top, patch__itlbmiss_linmem_top8; -extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp; -extern s32 patch__fixupdar_linmem_top; -extern s32 patch__dtlbmiss_romem_top, patch__dtlbmiss_romem_top8; - -extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2; -extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3; +extern s32 patch__itlbmiss_exit_1, patch__dtlbmiss_exit_1; extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 3d0bc99dd520..af7f13cf90cf 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -8,7 +8,6 @@ #include <linux/sched.h> #include <linux/threads.h> #include <asm/mmu.h> /* For sub-arch specific PPC_PIN_SIZE */ -#include <asm/asm-405.h> #ifdef CONFIG_44x extern int icache_44x_need_flush; @@ -109,13 +108,13 @@ int map_kernel_page(unsigned long va, 
phys_addr_t pa, pgprot_t prot); */ #define VMALLOC_OFFSET (0x1000000) /* 16M */ #ifdef PPC_PIN_SIZE -#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) +#define VMALLOC_START (((ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #endif #ifdef CONFIG_KASAN_VMALLOC -#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else #define VMALLOC_END ioremap_bot #endif @@ -165,7 +164,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #ifndef __ASSEMBLY__ #define pte_clear(mm, addr, ptep) \ - do { pte_update(ptep, ~0, 0); } while (0) + do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0) #ifndef pte_mkwrite static inline pte_t pte_mkwrite(pte_t pte) @@ -205,6 +204,12 @@ static inline void pmd_clear(pmd_t *pmdp) } +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* to find an entry in a page-table-directory */ +#define pgd_index(address) ((address) >> PGDIR_SHIFT) +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) /* * PTE updates. This function is called whenever an existing @@ -220,66 +225,42 @@ static inline void pmd_clear(pmd_t *pmdp) * that an executable user mapping was modified, which is needed * to properly flush the virtually tagged instruction cache of * those implementations. + * + * On the 8xx, the page tables are a bit special. For 16k pages, we have + * 4 identical entries. For 512k pages, we have 128 entries as if it was + * 4k pages, but they are flagged as 512k pages for the hardware. + * For other page sizes, we have a single entry in the table. 
*/ -#ifndef CONFIG_PTE_64BIT -static inline unsigned long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) +#ifdef CONFIG_PPC_8xx +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__("\ -1: lwarx %0,0,%3\n\ - andc %1,%0,%4\n\ - or %1,%1,%5\n" - PPC405_ERR77(0,%3) -" stwcx. %1,0,%3\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* PTE_ATOMIC_UPDATES */ - unsigned long old = pte_val(*p); - unsigned long new = (old & ~clr) | set; - -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) - p->pte = p->pte1 = p->pte2 = p->pte3 = new; -#else - *p = __pte(new); -#endif -#endif /* !PTE_ATOMIC_UPDATES */ + pte_basic_t *entry = &p->pte; + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + int num, i; + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr); + + if (!huge) + num = PAGE_SIZE / SZ_4K; + else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M) + num = SZ_512K / SZ_4K; + else + num = 1; + + for (i = 0; i < num; i++, entry++, new += SZ_4K) + *entry = new; -#ifdef CONFIG_44x - if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) - icache_44x_need_flush = 1; -#endif return old; } -#else /* CONFIG_PTE_64BIT */ -static inline unsigned long long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) +#else +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long long old; - unsigned long tmp; - - __asm__ __volatile__("\ -1: lwarx %L0,0,%4\n\ - lwzx %0,0,%3\n\ - andc %1,%L0,%5\n\ - or %1,%1,%6\n" - PPC405_ERR77(0,%3) -" stwcx. 
%1,0,%4\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* PTE_ATOMIC_UPDATES */ - unsigned long long old = pte_val(*p); - *p = __pte((old & ~(unsigned long long)clr) | set); -#endif /* !PTE_ATOMIC_UPDATES */ + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + + *p = __pte(new); #ifdef CONFIG_44x if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) @@ -287,23 +268,24 @@ static inline unsigned long long pte_update(pte_t *p, #endif return old; } -#endif /* CONFIG_PTE_64BIT */ +#endif #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; - old = pte_update(ptep, _PAGE_ACCESSED, 0); + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep) + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return __pte(pte_update(ptep, ~0, 0)); + return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT @@ -313,7 +295,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); unsigned long set = pte_val(pte_wrprotect(__pte(0))); - pte_update(ptep, clr, set); + pte_update(mm, addr, ptep, clr, set, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -325,8 +307,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t pte_clr = 
pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0))))); unsigned long set = pte_val(entry) & pte_val(pte_set); unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr); + int huge = psize > mmu_virtual_psize ? 1 : 0; - pte_update(ptep, clr, set); + pte_update(vma->vm_mm, address, ptep, clr, set, huge); flush_tlb_page(vma, address); } @@ -358,13 +341,6 @@ static inline int pte_young(pte_t pte) pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT)) #endif -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - /* Find an entry in the third-level page table.. */ #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index 12c6811e344b..2d3153cfc0d7 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -44,9 +44,8 @@ #define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ #define _PAGE_USER 0x010 /* matches one of the zone permission bits */ #define _PAGE_SPECIAL 0x020 /* software: Special page */ -#define _PAGE_RW 0x040 /* software: Writes permitted */ #define _PAGE_DIRTY 0x080 /* software: dirty page */ -#define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */ +#define _PAGE_RW 0x100 /* hardware: WR, anded with dirty in exception */ #define _PAGE_EXEC 0x200 /* hardware: EX permission */ #define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ @@ -58,8 +57,8 @@ #define _PAGE_KERNEL_RO 0 #define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC) +#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) +#define 
_PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) #define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */ #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -85,21 +84,5 @@ #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) #define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#ifndef __ASSEMBLY__ -static inline pte_t pte_wrprotect(pte_t pte) -{ - return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE)); -} - -#define pte_wrprotect pte_wrprotect - -static inline pte_t pte_mkclean(pte_t pte) -{ - return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE)); -} - -#define pte_mkclean pte_mkclean -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index c9e4b2d90f65..66f403a7da44 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -46,6 +46,8 @@ #define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ +#define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */ + /* cache related flags non existing on 8xx */ #define _PAGE_COHERENT 0 #define _PAGE_WRITETHRU 0 @@ -128,7 +130,7 @@ static inline pte_t pte_mkuser(pte_t pte) static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPS); + return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE); } #define pte_mkhuge pte_mkhuge diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h deleted file mode 100644 index 39eb0154ae2d..000000000000 --- a/arch/powerpc/include/asm/nohash/32/slice.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H -#define _ASM_POWERPC_NOHASH_32_SLICE_H - -#ifdef CONFIG_PPC_MM_SLICES - -#define SLICE_LOW_SHIFT 26 /* 64 slices */ -#define SLICE_LOW_TOP (0x100000000ull) -#define SLICE_NUM_LOW (SLICE_LOW_TOP >> 
SLICE_LOW_SHIFT) -#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) - -#define SLICE_HIGH_SHIFT 0 -#define SLICE_NUM_HIGH 0ul -#define GET_HIGH_SLICE_INDEX(addr) (addr & 0) - -#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW - -#endif /* CONFIG_PPC_MM_SLICES */ - -#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index b360f262b9c6..3424381b81da 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -211,22 +211,9 @@ static inline unsigned long pte_update(struct mm_struct *mm, unsigned long set, int huge) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__( - "1: ldarx %0,0,%3 # pte_update\n\ - andc %1,%0,%4 \n\ - or %1,%1,%6\n\ - stdcx. %1,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*ptep) - : "r" (ptep), "r" (clr), "m" (*ptep), "r" (set) - : "cc" ); -#else unsigned long old = pte_val(*ptep); *ptep = __pte((old & ~clr) | set); -#endif + /* huge pages use the old page table lock */ if (!huge) assert_pte_locked(mm, addr); @@ -310,21 +297,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long bits = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__( - "1: ldarx %0,0,%4\n\ - or %0,%3,%0\n\ - stdcx. 
%0,0,%4\n\ - bne- 1b" - :"=&r" (old), "=&r" (tmp), "=m" (*ptep) - :"r" (bits), "r" (ptep), "m" (*ptep) - :"cc"); -#else unsigned long old = pte_val(*ptep); *ptep = __pte(old | bits); -#endif flush_tlb_page(vma, address); } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 7fed9dc0f147..50a4b0bb8d16 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -130,12 +130,10 @@ static inline pte_t pte_exprotect(pte_t pte) return __pte(pte_val(pte) & ~_PAGE_EXEC); } -#ifndef pte_mkclean static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_DIRTY); } -#endif static inline pte_t pte_mkold(pte_t pte) { @@ -267,7 +265,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline int hugepd_ok(hugepd_t hpd) { #ifdef CONFIG_PPC_8xx - return ((hpd_val(hpd) & 0x4) != 0); + return ((hpd_val(hpd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M); #else /* We clear the top bit to indicate hugepd */ return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index e3cc9eb9204d..45a839a7c6cf 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -29,6 +29,7 @@ #include <asm/hmi.h> #include <asm/cpuidle.h> #include <asm/atomic.h> +#include <asm/rtas-types.h> #include <asm-generic/mmiowb_types.h> @@ -256,6 +257,7 @@ struct paca_struct { u64 l1d_flush_size; #endif #ifdef CONFIG_PPC_PSERIES + struct rtas_args *rtas_args_reentrant; u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ #endif /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 3ee8df0f66e0..a63fe6f3a0ff 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -249,13 +249,6 @@ static inline bool pfn_valid(unsigned long pfn) #include <asm/page_32.h> #endif -/* align addr on a size 
boundary - adjust address up/down if needed */ -#define _ALIGN_UP(addr, size) __ALIGN_KERNEL(addr, size) -#define _ALIGN_DOWN(addr, size) ((addr)&(~((typeof(addr))(size)-1))) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr,size) _ALIGN_UP(addr,size) - /* * Don't compare things with KERNELBASE or PAGE_OFFSET to test for * "kernelness", use is_kernel_addr() - it should do what you want. diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index bad9b324559d..ae58b524a924 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -107,6 +107,8 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned int shift); +pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va); + #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32) void mark_initmem_nx(void); #else diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c1df75edde44..2a39c716c343 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -158,6 +158,9 @@ /* VMX Vector Store Instructions */ #define OP_31_XOP_STVX 231 +/* Prefixed Instructions */ +#define OP_PREFIX 1 + #define OP_31 31 #define OP_LWZ 32 #define OP_STFS 52 diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index eedcbfb9a6ff..52a67835057a 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -180,14 +180,14 @@ struct thread_struct { int fpexc_mode; /* floating-point exception mode */ unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT - struct perf_event *ptrace_bps[HBP_NUM]; + struct perf_event *ptrace_bps[HBP_NUM_MAX]; /* * Helps identify source of single-step exception and subsequent * hw-breakpoint enablement */ - struct perf_event *last_hit_ubp; + struct perf_event *last_hit_ubp[HBP_NUM_MAX]; #endif 
/* CONFIG_HAVE_HW_BREAKPOINT */ - struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ + struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */ unsigned long trap_nr; /* last trap # on this thread */ u8 load_slb; /* Ages out SLB preload cache entries */ u8 load_fp; @@ -272,7 +272,6 @@ struct thread_struct { unsigned mmcr0; unsigned used_ebb; - unsigned int used_vas; #endif }; @@ -301,14 +300,12 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .addr_limit = KERNEL_DS, \ .fpexc_mode = 0, \ - .fscr = FSCR_TAR | FSCR_EBB \ } #endif -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs) +#define task_pt_regs(tsk) ((tsk)->thread.regs) unsigned long get_wchan(struct task_struct *p); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 94e3fd54f2c8..324a13351749 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -117,6 +117,7 @@ extern int of_read_drc_info_cell(struct property **prop, #define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */ #define OV1_PPC_3_00 0x80 /* set if we support PowerPC 3.00 */ +#define OV1_PPC_3_1 0x40 /* set if we support PowerPC 3.1 */ /* Option vector 2: Open Firmware options supported */ #define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e0195e6b892b..ac3970fff0d5 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -179,6 +179,22 @@ extern int ptrace_put_reg(struct task_struct *task, int regno, #define current_pt_regs() \ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) + +#ifdef __powerpc64__ +#ifdef CONFIG_PPC_BOOK3S +#define TRAP_FLAGS_MASK 0x10 +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) +#define FULL_REGS(regs) true +#define SET_FULL_REGS(regs) 
do { } while (0) +#else +#define TRAP_FLAGS_MASK 0x11 +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) +#define FULL_REGS(regs) (((regs)->trap & 1) == 0) +#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#endif +#define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) +#define NV_REG_POISON 0xdeadbeefdeadbeefUL +#else /* * We use the least-significant bit of the trap field to indicate * whether we have saved the full set of registers, or only a @@ -186,17 +202,13 @@ extern int ptrace_put_reg(struct task_struct *task, int regno, * On 4xx we use the next bit to indicate whether the exception * is a critical exception (1 means it is). */ +#define TRAP_FLAGS_MASK 0x1F +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#ifndef __powerpc64__ +#define SET_FULL_REGS(regs) ((regs)->trap |= 1) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) -#endif /* ! 
__powerpc64__ */ -#define TRAP(regs) ((regs)->trap & ~0xF) -#ifdef __powerpc64__ -#define NV_REG_POISON 0xdeadbeefdeadbeefUL -#define CHECK_FULL_REGS(regs) BUG_ON(regs->trap & 1) -#else #define NV_REG_POISON 0xdeadbeef #define CHECK_FULL_REGS(regs) \ do { \ @@ -205,6 +217,26 @@ do { \ } while (0) #endif /* __powerpc64__ */ +static inline void set_trap(struct pt_regs *regs, unsigned long val) +{ + regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK); +} + +static inline bool trap_is_syscall(struct pt_regs *regs) +{ + return TRAP(regs) == 0xc00; +} + +static inline bool trap_norestart(struct pt_regs *regs) +{ + return regs->trap & 0x10; +} + +static inline void set_trap_norestart(struct pt_regs *regs) +{ + regs->trap |= 0x10; +} + #define arch_has_single_step() (1) #ifndef CONFIG_BOOK3S_601 #define arch_has_block_step() (true) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index da5cab038e25..88e6c78100d9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -283,14 +283,16 @@ #define CTRL_CT1 0x40000000 /* thread 1 */ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 -#define SPRN_DAWR 0xB4 +#define SPRN_DAWR0 0xB4 +#define SPRN_DAWR1 0xB5 #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 #define CIABR_PRIV_USER 1 #define CIABR_PRIV_SUPER 2 #define CIABR_PRIV_HYPER 3 -#define SPRN_DAWRX 0xBC +#define SPRN_DAWRX0 0xBC +#define SPRN_DAWRX1 0xBD #define DAWRX_USER __MASK(0) #define DAWRX_KERNEL __MASK(1) #define DAWRX_HYP __MASK(2) @@ -397,6 +399,7 @@ #define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */ /* HFSCR and FSCR bit numbers are the same */ +#define FSCR_PREFIX_LG 13 /* Enable Prefix Instructions */ #define FSCR_SCV_LG 12 /* Enable System Call Vectored */ #define FSCR_MSGP_LG 10 /* Enable MSGP */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ @@ -408,11 +411,13 @@ #define FSCR_VECVSX_LG 1 /* 
Enable VMX/VSX */ #define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ +#define FSCR_PREFIX __MASK(FSCR_PREFIX_LG) #define FSCR_SCV __MASK(FSCR_SCV_LG) #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ +#define HFSCR_PREFIX __MASK(FSCR_PREFIX_LG) #define HFSCR_MSGP __MASK(FSCR_MSGP_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) @@ -476,16 +481,18 @@ #define PCR_VEC_DIS (__MASK(63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (__MASK(63-1)) /* VSX disable (bit NA since POWER8) */ #define PCR_TM_DIS (__MASK(63-2)) /* Trans. memory disable (POWER8) */ -#define PCR_HIGH_BITS (PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS) +#define PCR_MMA_DIS (__MASK(63-3)) /* Matrix-Multiply Accelerator */ +#define PCR_HIGH_BITS (PCR_MMA_DIS | PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS) /* * These bits are used in the function kvmppc_set_arch_compat() to specify and * determine both the compatibility level which we want to emulate and the * compatibility level which the host is capable of emulating. 
*/ +#define PCR_ARCH_300 0x10 /* Architecture 3.00 */ #define PCR_ARCH_207 0x8 /* Architecture 2.07 */ #define PCR_ARCH_206 0x4 /* Architecture 2.06 */ #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ -#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205) +#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205 | PCR_ARCH_300) #define PCR_MASK ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ #define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ @@ -759,7 +766,7 @@ #endif #define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */ -#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ +#define SRR1_ISI_N_G_OR_CIP 0x10000000 /* ISI: Access is no-exec or G or CI for a prefixed instruction */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ #define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 and 9 */ @@ -786,6 +793,8 @@ #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ #define SRR1_MCE_MCP 0x00080000 /* Machine check signal caused interrupt */ +#define SRR1_BOUNDARY 0x10000000 /* Prefixed instruction crosses 64-byte boundary */ +#define SRR1_PREFIXED 0x20000000 /* Exception caused by prefixed instruction */ #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index f26fe482fbca..ff30f1076162 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -663,60 +663,6 @@ #define EPC_EPID 0x00003fff #define EPC_EPID_SHIFT 0 -/* - * The IBM-403 is an even more odd special case, as it is much - * older than the IBM-405 series. We put these down here incase someone - * wishes to support these machines again. 
- */ -#ifdef CONFIG_403GCX -/* Special Purpose Registers (SPRNs)*/ -#define SPRN_TBHU 0x3CC /* Time Base High User-mode */ -#define SPRN_TBLU 0x3CD /* Time Base Low User-mode */ -#define SPRN_CDBCR 0x3D7 /* Cache Debug Control Register */ -#define SPRN_TBHI 0x3DC /* Time Base High */ -#define SPRN_TBLO 0x3DD /* Time Base Low */ -#define SPRN_DBCR 0x3F2 /* Debug Control Register */ -#define SPRN_PBL1 0x3FC /* Protection Bound Lower 1 */ -#define SPRN_PBL2 0x3FE /* Protection Bound Lower 2 */ -#define SPRN_PBU1 0x3FD /* Protection Bound Upper 1 */ -#define SPRN_PBU2 0x3FF /* Protection Bound Upper 2 */ - - -/* Bit definitions for the DBCR. */ -#define DBCR_EDM DBCR0_EDM -#define DBCR_IDM DBCR0_IDM -#define DBCR_RST(x) (((x) & 0x3) << 28) -#define DBCR_RST_NONE 0 -#define DBCR_RST_CORE 1 -#define DBCR_RST_CHIP 2 -#define DBCR_RST_SYSTEM 3 -#define DBCR_IC DBCR0_IC /* Instruction Completion Debug Evnt */ -#define DBCR_BT DBCR0_BT /* Branch Taken Debug Event */ -#define DBCR_EDE DBCR0_EDE /* Exception Debug Event */ -#define DBCR_TDE DBCR0_TDE /* TRAP Debug Event */ -#define DBCR_FER 0x00F80000 /* First Events Remaining Mask */ -#define DBCR_FT 0x00040000 /* Freeze Timers on Debug Event */ -#define DBCR_IA1 0x00020000 /* Instr. Addr. Compare 1 Enable */ -#define DBCR_IA2 0x00010000 /* Instr. Addr. Compare 2 Enable */ -#define DBCR_D1R 0x00008000 /* Data Addr. Compare 1 Read Enable */ -#define DBCR_D1W 0x00004000 /* Data Addr. Compare 1 Write Enable */ -#define DBCR_D1S(x) (((x) & 0x3) << 12) /* Data Adrr. Compare 1 Size */ -#define DAC_BYTE 0 -#define DAC_HALF 1 -#define DAC_WORD 2 -#define DAC_QUAD 3 -#define DBCR_D2R 0x00000800 /* Data Addr. Compare 2 Read Enable */ -#define DBCR_D2W 0x00000400 /* Data Addr. Compare 2 Write Enable */ -#define DBCR_D2S(x) (((x) & 0x3) << 8) /* Data Addr. 
Compare 2 Size */ -#define DBCR_SBT 0x00000040 /* Second Branch Taken Debug Event */ -#define DBCR_SED 0x00000020 /* Second Exception Debug Event */ -#define DBCR_STD 0x00000010 /* Second Trap Debug Event */ -#define DBCR_SIA 0x00000008 /* Second IAC Enable */ -#define DBCR_SDA 0x00000004 /* Second DAC Enable */ -#define DBCR_JOI 0x00000002 /* JTAG Serial Outbound Int. Enable */ -#define DBCR_JII 0x00000001 /* JTAG Serial Inbound Int. Enable */ -#endif /* 403GCX */ - /* Some 476 specific registers */ #define SPRN_SSPCR 830 #define SPRN_USPCR 831 diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h new file mode 100644 index 000000000000..aa420561bc10 --- /dev/null +++ b/arch/powerpc/include/asm/rtas-types.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_RTAS_TYPES_H +#define _ASM_POWERPC_RTAS_TYPES_H + +#include <linux/spinlock_types.h> + +typedef __be32 rtas_arg_t; + +struct rtas_args { + __be32 token; + __be32 nargs; + __be32 nret; + rtas_arg_t args[16]; + rtas_arg_t *rets; /* Pointer to return values in args[]. */ +}; + +struct rtas_t { + unsigned long entry; /* physical address pointer */ + unsigned long base; /* physical address pointer */ + unsigned long size; + arch_spinlock_t lock; + struct rtas_args args; + struct device_node *dev; /* virtual address pointer */ +}; + +struct rtas_suspend_me_data { + atomic_t working; /* number of cpus accessing this struct */ + atomic_t done; + int token; /* ibm,suspend-me */ + atomic_t error; + struct completion *complete; /* wait on this until working == 0 */ +}; + +struct rtas_error_log { + /* Byte 0 */ + u8 byte0; /* Architectural version */ + + /* Byte 1 */ + u8 byte1; + /* XXXXXXXX + * XXX 3: Severity level of error + * XX 2: Degree of recovery + * X 1: Extended log present? 
+ * XX 2: Reserved + */ + + /* Byte 2 */ + u8 byte2; + /* XXXXXXXX + * XXXX 4: Initiator of event + * XXXX 4: Target of failed operation + */ + u8 byte3; /* General event or error*/ + __be32 extended_log_length; /* length in bytes */ + unsigned char buffer[1]; /* Start of extended log */ + /* Variable length. */ +}; + +/* RTAS general extended event log, Version 6. The extended log starts + * from "buffer" field of struct rtas_error_log defined above. + */ +struct rtas_ext_event_log_v6 { + /* Byte 0 */ + u8 byte0; + /* XXXXXXXX + * X 1: Log valid + * X 1: Unrecoverable error + * X 1: Recoverable (correctable or successfully retried) + * X 1: Bypassed unrecoverable error (degraded operation) + * X 1: Predictive error + * X 1: "New" log (always 1 for data returned from RTAS) + * X 1: Big Endian + * X 1: Reserved + */ + + /* Byte 1 */ + u8 byte1; /* reserved */ + + /* Byte 2 */ + u8 byte2; + /* XXXXXXXX + * X 1: Set to 1 (indicating log is in PowerPC format) + * XXX 3: Reserved + * XXXX 4: Log format used for bytes 12-2047 + */ + + /* Byte 3 */ + u8 byte3; /* reserved */ + /* Byte 4-11 */ + u8 reserved[8]; /* reserved */ + /* Byte 12-15 */ + __be32 company_id; /* Company ID of the company */ + /* that defines the format for */ + /* the vendor specific log type */ + /* Byte 16-end of log */ + u8 vendor_log[1]; /* Start of vendor specific log */ + /* Variable length. 
*/ +}; + +/* Vendor specific Platform Event Log Format, Version 6, section header */ +struct pseries_errorlog { + __be16 id; /* 0x00 2-byte ASCII section ID */ + __be16 length; /* 0x02 Section length in bytes */ + u8 version; /* 0x04 Section version */ + u8 subtype; /* 0x05 Section subtype */ + __be16 creator_component; /* 0x06 Creator component ID */ + u8 data[]; /* 0x08 Start of section data */ +}; + +/* RTAS pseries hotplug errorlog section */ +struct pseries_hp_errorlog { + u8 resource; + u8 action; + u8 id_type; + u8 reserved; + union { + __be32 drc_index; + __be32 drc_count; + struct { __be32 count, index; } ic; + char drc_name[1]; + } _drc_u; +}; + +#endif /* _ASM_POWERPC_RTAS_TYPES_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3c1887351c71..014968f25f7e 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -5,6 +5,7 @@ #include <linux/spinlock.h> #include <asm/page.h> +#include <asm/rtas-types.h> #include <linux/time.h> #include <linux/cpumask.h> @@ -42,33 +43,6 @@ * */ -typedef __be32 rtas_arg_t; - -struct rtas_args { - __be32 token; - __be32 nargs; - __be32 nret; - rtas_arg_t args[16]; - rtas_arg_t *rets; /* Pointer to return values in args[]. 
*/ -}; - -struct rtas_t { - unsigned long entry; /* physical address pointer */ - unsigned long base; /* physical address pointer */ - unsigned long size; - arch_spinlock_t lock; - struct rtas_args args; - struct device_node *dev; /* virtual address pointer */ -}; - -struct rtas_suspend_me_data { - atomic_t working; /* number of cpus accessing this struct */ - atomic_t done; - int token; /* ibm,suspend-me */ - atomic_t error; - struct completion *complete; /* wait on this until working == 0 */ -}; - /* RTAS event classes */ #define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */ #define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */ @@ -148,31 +122,6 @@ struct rtas_suspend_me_data { /* RTAS check-exception vector offset */ #define RTAS_VECTOR_EXTERNAL_INTERRUPT 0x500 -struct rtas_error_log { - /* Byte 0 */ - uint8_t byte0; /* Architectural version */ - - /* Byte 1 */ - uint8_t byte1; - /* XXXXXXXX - * XXX 3: Severity level of error - * XX 2: Degree of recovery - * X 1: Extended log present? - * XX 2: Reserved - */ - - /* Byte 2 */ - uint8_t byte2; - /* XXXXXXXX - * XXXX 4: Initiator of event - * XXXX 4: Target of failed operation - */ - uint8_t byte3; /* General event or error*/ - __be32 extended_log_length; /* length in bytes */ - unsigned char buffer[1]; /* Start of extended log */ - /* Variable length. */ -}; - static inline uint8_t rtas_error_severity(const struct rtas_error_log *elog) { return (elog->byte1 & 0xE0) >> 5; @@ -212,47 +161,6 @@ uint32_t rtas_error_extended_log_length(const struct rtas_error_log *elog) #define RTAS_V6EXT_COMPANY_ID_IBM (('I' << 24) | ('B' << 16) | ('M' << 8)) -/* RTAS general extended event log, Version 6. The extended log starts - * from "buffer" field of struct rtas_error_log defined above. 
- */ -struct rtas_ext_event_log_v6 { - /* Byte 0 */ - uint8_t byte0; - /* XXXXXXXX - * X 1: Log valid - * X 1: Unrecoverable error - * X 1: Recoverable (correctable or successfully retried) - * X 1: Bypassed unrecoverable error (degraded operation) - * X 1: Predictive error - * X 1: "New" log (always 1 for data returned from RTAS) - * X 1: Big Endian - * X 1: Reserved - */ - - /* Byte 1 */ - uint8_t byte1; /* reserved */ - - /* Byte 2 */ - uint8_t byte2; - /* XXXXXXXX - * X 1: Set to 1 (indicating log is in PowerPC format) - * XXX 3: Reserved - * XXXX 4: Log format used for bytes 12-2047 - */ - - /* Byte 3 */ - uint8_t byte3; /* reserved */ - /* Byte 4-11 */ - uint8_t reserved[8]; /* reserved */ - /* Byte 12-15 */ - __be32 company_id; /* Company ID of the company */ - /* that defines the format for */ - /* the vendor specific log type */ - /* Byte 16-end of log */ - uint8_t vendor_log[1]; /* Start of vendor specific log */ - /* Variable length. */ -}; - static inline uint8_t rtas_ext_event_log_format(struct rtas_ext_event_log_v6 *ext_log) { @@ -287,16 +195,6 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) #define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P') #define PSERIES_ELOG_SECT_ID_MCE (('M' << 8) | 'C') -/* Vendor specific Platform Event Log Format, Version 6, section header */ -struct pseries_errorlog { - __be16 id; /* 0x00 2-byte ASCII section ID */ - __be16 length; /* 0x02 Section length in bytes */ - uint8_t version; /* 0x04 Section version */ - uint8_t subtype; /* 0x05 Section subtype */ - __be16 creator_component; /* 0x06 Creator component ID */ - uint8_t data[]; /* 0x08 Start of section data */ -}; - static inline uint16_t pseries_errorlog_id(struct pseries_errorlog *sect) { @@ -309,20 +207,6 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) return be16_to_cpu(sect->length); } -/* RTAS pseries hotplug errorlog section */ -struct pseries_hp_errorlog { - u8 resource; - u8 action; - u8 id_type; 
- u8 reserved; - union { - __be32 drc_index; - __be32 drc_count; - struct { __be32 count, index; } ic; - char drc_name[1]; - } _drc_u; -}; - #define PSERIES_HP_ELOG_RESOURCE_CPU 1 #define PSERIES_HP_ELOG_RESOURCE_MEM 2 #define PSERIES_HP_ELOG_RESOURCE_SLOT 3 @@ -352,6 +236,7 @@ extern struct rtas_t rtas; extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...); void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...); extern void __noreturn rtas_restart(char *cmd); @@ -483,5 +368,11 @@ static inline void rtas_initialize(void) { }; extern int call_rtas(const char *, int, int, unsigned long *, ...); +#ifdef CONFIG_HV_PERF_CTRS +void read_24x7_sys_info(void); +#else +static inline void read_24x7_sys_info(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _POWERPC_RTAS_H */ diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index c6f466f4c241..0bdd9c62eca0 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -4,8 +4,6 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/slice.h> -#elif defined(CONFIG_PPC_MMU_NOHASH_32) -#include <asm/nohash/32/slice.h> #endif #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 860228e917dc..2d620896cdae 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -23,7 +23,6 @@ #endif #include <asm/synch.h> #include <asm/ppc-opcode.h> -#include <asm/asm-405.h> #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ @@ -210,7 +209,6 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw) __DO_SIGN_EXTEND " addic. %0,%0,1\n\ ble- 2f\n" - PPC405_ERR77(0,%1) " stwcx. 
%0,0,%1\n\ bne- 1b\n" PPC_ACQUIRE_BARRIER @@ -234,7 +232,6 @@ static inline long __arch_write_trylock(arch_rwlock_t *rw) "1: " PPC_LWARX(%0,0,%2,1) "\n\ cmpwi 0,%0,0\n\ bne- 2f\n" - PPC405_ERR77(0,%1) " stwcx. %1,0,%2\n\ bne- 1b\n" PPC_ACQUIRE_BARRIER @@ -292,7 +289,6 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) PPC_RELEASE_BARRIER "1: lwarx %0,0,%1\n\ addic %0,%0,-1\n" - PPC405_ERR77(0,%1) " stwcx. %0,0,%1\n\ bne- 1b" : "=&r"(tmp) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 769f055509c9..3b01c69a44aa 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -2,6 +2,7 @@ /* * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM */ +#include <asm/inst.h> struct pt_regs; @@ -15,9 +16,9 @@ struct pt_regs; * Note that IS_MTMSRD returns true for both an mtmsr (32-bit) * and an mtmsrd (64-bit). */ -#define IS_MTMSRD(instr) (((instr) & 0xfc0007be) == 0x7c000124) -#define IS_RFID(instr) (((instr) & 0xfc0007fe) == 0x4c000024) -#define IS_RFI(instr) (((instr) & 0xfc0007fe) == 0x4c000064) +#define IS_MTMSRD(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124) +#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000024) +#define IS_RFI(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000064) enum instruction_type { COMPUTE, /* arith/logical/CR op, etc. 
*/ @@ -48,6 +49,8 @@ enum instruction_type { #define INSTR_TYPE_MASK 0x1f +#define OP_IS_LOAD(type) ((LOAD <= (type) && (type) <= LOAD_VSX) || (type) == LARX) +#define OP_IS_STORE(type) ((STORE <= (type) && (type) <= STORE_VSX) || (type) == STCX) #define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX) /* Compute flags, ORed in with type */ @@ -89,11 +92,15 @@ enum instruction_type { #define VSX_LDLEFT 4 /* load VSX register from left */ #define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */ +/* Prefixed flag, ORed in with type */ +#define PREFIXED 0x800 + /* Size field in type word */ #define SIZE(n) ((n) << 12) #define GETSIZE(w) ((w) >> 12) #define GETTYPE(t) ((t) & INSTR_TYPE_MASK) +#define GETLENGTH(t) (((t) & PREFIXED) ? 8 : 4) #define MKOP(t, f, s) ((t) | (f) | SIZE(s)) @@ -132,7 +139,7 @@ union vsx_reg { * otherwise. */ extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - unsigned int instr); + struct ppc_inst instr); /* * Emulate an instruction that can be executed just by updating @@ -149,7 +156,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); * 0 if it could not be emulated, or -1 for an instruction that * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.). 
*/ -extern int emulate_step(struct pt_regs *regs, unsigned int instr); +extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr); /* * Emulate a load or store instruction by reading/writing the diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index b867b58b1093..fdab93428372 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -102,8 +102,6 @@ static inline void clear_task_ebb(struct task_struct *t) #endif } -extern int set_thread_uses_vas(void); - extern int set_thread_tidr(struct task_struct *t); #endif /* _ASM_POWERPC_SWITCH_TO_H */ diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 38d62acfdce7..fd1b518eed17 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -26,7 +26,10 @@ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) * This is important for seccomp so that compat tasks can set r0 = -1 * to reject the syscall. */ - return TRAP(regs) == 0xc00 ? 
regs->gpr[0] : -1; + if (trap_is_syscall(regs)) + return regs->gpr[0]; + else + return -1; } static inline void syscall_rollback(struct task_struct *task, diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 39ce95016a3a..b287cfc2dd85 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -51,24 +51,12 @@ struct div_result { static inline unsigned long get_tbl(void) { -#if defined(CONFIG_403GCX) - unsigned long tbl; - asm volatile("mfspr %0, 0x3dd" : "=r" (tbl)); - return tbl; -#else return mftbl(); -#endif } static inline unsigned int get_tbu(void) { -#ifdef CONFIG_403GCX - unsigned int tbu; - asm volatile("mfspr %0, 0x3dc" : "=r" (tbu)); - return tbu; -#else return mftbu(); -#endif } #endif /* !CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 0969285996cb..64c04ab09112 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -93,18 +93,63 @@ static inline int __access_ok(unsigned long addr, unsigned long size, #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true) + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#define __put_user_goto(x, ptr, label) \ + __put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #define __get_user_allowed(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) -#define __put_user_allowed(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false) #define __get_user_inatomic(x, ptr) \ __get_user_nosleep((x), (ptr), sizeof(*(ptr))) #define __put_user_inatomic(x, ptr) \ __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#ifdef CONFIG_PPC64 + +#define ___get_user_instr(gu_op, dest, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned 
long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int __prefix, __suffix; \ + __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + if (__gui_ret == 0) { \ + if ((__prefix >> 26) == OP_PREFIX) { \ + __gui_ret = gu_op(__suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(__prefix, \ + __suffix); \ + } else { \ + __gui_inst = ppc_inst(__prefix); \ + } \ + if (__gui_ret == 0) \ + (dest) = __gui_inst; \ + } \ + __gui_ret; \ +}) + +#define get_user_instr(x, ptr) \ + ___get_user_instr(get_user, x, ptr) + +#define __get_user_instr(x, ptr) \ + ___get_user_instr(__get_user, x, ptr) + +#define __get_user_instr_inatomic(x, ptr) \ + ___get_user_instr(__get_user_inatomic, x, ptr) + +#else /* !CONFIG_PPC64 */ +#define get_user_instr(x, ptr) \ + get_user((x).val, (u32 __user *)(ptr)) + +#define __get_user_instr(x, ptr) \ + __get_user_nocheck((x).val, (u32 __user *)(ptr), sizeof(u32), true) + +#define __get_user_instr_inatomic(x, ptr) \ + __get_user_nosleep((x).val, (u32 __user *)(ptr), sizeof(u32)) + +#endif /* CONFIG_PPC64 */ + extern long __put_user_bad(void); /* @@ -162,7 +207,7 @@ do { \ prevent_write_to_user(ptr, size); \ } while (0) -#define __put_user_nocheck(x, ptr, size, do_allow) \ +#define __put_user_nocheck(x, ptr, size) \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ @@ -172,10 +217,7 @@ do { \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ __chk_user_ptr(__pu_addr); \ - if (do_allow) \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ - else \ - __put_user_size_allowed(__pu_val, __pu_addr, __pu_size, __pu_err); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ \ __pu_err; \ }) @@ -208,6 +250,52 @@ do { \ }) +#define __put_user_asm_goto(x, addr, label, op) \ + asm volatile goto( \ + "1: " op "%U1%X1 %0,%1 # put_user\n" \ + EX_TABLE(1b, %l2) \ + : \ + : "r" (x), "m" (*addr) \ + : \ + : label) + +#ifdef __powerpc64__ +#define __put_user_asm2_goto(x, 
ptr, label) \ + __put_user_asm_goto(x, ptr, label, "std") +#else /* __powerpc64__ */ +#define __put_user_asm2_goto(x, addr, label) \ + asm volatile goto( \ + "1: stw%X1 %0, %1\n" \ + "2: stw%X1 %L0, %L1\n" \ + EX_TABLE(1b, %l2) \ + EX_TABLE(2b, %l2) \ + : \ + : "r" (x), "m" (*addr) \ + : \ + : label) +#endif /* __powerpc64__ */ + +#define __put_user_size_goto(x, ptr, size, label) \ +do { \ + switch (size) { \ + case 1: __put_user_asm_goto(x, ptr, label, "stb"); break; \ + case 2: __put_user_asm_goto(x, ptr, label, "sth"); break; \ + case 4: __put_user_asm_goto(x, ptr, label, "stw"); break; \ + case 8: __put_user_asm2_goto(x, ptr, label); break; \ + default: __put_user_bad(); \ + } \ +} while (0) + +#define __put_user_nocheck_goto(x, ptr, size, label) \ +do { \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (!is_kernel_addr((unsigned long)__pu_addr)) \ + might_fault(); \ + __chk_user_ptr(ptr); \ + __put_user_size_goto((x), __pu_addr, (size), label); \ +} while (0) + + extern long __get_user_bad(void); /* @@ -489,10 +577,51 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t #define user_access_save prevent_user_access_return #define user_access_restore restore_user_access +static __must_check inline bool +user_read_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr, len))) + return false; + allow_read_from_user(ptr, len); + return true; +} +#define user_read_access_begin user_read_access_begin +#define user_read_access_end prevent_current_read_from_user + +static __must_check inline bool +user_write_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr, len))) + return false; + allow_write_to_user((void __user *)ptr, len); + return true; +} +#define user_write_access_begin user_write_access_begin +#define user_write_access_end prevent_current_write_to_user + #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x, p, e) 
unsafe_op_wrap(__get_user_allowed(x, p), e) -#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e) +#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e) + #define unsafe_copy_to_user(d, s, l, e) \ - unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e) +do { \ + u8 __user *_dst = (u8 __user *)(d); \ + const u8 *_src = (const u8 *)(s); \ + size_t _len = (l); \ + int _i; \ + \ + for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ + __put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\ + if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ + __put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ + _i += 4; \ + } \ + if (_len & 2) { \ + __put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ + _i += 2; \ + } \ + if (_len & 1) \ + __put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\ +} while (0) #endif /* _ARCH_POWERPC_UACCESS_H */ diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 2bbdf27d09b5..5bf65f5d44a9 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -11,10 +11,11 @@ #include <linux/notifier.h> #include <asm/probes.h> +#include <asm/inst.h> typedef ppc_opcode_t uprobe_opcode_t; -#define MAX_UINSN_BYTES 4 +#define MAX_UINSN_BYTES 8 #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) /* The following alias is needed for reference from arch-agnostic code */ @@ -23,8 +24,8 @@ typedef ppc_opcode_t uprobe_opcode_t; struct arch_uprobe { union { - u32 insn; - u32 ixol; + struct ppc_inst insn; + struct ppc_inst ixol; }; }; diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index f93e6b0f5c84..e33f80b0ea81 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -86,7 +86,6 @@ struct vas_tx_win_attr { int wcreds_max; int lpid; int pidr; /* hardware PID (from SPRN_PID) */ - int pid; /* linux process id */ int pswid; int rsvd_txbuf_count; int tc_mode; 
@@ -163,4 +162,16 @@ int vas_copy_crb(void *crb, int offset); */ int vas_paste_crb(struct vas_window *win, int offset, bool re); +/* + * Register / unregister coprocessor type to VAS API which will be exported + * to user space. Applications can use this API to open / close window + * which can be used to send / receive requests directly to cooprcessor. + * + * Only NX GZIP coprocessor type is supported now, but this API can be + * used for others in future. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name); +void vas_unregister_coproc_api(void); + #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/include/asm/xilinx_intc.h b/arch/powerpc/include/asm/xilinx_intc.h deleted file mode 100644 index ca9aa162fb09..000000000000 --- a/arch/powerpc/include/asm/xilinx_intc.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Xilinx intc external definitions - * - * Copyright 2007 Secret Lab Technologies Ltd. - */ -#ifndef _ASM_POWERPC_XILINX_INTC_H -#define _ASM_POWERPC_XILINX_INTC_H - -#ifdef __KERNEL__ - -extern void __init xilinx_intc_init_tree(void); -extern unsigned int xintc_get_irq(void); - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_XILINX_INTC_H */ diff --git a/arch/powerpc/include/asm/xilinx_pci.h b/arch/powerpc/include/asm/xilinx_pci.h deleted file mode 100644 index 7a8275caf6af..000000000000 --- a/arch/powerpc/include/asm/xilinx_pci.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Xilinx pci external definitions - * - * Copyright 2009 Roderick Colenbrander - * Copyright 2009 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
- */ - -#ifndef INCLUDE_XILINX_PCI -#define INCLUDE_XILINX_PCI - -#ifdef CONFIG_XILINX_PCI -extern void __init xilinx_pci_init(void); -#else -static inline void __init xilinx_pci_init(void) { return; } -#endif - -#endif /* INCLUDE_XILINX_PCI */ diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 33aee7490cbb..8b211faa0e42 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -37,6 +37,14 @@ #define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ #define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ +/* + * Load-after-store ordering + * + * Adding this offset to the load address will enforce + * load-after-store ordering. This is required to use StoreEOI. + */ +#define XIVE_ESB_LD_ST_MO 0x40 /* Load-after-store ordering */ + #define XIVE_ESB_VAL_P 0x2 #define XIVE_ESB_VAL_Q 0x1 #define XIVE_ESB_INVALID 0xFF diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 93f982dbb3d4..d08ea11b271c 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -5,6 +5,8 @@ #ifndef _ASM_POWERPC_XIVE_H #define _ASM_POWERPC_XIVE_H +#include <asm/opal-api.h> + #define XIVE_INVALID_VP 0xffffffff #ifdef CONFIG_PPC_XIVE @@ -108,7 +110,6 @@ void xive_native_free_vp_block(u32 vp_base); int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data); void xive_cleanup_irq_data(struct xive_irq_data *xd); -u32 xive_native_alloc_irq(void); void xive_native_free_irq(u32 irq); int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); @@ -137,6 +138,12 @@ int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, u32 qindex); int xive_native_get_vp_state(u32 vp_id, u64 *out_state); bool xive_native_has_queue_state_support(void); +extern u32 xive_native_alloc_irq_on_chip(u32 chip_id); + +static inline u32 xive_native_alloc_irq(void) +{ + return xive_native_alloc_irq_on_chip(OPAL_XIVE_ANY_CHIP); +} #else diff --git 
a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 540592034740..731b97dc2d15 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -50,6 +50,8 @@ #define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */ #define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ #define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* TM w/out suspended state */ +#define PPC_FEATURE2_ARCH_3_1 0x00040000 /* ISA 3.1 */ +#define PPC_FEATURE2_MMA 0x00020000 /* Matrix Multiply Assist */ /* * IMPORTANT! diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h new file mode 100644 index 000000000000..ebd4b2424785 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/vas-api.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright 2019 IBM Corp. + */ + +#ifndef _UAPI_MISC_VAS_H +#define _UAPI_MISC_VAS_H + +#include <linux/types.h> + +#include <asm/ioctl.h> + +#define VAS_MAGIC 'v' +#define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr) + +struct vas_tx_win_open_attr { + __u32 version; + __s16 vas_id; /* specific instance of vas or -1 for default */ + __u16 reserved1; + __u64 flags; /* Future use */ + __u64 reserved2[6]; +}; + +#endif /* _UAPI_MISC_VAS_H */ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 92045ed64976..1f1ce8b86d5b 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -24,6 +24,7 @@ #include <asm/disassemble.h> #include <asm/cpu_has_feature.h> #include <asm/sstep.h> +#include <asm/inst.h> struct aligninfo { unsigned char len; @@ -104,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = { * so we don't need the address swizzling. 
*/ static int emulate_spe(struct pt_regs *regs, unsigned int reg, - unsigned int instr) + struct ppc_inst ppc_instr) { int ret; union { @@ -115,8 +116,9 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } data, temp; unsigned char __user *p, *addr; unsigned long *evr = &current->thread.evr[reg]; - unsigned int nb, flags; + unsigned int nb, flags, instr; + instr = ppc_inst_val(ppc_instr); instr = (instr >> 1) & 0x1f; /* DAR has the operand effective address */ @@ -293,7 +295,7 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, int fix_alignment(struct pt_regs *regs) { - unsigned int instr; + struct ppc_inst instr; struct instruction_op op; int r, type; @@ -303,18 +305,18 @@ int fix_alignment(struct pt_regs *regs) */ CHECK_FULL_REGS(regs); - if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip))) + if (unlikely(__get_user_instr(instr, (void __user *)regs->nip))) return -EFAULT; if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { /* We don't handle PPC little-endian any more... */ if (cpu_has_feature(CPU_FTR_PPC_LE)) return -EIO; - instr = swab32(instr); + instr = ppc_inst_swab(instr); } #ifdef CONFIG_SPE - if ((instr >> 26) == 0x4) { - int reg = (instr >> 21) & 0x1f; + if (ppc_inst_primary_opcode(instr) == 0x4) { + int reg = (ppc_inst_val(instr) >> 21) & 0x1f; PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } @@ -331,7 +333,7 @@ int fix_alignment(struct pt_regs *regs) * when pasting to a co-processor. Furthermore, paste_last is the * synchronisation point for preceding copy/paste sequences. 
*/ - if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe)) + if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe)) return -EIO; r = analyse_instr(&op, regs, instr); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index fcf24a365fc0..9b9cde07e396 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -70,6 +70,10 @@ #include <asm/fixmap.h> #endif +#ifdef CONFIG_XMON +#include "../xmon/xmon_bpts.h" +#endif + #define STACK_PT_REGS_OFFSET(sym, val) \ DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) @@ -795,5 +799,9 @@ int main(void) DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); #endif +#ifdef CONFIG_XMON + DEFINE(BPT_SIZE, BPT_SIZE); +#endif + return 0; } diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f6517f67265a..f8b5ff64b604 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -288,6 +288,7 @@ _GLOBAL(__init_fpu_registers) mtmsr r10 isync blr +_ASM_NOKPROBE_SYMBOL(__init_fpu_registers) /* Definitions for the table use to save CPU states */ @@ -483,4 +484,5 @@ _GLOBAL(__restore_cpu_setup) 1: mtcr r7 blr +_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index a460298c7ddb..efdcfa714106 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -91,10 +91,15 @@ _GLOBAL(__restore_cpu_power8) mtlr r11 blr +_GLOBAL(__setup_cpu_power10) + mflr r11 + bl __init_FSCR_power10 + b 1f + _GLOBAL(__setup_cpu_power9) mflr r11 bl __init_FSCR - bl __init_PMU +1: bl __init_PMU bl __init_hvmode_206 mtlr r11 beqlr @@ -116,10 +121,15 @@ _GLOBAL(__setup_cpu_power9) mtlr r11 blr +_GLOBAL(__restore_cpu_power10) + mflr r11 + bl __init_FSCR_power10 + b 1f + _GLOBAL(__restore_cpu_power9) mflr r11 bl __init_FSCR - bl __init_PMU +1: bl __init_PMU mfmsr r3 rldicl. 
r0,r3,4,63 mtlr r11 @@ -182,9 +192,15 @@ __init_LPCR_ISA300: isync blr +__init_FSCR_power10: + mfspr r3, SPRN_FSCR + ori r3, r3, FSCR_PREFIX + mtspr SPRN_FSCR, r3 + // fall through + __init_FSCR: mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB + ori r3,r3,FSCR_TAR|FSCR_EBB mtspr SPRN_FSCR,r3 blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 13eba2eb46fe..b4066354f073 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -70,6 +70,8 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); +extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power10(void); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); @@ -119,6 +121,10 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE2_ARCH_3_00 | \ PPC_FEATURE2_HAS_IEEE128 | \ PPC_FEATURE2_DARN ) +#define COMMON_USER_POWER10 COMMON_USER_POWER9 +#define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \ + PPC_FEATURE2_ARCH_3_1 | \ + PPC_FEATURE2_MMA) #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) @@ -367,6 +373,22 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_power9, .platform = "power9", }, + { /* 3.1-compliant processor, i.e. 
Power10 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000006, + .cpu_name = "POWER10 (architected)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_type = PPC_OPROFILE_INVALID, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .platform = "power10", + }, { /* Power7 */ .pvr_mask = 0xffff0000, .pvr_value = 0x003f0000, @@ -1232,69 +1254,6 @@ static struct cpu_spec __initdata cpu_specs[] = { }, #endif /* CONFIG_PPC_8xx */ #ifdef CONFIG_40x - { /* 403GC */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x00200200, - .cpu_name = "403GC", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 403GCX */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x00201400, - .cpu_name = "403GCX", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 403G ?? 
*/ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00200000, - .cpu_name = "403G ??", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 405GP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40110000, - .cpu_name = "405GP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* STB 03xxx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40130000, - .cpu_name = "STB03xxx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, { /* STB 04xxx */ .pvr_mask = 0xffff0000, .pvr_value = 0x41810000, @@ -1385,32 +1344,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, - { /* Xilinx Virtex-II Pro */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x20010000, - .cpu_name = "Virtex-II Pro", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* Xilinx Virtex-4 FX */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x20011000, - .cpu_name = "Virtex-4 FX", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = 
"ppc405", - }, { /* 405EP */ .pvr_mask = 0xffff0000, .pvr_value = 0x51210000, @@ -1800,19 +1733,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, - { /* 440 in Xilinx Virtex-5 FXT */ - .pvr_mask = 0xfffffff0, - .pvr_value = 0x7ff21910, - .cpu_name = "440 in Virtex-5 FXT", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440x5, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, { /* 460EX */ .pvr_mask = 0xffff0006, .pvr_value = 0x13020002, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 05745ddbd229..735e89337398 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -18,6 +18,7 @@ #include <asm/firmware.h> #include <linux/uaccess.h> #include <asm/rtas.h> +#include <asm/inst.h> #ifdef DEBUG #include <asm/udbg.h> @@ -34,7 +35,7 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { - unsigned int *p = (unsigned int *)addr; + struct ppc_inst *p = (struct ppc_inst *)addr; /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we @@ -44,8 +45,8 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. 
*/ - patch_instruction(p, PPC_INST_NOP); - patch_branch(++p, addr + PHYSICAL_START, 0); + patch_instruction(p, ppc_inst(PPC_INST_NOP)); + patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index cc14aa6c4a1b..500f52fa4711 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -16,7 +16,7 @@ bool dawr_force_enable; EXPORT_SYMBOL_GPL(dawr_force_enable); -int set_dawr(struct arch_hw_breakpoint *brk) +int set_dawr(int nr, struct arch_hw_breakpoint *brk) { unsigned long dawr, dawrx, mrd; @@ -39,15 +39,24 @@ int set_dawr(struct arch_hw_breakpoint *brk) if (ppc_md.set_dawr) return ppc_md.set_dawr(dawr, dawrx); - mtspr(SPRN_DAWR, dawr); - mtspr(SPRN_DAWRX, dawrx); + if (nr == 0) { + mtspr(SPRN_DAWR0, dawr); + mtspr(SPRN_DAWRX0, dawrx); + } else { + mtspr(SPRN_DAWR1, dawr); + mtspr(SPRN_DAWRX1, dawrx); + } return 0; } -static void set_dawr_cb(void *info) +static void disable_dawrs_cb(void *info) { - set_dawr(info); + struct arch_hw_breakpoint null_brk = {0}; + int i; + + for (i = 0; i < nr_wp_slots(); i++) + set_dawr(i, &null_brk); } static ssize_t dawr_write_file_bool(struct file *file, @@ -60,7 +69,7 @@ static ssize_t dawr_write_file_bool(struct file *file, /* Send error to user if they hypervisor won't allow us to write DAWR */ if (!dawr_force_enable && firmware_has_feature(FW_FEATURE_LPAR) && - set_dawr(&null_brk) != H_SUCCESS) + set_dawr(0, &null_brk) != H_SUCCESS) return -ENODEV; rc = debugfs_write_file_bool(file, user_buf, count, ppos); @@ -69,7 +78,7 @@ static ssize_t dawr_write_file_bool(struct file *file, /* If we are clearing, make sure all CPUs have the DAWR cleared */ if (!dawr_force_enable) - smp_call_function(set_dawr_cb, &null_brk, 0); + smp_call_function(disable_dawrs_cb, NULL, 0); return rc; } diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 36bc0d5c4f3a..3a409517c031 100644 --- 
a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -26,6 +26,7 @@ /* Device-tree visible constants follow */ #define ISA_V2_07B 2070 #define ISA_V3_0B 3000 +#define ISA_V3_1 3100 #define USABLE_PR (1U << 0) #define USABLE_OS (1U << 1) @@ -74,6 +75,7 @@ static struct { u64 lpcr_clear; u64 hfscr; u64 fscr; + u64 pcr; } system_registers; static void (*init_pmu_registers)(void); @@ -101,7 +103,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); - mtspr(SPRN_PCR, PCR_MASK); + mtspr(SPRN_PCR, system_registers.pcr); } mtspr(SPRN_FSCR, system_registers.fscr); @@ -346,6 +348,14 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) { u64 lpcr; + /* + * Linux relies on FSCR[DSCR] being clear, so that we can take the + * facility unavailable interrupt and track the task's usage of DSCR. + * See facility_unavailable_exception(). + * Clear the bit here so that feat_enable() doesn't set it. 
+ */ + f->fscr_bit_nr = -1; + feat_enable(f); lpcr = mfspr(SPRN_LPCR); @@ -552,6 +562,18 @@ static int __init feat_enable_large_ci(struct dt_cpu_feature *f) return 1; } +static int __init feat_enable_mma(struct dt_cpu_feature *f) +{ + u64 pcr; + + feat_enable(f); + pcr = mfspr(SPRN_PCR); + pcr &= ~PCR_MMA_DIS; + mtspr(SPRN_PCR, pcr); + + return 1; +} + struct dt_cpu_feature_match { const char *name; int (*enable)(struct dt_cpu_feature *f); @@ -625,6 +647,8 @@ static struct dt_cpu_feature_match __initdata {"vector-binary128", feat_enable, 0}, {"vector-binary16", feat_enable, 0}, {"wait-v3", feat_enable, 0}, + {"prefix-instructions", feat_enable, 0}, + {"matrix-multiply-assist", feat_enable_mma, 0}, }; static bool __initdata using_dt_cpu_ftrs; @@ -654,6 +678,11 @@ static void __init cpufeatures_setup_start(u32 isa) cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00; } + + if (isa >= 3100) { + cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31; + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1; + } } static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) @@ -770,6 +799,7 @@ static void __init cpufeatures_setup_finished(void) system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); + system_registers.pcr = mfspr(SPRN_PCR); pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n", cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 7cdcb413bb44..d407981dec76 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1106,6 +1106,37 @@ static int eeh_init(void) core_initcall_sync(eeh_init); +static int eeh_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + /* + * Note: It's not possible to perform EEH device addition (i.e. 
+ * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on + * the device's resources, which have not yet been set up. + */ + case BUS_NOTIFY_DEL_DEVICE: + eeh_remove_device(to_pci_dev(dev)); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block eeh_device_nb = { + .notifier_call = eeh_device_notifier, +}; + +static __init int eeh_set_bus_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &eeh_device_nb); + return 0; +} +arch_initcall(eeh_set_bus_notifier); + /** * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8420abd4ea1c..217ebdf5b00b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -28,7 +28,6 @@ #include <asm/unistd.h> #include <asm/ptrace.h> #include <asm/export.h> -#include <asm/asm-405.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> #include <asm/kup.h> @@ -51,6 +50,7 @@ mcheck_transfer_to_handler: mfspr r0,SPRN_DSRR1 stw r0,_DSRR1(r11) /* fall through */ +_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) .globl debug_transfer_to_handler debug_transfer_to_handler: @@ -59,6 +59,7 @@ debug_transfer_to_handler: mfspr r0,SPRN_CSRR1 stw r0,_CSRR1(r11) /* fall through */ +_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) .globl crit_transfer_to_handler crit_transfer_to_handler: @@ -94,6 +95,7 @@ crit_transfer_to_handler: rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ +_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif #ifdef CONFIG_40x @@ -115,6 +117,7 @@ crit_transfer_to_handler: rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ +_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif /* @@ -127,6 +130,7 @@ crit_transfer_to_handler: .globl transfer_to_handler_full transfer_to_handler_full: SAVE_NVGPRS(r11) +_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) /* 
fall through */ .globl transfer_to_handler @@ -227,6 +231,23 @@ transfer_to_handler_cont: SYNC RFI /* jump to handler, enable MMU */ +#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) +4: rlwinm r12,r12,0,~_TLF_NAPPING + stw r12,TI_LOCAL_FLAGS(r2) + b power_save_ppc32_restore + +7: rlwinm r12,r12,0,~_TLF_SLEEPING + stw r12,TI_LOCAL_FLAGS(r2) + lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ + rlwinm r9,r9,0,~MSR_EE + lwz r12,_LINK(r11) /* and return to address in LR */ + kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) + b fast_exception_return +#endif +_ASM_NOKPROBE_SYMBOL(transfer_to_handler) +_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) + #ifdef CONFIG_TRACE_IRQFLAGS 1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to * keep interrupts disabled at this point otherwise we might risk @@ -272,21 +293,6 @@ reenable_mmu: bctr /* jump to handler */ #endif /* CONFIG_TRACE_IRQFLAGS */ -#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) -4: rlwinm r12,r12,0,~_TLF_NAPPING - stw r12,TI_LOCAL_FLAGS(r2) - b power_save_ppc32_restore - -7: rlwinm r12,r12,0,~_TLF_SLEEPING - stw r12,TI_LOCAL_FLAGS(r2) - lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ - rlwinm r9,r9,0,~MSR_EE - lwz r12,_LINK(r11) /* and return to address in LR */ - kuap_restore r11, r2, r3, r4, r5 - lwz r2, GPR2(r11) - b fast_exception_return -#endif - #ifndef CONFIG_VMAP_STACK /* * On kernel stack overflow, load up an initial stack pointer @@ -313,6 +319,7 @@ stack_ovf: mtspr SPRN_SRR1,r10 SYNC RFI +_ASM_NOKPROBE_SYMBOL(stack_ovf) #endif #ifdef CONFIG_TRACE_IRQFLAGS @@ -455,6 +462,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r7,_NIP(r1) lwz r2,GPR2(r1) lwz r1,GPR1(r1) +syscall_exit_finish: #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif @@ -462,6 +470,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtspr SPRN_SRR1,r8 SYNC RFI +_ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x 2: li r7,0 iccci r0,r0 @@ 
-541,9 +550,6 @@ syscall_exit_work: addi r12,r2,TI_FLAGS 3: lwarx r8,0,r12 andc r8,r8,r11 -#ifdef CONFIG_IBM405_ERR77 - dcbt 0,r12 -#endif stwcx. r8,0,r12 bne- 3b @@ -596,6 +602,7 @@ ret_from_kernel_syscall: mtspr SPRN_SRR1, r10 SYNC RFI +_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) /* * The fork/clone functions need to copy the full register set into @@ -799,6 +806,7 @@ fast_exception_return: lwz r11,GPR11(r11) SYNC RFI +_ASM_NOKPROBE_SYMBOL(fast_exception_return) #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* check if the exception happened in a restartable section */ @@ -918,9 +926,6 @@ resume_kernel: addi r5,r2,TI_FLAGS 0: lwarx r8,0,r5 andc r8,r8,r11 -#ifdef CONFIG_IBM405_ERR77 - dcbt 0,r5 -#endif stwcx. r8,0,r5 bne- 0b 1: @@ -997,7 +1002,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) mtspr SPRN_XER,r10 mtctr r11 - PPC405_ERR77(0,r1) BEGIN_FTR_SECTION lwarx r11,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) @@ -1038,6 +1042,8 @@ exc_exit_restart: exc_exit_restart_end: SYNC RFI +_ASM_NOKPROBE_SYMBOL(exc_exit_restart) +_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) #else /* !(CONFIG_4xx || CONFIG_BOOKE) */ /* @@ -1059,16 +1065,15 @@ exc_exit_restart_end: exc_exit_restart: lwz r11,_NIP(r1) lwz r12,_MSR(r1) -exc_exit_start: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 REST_2GPRS(11, r1) lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - PPC405_ERR77_SYNC rfi b . /* prevent prefetch past rfi */ +_ASM_NOKPROBE_SYMBOL(exc_exit_restart) /* * Returning from a critical interrupt in user mode doesn't need @@ -1109,7 +1114,6 @@ exc_exit_restart_end: lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ mtctr r11; \ - PPC405_ERR77(0,r1); \ stwcx. 
r0,0,r1; /* to clear the reservation */ \ lwz r11,_LINK(r1); \ mtlr r11; \ @@ -1129,7 +1133,6 @@ exc_exit_restart_end: lwz r10,GPR10(r1); \ lwz r11,GPR11(r1); \ lwz r1,GPR1(r1); \ - PPC405_ERR77_SYNC; \ exc_lvl_rfi; \ b .; /* prevent prefetch past exc_lvl_rfi */ @@ -1182,6 +1185,7 @@ ret_from_crit_exc: mtspr SPRN_SRR0,r9; mtspr SPRN_SRR1,r10; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) +_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) #endif /* CONFIG_40x */ #ifdef CONFIG_BOOKE @@ -1193,6 +1197,7 @@ ret_from_crit_exc: RESTORE_xSRR(SRR0,SRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) +_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) .globl ret_from_debug_exc ret_from_debug_exc: @@ -1203,6 +1208,7 @@ ret_from_debug_exc: RESTORE_xSRR(CSRR0,CSRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI) +_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc) .globl ret_from_mcheck_exc ret_from_mcheck_exc: @@ -1214,6 +1220,7 @@ ret_from_mcheck_exc: RESTORE_xSRR(DSRR0,DSRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) +_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ /* @@ -1337,6 +1344,7 @@ nonrecoverable: bl unrecoverable_exception /* shouldn't return */ b 4b +_ASM_NOKPROBE_SYMBOL(nonrecoverable) .section .bss .align 2 @@ -1391,10 +1399,5 @@ _GLOBAL(enter_rtas) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 RFI /* return to caller */ - - .globl machine_check_in_rtas -machine_check_in_rtas: - twi 31,0,0 - /* XXX load up BATs and panic */ - +_ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b3c9f15089b6..9d49338e0c85 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -479,11 +479,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) fast_interrupt_return: _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 - ld r4,_MSR(r1) - andi. r0,r4,MSR_PR + ld r5,_MSR(r1) + andi. 
r0,r5,MSR_PR bne .Lfast_user_interrupt_return - kuap_restore_amr r3 - andi. r0,r4,MSR_RI + kuap_restore_amr r3, r4 + andi. r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 9d32158ce36f..2ed14d4a47f5 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -11,6 +11,7 @@ #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/machdep.h> +#include <asm/inst.h> #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) extern void epapr_ev_idle(void); @@ -36,10 +37,10 @@ static int __init early_init_dt_scan_epapr(unsigned long node, return -1; for (i = 0; i < (len / 4); i++) { - u32 inst = be32_to_cpu(insts[i]); - patch_instruction(epapr_hypercall_start + i, inst); + struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); + patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction(epapr_ev_idle_start + i, inst); + patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst); #endif } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ebeebab74b56..e70ebb5c318c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -939,13 +939,13 @@ EXC_COMMON_BEGIN(system_reset_common) * the right thing. We do not want to reconcile because that goes * through irq tracing which we don't want in NMI. * - * Save PACAIRQHAPPENED to _DAR (otherwise unused), and set HARD_DIS + * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS * as we are running with MSR[EE]=0. 
*/ li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) - std r10,_DAR(r1) + std r10,RESULT(r1) ori r10,r10,PACA_IRQ_HARD_DIS stb r10,PACAIRQHAPPENED(r13) @@ -966,12 +966,12 @@ EXC_COMMON_BEGIN(system_reset_common) /* * Restore soft mask settings. */ - ld r10,_DAR(r1) + ld r10,RESULT(r1) stb r10,PACAIRQHAPPENED(r13) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r10 + kuap_restore_amr r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -1117,11 +1117,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 + /* + * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see + * system_reset_common) + */ + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + lbz r10,PACAIRQHAPPENED(r13) + std r10,RESULT(r1) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) + /* + * Restore soft mask settings. + */ + ld r10,RESULT(r1) + stb r10,PACAIRQHAPPENED(r13) + ld r10,SOFTE(r1) + stb r10,PACAIRQSOFTMASK(r13) + #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -1225,17 +1244,19 @@ EXC_COMMON_BEGIN(machine_check_idle_common) bl machine_check_queue_event /* - * We have not used any non-volatile GPRs here, and as a rule - * most exception code including machine check does not. - * Therefore PACA_NAPSTATELOST does not need to be set. Idle - * wakeup will restore volatile registers. + * GPR-loss wakeups are relatively straightforward, because the + * idle sleep code has saved all non-volatile registers on its + * own stack, and r1 in PACAR1. * - * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce. + * For no-loss wakeups the r1 and lr registers used by the + * early machine check handler have to be restored first. r2 is + * the kernel TOC, so no need to restore it. * * Then decrement MCE nesting after finishing with the stack. 
*/ ld r3,_MSR(r1) ld r4,_LINK(r1) + ld r1,GPR1(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 @@ -1244,7 +1265,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common) mtlr r4 rlwinm r10,r3,47-31,30,31 cmpwi cr1,r10,2 - bltlr cr1 /* no state loss, return to idle caller */ + bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ b idle_return_gpr_loss #endif @@ -1266,6 +1287,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) andc r10,r10,r3 mtmsrd r10 + lhz r12,PACA_IN_MCE(r13) + subi r12,r12,1 + sth r12,PACA_IN_MCE(r13) + /* Invoke machine_check_exception to print MCE event and panic. */ addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception @@ -2740,7 +2765,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) - std r10,_DAR(r1) + std r10,RESULT(r1) ori r10,r10,PACA_IRQ_HARD_DIS stb r10,PACAIRQHAPPENED(r13) @@ -2754,12 +2779,12 @@ EXC_COMMON_BEGIN(soft_nmi_common) /* * Restore soft mask settings. */ - ld r10,_DAR(r1) + ld r10,RESULT(r1) stb r10,PACAIRQHAPPENED(r13) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r10 + kuap_restore_amr r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 59e60a9a9f5c..78ab9a6ee6ac 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -40,8 +40,17 @@ struct kobject *fadump_kobj; #ifndef CONFIG_PRESERVE_FA_DUMP static DEFINE_MUTEX(fadump_mutex); -struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 }; -struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 }; +struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; + +#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ +#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ + sizeof(struct fadump_memory_range)) +static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; +struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs, + 
RESERVED_RNGS_SZ, 0, + RESERVED_RNGS_CNT, true }; + +static void __init early_init_dt_scan_reserved_ranges(unsigned long node); #ifdef CONFIG_CMA static struct cma *fadump_cma; @@ -110,6 +119,11 @@ static int __init fadump_cma_init(void) { return 1; } int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) { + if (depth == 0) { + early_init_dt_scan_reserved_ranges(node); + return 0; + } + if (depth != 1) return 0; @@ -431,10 +445,72 @@ static int __init fadump_get_boot_mem_regions(void) return ret; } +/* + * Returns true, if the given range overlaps with reserved memory ranges + * starting at idx. Also, updates idx to index of overlapping memory range + * with the given memory range. + * False, otherwise. + */ +static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +{ + bool ret = false; + int i; + + for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) { + u64 rbase = reserved_mrange_info.mem_ranges[i].base; + u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size; + + if (end <= rbase) + break; + + if ((end > rbase) && (base < rend)) { + *idx = i; + ret = true; + break; + } + } + + return ret; +} + +/* + * Locate a suitable memory area to reserve memory for FADump. While at it, + * lookup reserved-ranges & avoid overlap with them, as they are used by F/W. 
+ */ +static u64 __init fadump_locate_reserve_mem(u64 base, u64 size) +{ + struct fadump_memory_range *mrngs; + phys_addr_t mstart, mend; + int idx = 0; + u64 i, ret = 0; + + mrngs = reserved_mrange_info.mem_ranges; + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + &mstart, &mend, NULL) { + pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n", + i, mstart, mend, base); + + if (mstart > base) + base = PAGE_ALIGN(mstart); + + while ((mend > base) && ((mend - base) >= size)) { + if (!overlaps_reserved_ranges(base, base+size, &idx)) { + ret = base; + goto out; + } + + base = mrngs[idx].base + mrngs[idx].size; + base = PAGE_ALIGN(base); + } + } + +out: + return ret; +} + int __init fadump_reserve_mem(void) { - u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE; - bool is_memblock_bottom_up = memblock_bottom_up(); + u64 base, size, mem_boundary, bootmem_min; int ret = 1; if (!fw_dump.fadump_enabled) @@ -455,9 +531,9 @@ int __init fadump_reserve_mem(void) PAGE_ALIGN(fadump_calculate_reserve_size()); #ifdef CONFIG_CMA if (!fw_dump.nocma) { - align = FADUMP_CMA_ALIGNMENT; fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, align); + ALIGN(fw_dump.boot_memory_size, + FADUMP_CMA_ALIGNMENT); } #endif @@ -525,13 +601,9 @@ int __init fadump_reserve_mem(void) * Reserve memory at an offset closer to bottom of the RAM to * minimize the impact of memory hot-remove operation. 
*/ - memblock_set_bottom_up(true); - base = memblock_find_in_range(base, mem_boundary, size, align); + base = fadump_locate_reserve_mem(base, size); - /* Restore the previous allocation mode */ - memblock_set_bottom_up(is_memblock_bottom_up); - - if (!base) { + if (!base || (base + size > mem_boundary)) { pr_err("Failed to find memory chunk for reservation!\n"); goto error_out; } @@ -728,10 +800,14 @@ void fadump_free_cpu_notes_buf(void) static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info) { + if (mrange_info->is_static) { + mrange_info->mem_range_cnt = 0; + return; + } + kfree(mrange_info->mem_ranges); - mrange_info->mem_ranges = NULL; - mrange_info->mem_ranges_sz = 0; - mrange_info->max_mem_ranges = 0; + memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0, + (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ)); } /* @@ -788,6 +864,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) { int ret; + if (mrange_info->is_static) { + pr_err("Reached array size limit for %s memory ranges\n", + mrange_info->name); + return -ENOSPC; + } + ret = fadump_alloc_mem_ranges(mrange_info); if (ret) return ret; @@ -1204,20 +1286,19 @@ static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info) * Scan reserved-ranges to consider them while reserving/releasing * memory for FADump. 
*/ -static inline int fadump_scan_reserved_mem_ranges(void) +static void __init early_init_dt_scan_reserved_ranges(unsigned long node) { - struct device_node *root; const __be32 *prop; int len, ret = -1; unsigned long i; - root = of_find_node_by_path("/"); - if (!root) - return ret; + /* reserved-ranges already scanned */ + if (reserved_mrange_info.mem_range_cnt != 0) + return; - prop = of_get_property(root, "reserved-ranges", &len); + prop = of_get_flat_dt_prop(node, "reserved-ranges", &len); if (!prop) - return ret; + return; /* * Each reserved range is an (address,size) pair, 2 cells each, @@ -1239,7 +1320,8 @@ static inline int fadump_scan_reserved_mem_ranges(void) } } - return ret; + /* Compact reserved ranges */ + sort_and_merge_mem_ranges(&reserved_mrange_info); } /* @@ -1253,32 +1335,21 @@ static void fadump_release_memory(u64 begin, u64 end) u64 ra_start, ra_end, tstart; int i, ret; - fadump_scan_reserved_mem_ranges(); - ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; /* - * Add reserved dump area to reserved ranges list - * and exclude all these ranges while releasing memory. + * If reserved ranges array limit is hit, overwrite the last reserved + * memory range with reserved dump area to ensure it is excluded from + * the memory being released (reused for next FADump registration). */ - ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); - if (ret != 0) { - /* - * Not enough memory to setup reserved ranges but the system is - * running shortage of memory. So, release all the memory except - * Reserved dump area (reused for next fadump registration). 
- */ - if (begin < ra_end && end > ra_start) { - if (begin < ra_start) - fadump_release_reserved_area(begin, ra_start); - if (end > ra_end) - fadump_release_reserved_area(ra_end, end); - } else - fadump_release_reserved_area(begin, end); + if (reserved_mrange_info.mem_range_cnt == + reserved_mrange_info.max_mem_ranges) + reserved_mrange_info.mem_range_cnt--; + ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); + if (ret != 0) return; - } /* Get the reserved ranges list in order first. */ sort_and_merge_mem_ranges(&reserved_mrange_info); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 3235a8da6af7..1dfccf58fbb1 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -119,6 +119,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* restore registers and return */ /* we haven't used ctr or xer or lr */ blr +_ASM_NOKPROBE_SYMBOL(load_up_fpu) /* * save_fpu(tsk) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 97c887950c3c..e2459550a3bf 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -297,7 +297,7 @@ MachineCheck: cmpwi cr1, r4, 0 #endif beq cr1, machine_check_tramp - b machine_check_in_rtas + twi 31, 0, 0 #else b machine_check_tramp #endif diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 2cec543c38f0..a22a8209971b 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -36,7 +36,6 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/export.h> -#include <asm/asm-405.h> #include "head_32.h" @@ -176,135 +175,16 @@ _ENTRY(saved_ksp_limit) * 0x0300 - Data Storage Exception * This happens for just a few reasons. U0 set (but we don't do that), * or zone protection fault (user violation, write to protected page). - * If this is just an update of modified status, we do that quickly - * and exit. Otherwise, we call heavywight functions to do the work. 
+ * The other Data TLB exceptions bail out to this point + * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif - - /* First, check if it was a zone fault (which means a user - * tried to access a kernel or read-protected page - always - * a SEGV). All other faults here must be stores, so no - * need to check ESR_DST as well. */ - mfspr r10, SPRN_ESR - andis. r10, r10, ESR_DIZ@h - bne 2f - - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */ - beq 2f /* Bail if no table */ - - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - - andi. r9, r11, _PAGE_RW /* Is it writeable? */ - beq 2f /* Bail if not */ - - /* Update 'changed'. - */ - ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE - stw r11, 0(r12) /* Update Linux page table */ - - /* Most of the Linux PTE is ready to load into the TLB LO. - * We set ZSEL, where only the LS-bit determines user access. 
- * We set execute, because we don't have the granularity to - * properly set this at the page level (Linux problem). - * If shared is set, we cause a zero PID->TID load. - * Many of these bits are software only. Bits we don't set - * here we (properly should) assume have the appropriate value. - */ - li r12, 0x0ce2 - andc r11, r11, r12 /* Make sure 20, 21 are zero */ - - /* find the TLB index that caused the fault. It has to be here. - */ - tlbsx r9, 0, r10 - - tlbwe r11, r9, TLB_DATA /* Load TLB LO */ - - /* Done...restore registers and get out of here. - */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 -#endif - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - PPC405_ERR77_SYNC - rfi /* Should sync shadow TLBs */ - b . /* prevent prefetch past rfi */ - -2: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. 
- */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 -#endif - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - b DataAccess + EXCEPTION_PROLOG + mfspr r5, SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5, _ESR(r11) + mfspr r4, SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + stw r4, _DEAR(r11) + EXC_XFER_LITE(0x300, handle_page_fault) /* * 0x0400 - Instruction Storage Exception @@ -372,21 +252,11 @@ _ENTRY(saved_ksp_limit) START_EXCEPTION(0x1100, DTLBMiss) mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif + mfcr r12 + mfspr r9, SPRN_PID + mtspr SPRN_SPRG_SCRATCH5, r9 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -409,28 +279,34 @@ _ENTRY(saved_ksp_limit) 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r12, 0(r11) /* Get L1 entry */ - andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + lwz r11, 0(r11) /* Get L1 entry */ + andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - andi. r9, r11, _PAGE_PRESENT - beq 5f + rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r11) /* Get Linux PTE */ +#ifdef CONFIG_SWAP + li r9, _PAGE_PRESENT | _PAGE_ACCESSED +#else + li r9, _PAGE_PRESENT +#endif + andc. 
r9, r9, r11 /* Check permission */ + bne 5f - ori r11, r11, _PAGE_ACCESSED - stw r11, 0(r12) + rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ + and r9, r9, r11 /* hwwrite = dirty & rw */ + rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. */ - li r12, 0x00c0 - rlwimi r10, r12, 0, 20, 31 + li r9, 0x00c0 + rlwimi r10, r9, 0, 20, 31 b finish_tlb_load 2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r12, 2, 22, 24 + rlwinm. r9, r11, 2, 22, 24 beq 5f /* Create TLB tag. This is the faulting address, plus a static @@ -438,7 +314,6 @@ _ENTRY(saved_ksp_limit) */ ori r9, r9, 0x40 rlwimi r10, r9, 0, 20, 31 - mr r11, r12 b finish_tlb_load @@ -446,24 +321,14 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - b DataAccess + b DataStorage /* 0x1200 - Instruction TLB Miss Exception * Nearly the same as above, except we get our information from different @@ -472,21 +337,11 @@ _ENTRY(saved_ksp_limit) START_EXCEPTION(0x1200, ITLBMiss) mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif + mfcr r12 + mfspr r9, SPRN_PID + mtspr 
SPRN_SPRG_SCRATCH5, r9 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -509,28 +364,34 @@ _ENTRY(saved_ksp_limit) 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r12, 0(r11) /* Get L1 entry */ - andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + lwz r11, 0(r11) /* Get L1 entry */ + andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - andi. r9, r11, _PAGE_PRESENT - beq 5f + rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r11) /* Get Linux PTE */ +#ifdef CONFIG_SWAP + li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#else + li r9, _PAGE_PRESENT | _PAGE_EXEC +#endif + andc. r9, r9, r11 /* Check permission */ + bne 5f - ori r11, r11, _PAGE_ACCESSED - stw r11, 0(r12) + rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ + and r9, r9, r11 /* hwwrite = dirty & rw */ + rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. */ - li r12, 0x00c0 - rlwimi r10, r12, 0, 20, 31 + li r9, 0x00c0 + rlwimi r10, r9, 0, 20, 31 b finish_tlb_load 2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r12, 2, 22, 24 + rlwinm. r9, r11, 2, 22, 24 beq 5f /* Create TLB tag. This is the faulting address, plus a static @@ -538,7 +399,6 @@ _ENTRY(saved_ksp_limit) */ ori r9, r9, 0x40 rlwimi r10, r9, 0, 20, 31 - mr r11, r12 b finish_tlb_load @@ -546,21 +406,11 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. 
*/ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 b InstructionAccess @@ -569,13 +419,7 @@ _ENTRY(saved_ksp_limit) EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) -#ifdef CONFIG_IBM405_ERR51 - /* 405GP errata 51 */ - START_EXCEPTION(0x1700, Trap_17) - b DTLBMiss -#else EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) -#endif EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) @@ -636,7 +480,6 @@ _ENTRY(saved_ksp_limit) lwz r12,GPR12(r11) lwz r10,crit_r10@l(0) lwz r11,crit_r11@l(0) - PPC405_ERR77_SYNC rfci b . @@ -669,18 +512,6 @@ WDTException: (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) -/* - * The other Data TLB exceptions bail out to this point - * if they can't resolve the lightweight TLB fault. - */ -DataAccess: - EXCEPTION_PROLOG - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ - stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - stw r4, _DEAR(r11) - EXC_XFER_LITE(0x300, handle_page_fault) - /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. * However, for the 4xx-series processors these are neither defined nor @@ -692,7 +523,7 @@ DataAccess: * miss get to this point to load the TLB. 
* r10 - TLB_TAG value * r11 - Linux PTE - * r12, r9 - available to use + * r9 - available to use * PID - loaded with proper value when we get here * Upon exit, we reload everything and RFI. * Actually, it will fit now, but oh well.....a common place @@ -701,45 +532,32 @@ DataAccess: tlb_4xx_index: .long 0 finish_tlb_load: - /* load the next available TLB index. - */ - lwz r9, tlb_4xx_index@l(0) - addi r9, r9, 1 - andi. r9, r9, (PPC40X_TLB_SIZE-1) - stw r9, tlb_4xx_index@l(0) - -6: /* * Clear out the software-only bits in the PTE to generate the * TLB_DATA value. These are the bottom 2 bits of the RPM, the * top 3 bits of the zone field, and M. */ - li r12, 0x0ce2 - andc r11, r11, r12 + li r9, 0x0ce2 + andc r11, r11, r9 + + /* load the next available TLB index. */ + lwz r9, tlb_4xx_index@l(0) + addi r9, r9, 1 + andi. r9, r9, PPC40X_TLB_SIZE - 1 + stw r9, tlb_4xx_index@l(0) tlbwe r11, r9, TLB_DATA /* Load TLB LO */ tlbwe r10, r9, TLB_TAG /* Load TLB HI */ /* Done...restore registers and get out of here. */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - PPC405_ERR77_SYNC rfi /* Should sync shadow TLBs */ b . /* prevent prefetch past rfi */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ddfbd02140d9..0e05a9a47a4b 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -947,15 +947,8 @@ start_here_multiplatform: std r0,0(r4) #endif - /* The following gets the stack set up with the regs */ - /* pointing to the real addr of the kernel stack. This is */ - /* all done to support the C function call below which sets */ - /* up the htab. 
This is done because we have relocated the */ - /* kernel but are still running in real mode. */ - - LOAD_REG_ADDR(r3,init_thread_union) - /* set up a stack pointer */ + LOAD_REG_ADDR(r3,init_thread_union) LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 073a651787df..abb71fad7d6a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/magic.h> +#include <linux/sizes.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -31,10 +32,15 @@ #include "head_32.h" +.macro compare_to_kernel_boundary scratch, addr #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ -#define SIMPLE_KERNEL_ADDRESS 1 + not. \scratch, \addr +#else + rlwinm \scratch, \addr, 16, 0xfff8 + cmpli cr0, \scratch, PAGE_OFFSET@h #endif +.endm /* * We need an ITLB miss handler for kernel addresses if: @@ -196,7 +202,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 #endif @@ -206,44 +212,31 @@ InstructionTLBMiss: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10) mtspr SPRN_MD_EPN, r10 - /* Only modules will cause ITLB Misses as we always - * pin the first 8MB of kernel memory */ #ifdef ITLB_MISS_KERNEL mfcr r11 -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) - cmpi cr0, r10, 0 /* Address >= 0x80000000 */ -#else - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_TEXT - /* It is assumed that kernel code fits into the first 32M */ -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__itlbmiss_linmem_top -#endif -#endif + 
compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ #ifdef ITLB_MISS_KERNEL -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) - bge+ 3f -#else blt+ 3f -#endif -#ifndef CONFIG_PIN_TLB_TEXT - blt cr7, ITLBMissLinear -#endif rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: + mtcr r11 #endif +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ + mtspr SPRN_MD_TWC, r11 +#else lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ - mtspr SPRN_MD_TWC, r10 +#endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ -#ifdef ITLB_MISS_KERNEL - mtcr r11 +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K + mtspr SPRN_MI_TWC, r11 #endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT @@ -263,7 +256,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 #endif rfi @@ -281,33 +274,6 @@ InstructionTLBMiss: rfi #endif -#ifndef CONFIG_PIN_TLB_TEXT -ITLBMissLinear: - mtcr r11 -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23 - patch_site 0f, patch__itlbmiss_linmem_top8 - - mfspr r10, SPRN_SRR0 -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MI_PS8MEG | MI_SVALID - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#endif - mtspr SPRN_MI_TWC, r11 - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MI_RPN, r10 /* Update 
TLB entry */ - -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 - rfi - patch_site 0b, patch__itlbmiss_exit_2 -#endif - . = 0x1200 DataStoreTLBMiss: mtspr SPRN_DAR, r10 @@ -318,21 +284,9 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_IMMR - cmpli cr6, r10, VIRT_IMMR_BASE@h -#endif -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__dtlbmiss_linmem_top - + compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ blt+ 3f -#ifndef CONFIG_PIN_TLB_IMMR -0: beq- cr6, DTLBMissIMMR - patch_site 0b, patch__dtlbmiss_immr_jmp -#endif - blt cr7, DTLBMissLinear rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: @@ -350,6 +304,7 @@ DataStoreTLBMiss: * above. */ rlwimi r11, r10, 0, _PAGE_GUARDED + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MD_TWC, r11 /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. 
@@ -383,61 +338,16 @@ DataStoreTLBMiss: rfi patch_site 0b, patch__dtlbmiss_exit_1 -DTLBMissIMMR: - mtcr r11 - /* Set 512k byte guarded page and mark it valid */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID - mtspr SPRN_MD_TWC, r10 - mfspr r10, SPRN_IMMR /* Get current IMMR */ - rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT | _PAGE_NO_CACHE - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR +#ifdef CONFIG_PERF_EVENTS + patch_site 0f, patch__dtlbmiss_perf +0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + addi r10, r10, 1 + stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + mfspr r10, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r11, SPRN_M_TW rfi - patch_site 0b, patch__dtlbmiss_exit_2 - -DTLBMissLinear: - mtcr r11 - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23 - patch_site 0f, patch__dtlbmiss_romem_top8 - -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 0, 0xff800000 - neg r10, r11 - or r11, r11, r10 - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MD_PS8MEG | MD_SVALID #endif - mtspr SPRN_MD_TWC, r11 -#ifdef CONFIG_STRICT_KERNEL_RWX - patch_site 0f, patch__dtlbmiss_romem_top - -0: subis r11, r10, 0 - rlwimi r10, r11, 11, _PAGE_RO -#endif - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi - patch_site 0b, patch__dtlbmiss_exit_3 /* This is an instruction TLB error on the MPC8xx. 
This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -485,18 +395,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) -/* Called from DataStoreTLBMiss when perf TLB misses events are activated */ -#ifdef CONFIG_PERF_EVENTS - patch_site 0f, patch__dtlbmiss_perf -0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - addi r10, r10, 1 - stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi -#endif - stack_overflow: vmap_stack_overflow_exception @@ -563,14 +461,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ cmpli cr1, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f - rlwinm r11, r10, 16, 0xfff8 - -0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h - patch_site 0b, patch__fixupdar_linmem_top /* create physical page address from effective address */ tophys(r11, r10) - blt- cr7, 201f mfspr r11, SPRN_M_TWB /* Get level 1 table */ rlwinm r11, r11, 0, 20, 31 oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -581,7 +474,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ bt 28,200f /* bit 28 = Large page (8M) */ - bt 29,202f /* bit 29 = Large page (8M or 512K) */ /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 201: lwz r11,0(r11) @@ -608,11 +500,6 @@ FixupDAR:/* Entry point for dcbx workaround. 
*/ rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 b 201b -202: - /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -747,6 +634,31 @@ start_here: rfi /* Load up the kernel context */ 2: +#ifdef CONFIG_PIN_TLB_IMMR + lis r0, MD_TWAM@h + oris r0, r0, 0x1f00 + mtspr SPRN_MD_CTR, r0 + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + tlbie r0 + mtspr SPRN_MD_EPN, r0 + LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED) + mtspr SPRN_MD_TWC, r0 + mfspr r0, SPRN_IMMR + rlwinm r0, r0, 0, 0xfff80000 + ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_RPN, r0 + lis r0, (MD_TWAM | MD_RSV4I)@h + mtspr SPRN_MD_CTR, r0 +#endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif +#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) + lis r0, MD_TWAM@h + mtspr SPRN_MD_CTR, r0 +#endif tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ @@ -779,17 +691,10 @@ start_here: initial_mmu: li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ - lis r10, MD_RESETVAL@h -#ifndef CONFIG_8xx_COPYBACK - oris r10, r10, MD_WTDEF@h -#endif + lis r10, MD_TWAM@h mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ tlbia /* Invalidate all TLB entries */ -#ifdef CONFIG_PIN_TLB_DATA - oris r10, r10, MD_RSV4I@h - mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ -#endif lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l @@ -798,55 +703,32 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Map a 512k page for the IMMR to get the processor - * internal registers (among other things). - */ -#ifdef CONFIG_PIN_TLB_IMMR - oris r10, r10, MD_RSV4I@h - ori r10, r10, 0x1c00 - mtspr SPRN_MD_CTR, r10 - - mfspr r9, 638 /* Get current IMMR */ - andis. 
r9, r9, 0xfff8 /* Get 512 kbytes boundary */ - - lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */ - ori r8, r8, MD_EVALID /* Mark it valid */ - mtspr SPRN_MD_EPN, r8 - li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r8 - mr r8, r9 /* Create paddr for TLB */ - ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ - mtspr SPRN_MD_RPN, r8 -#endif - - /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */ -#ifdef CONFIG_PIN_TLB_TEXT + /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */ lis r8, MI_RSV4I@h ori r8, r8, 0x1c00 -#endif + oris r12, r10, MD_RSV4I@h + ori r12, r12, 0x1c00 li r9, 4 /* up to 4 pages of 8M */ mtctr r9 lis r9, KERNELBASE@h /* Create vaddr for TLB */ li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ li r11, MI_BOOTINIT /* Create RPN for address 0 */ - lis r12, _einittext@h - ori r12, r12, _einittext@l 1: -#ifdef CONFIG_PIN_TLB_TEXT mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ addi r8, r8, 0x100 -#endif - ori r0, r9, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r0 mtspr SPRN_MI_TWC, r10 mtspr SPRN_MI_RPN, r11 /* Store TLB entry */ + mtspr SPRN_MD_CTR, r12 + addi r12, r12, 0x100 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r10 + mtspr SPRN_MD_RPN, r11 addis r9, r9, 0x80 addis r11, r11, 0x80 - cmpl cr0, r9, r12 - bdnzf gt, 1b + bdnz 1b /* Since the cache is enabled according to the information we * just loaded into the TLB, invalidate and enable the caches here. 
@@ -857,17 +739,7 @@ initial_mmu: mtspr SPRN_DC_CST, r8 lis r8, IDC_ENABLE@h mtspr SPRN_IC_CST, r8 -#ifdef CONFIG_8xx_COPYBACK mtspr SPRN_DC_CST, r8 -#else - /* For a debug option, I left this here to easily enable - * the write through cache mode - */ - lis r8, DC_SFWT@h - mtspr SPRN_DC_CST, r8 - lis r8, IDC_ENABLE@h - mtspr SPRN_DC_CST, r8 -#endif /* Disable debug mode entry on breakpoints */ mfspr r8, SPRN_DER #ifdef CONFIG_PERF_EVENTS @@ -878,6 +750,108 @@ initial_mmu: mtspr SPRN_DER, r8 blr +#ifdef CONFIG_PIN_TLB +_GLOBAL(mmu_pin_tlb) + lis r9, (1f - PAGE_OFFSET)@h + ori r9, r9, (1f - PAGE_OFFSET)@l + mfmsr r10 + mflr r11 + li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) + rlwinm r0, r10, 0, ~MSR_RI + rlwinm r0, r0, 0, ~MSR_EE + mtmsr r0 + isync + .align 4 + mtspr SPRN_SRR0, r9 + mtspr SPRN_SRR1, r12 + rfi +1: + li r5, 0 + lis r6, MD_TWAM@h + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MD_CTR, r6 + tlbia + +#ifdef CONFIG_PIN_TLB_TEXT + LOAD_REG_IMMEDIATE(r5, 28 << 8) + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + li r0, 4 + mtctr r0 + +2: ori r0, r6, MI_EVALID + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MI_EPN, r0 + mtspr SPRN_MI_TWC, r7 + mtspr SPRN_MI_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + lis r0, MI_RSV4I@h + mtspr SPRN_MI_CTR, r0 +#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) +#ifdef CONFIG_PIN_TLB_DATA + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) +#ifdef CONFIG_PIN_TLB_IMMR + li r0, 3 +#else + li r0, 4 +#endif + mtctr r0 + cmpwi r4, 0 + beq 4f + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis 
r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + +4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r3 + bdnzt lt, 2b +#endif +#ifdef CONFIG_PIN_TLB_IMMR + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) + mfspr r8, SPRN_IMMR + rlwinm r8, r8, 0, 0xfff80000 + ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 +#endif +#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) + lis r0, (MD_RSV4I | MD_TWAM)@h + mtspr SPRN_MI_CTR, r0 +#endif + mtspr SPRN_SRR1, r10 + mtspr SPRN_SRR0, r11 + rfi +#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. 
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bd2e5ed8dd50..18f87bf9e32b 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -534,7 +534,7 @@ struct exception_regs { }; /* ensure this structure is always sized to a multiple of the stack alignment */ -#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16) +#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16) #endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 72f461bd70fb..0000daf0e1da 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -24,13 +24,14 @@ #include <asm/debug.h> #include <asm/debugfs.h> #include <asm/hvcall.h> +#include <asm/inst.h> #include <linux/uaccess.h> /* * Stores the breakpoints currently in use on each breakpoint address * register for every cpu */ -static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]); /* * Returns total number of data or instruction breakpoints available. @@ -38,10 +39,21 @@ static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); int hw_breakpoint_slots(int type) { if (type == TYPE_DATA) - return HBP_NUM; + return nr_wp_slots(); return 0; /* no instruction breakpoints available */ } +static bool single_step_pending(void) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (current->thread.last_hit_ubp[i]) + return true; + } + return false; +} + /* * Install a perf counter breakpoint. 
* @@ -54,16 +66,26 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct perf_event **slot; + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + slot = this_cpu_ptr(&bp_per_reg[i]); + if (!*slot) { + *slot = bp; + break; + } + } - *slot = bp; + if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot")) + return -EBUSY; /* * Do not install DABR values if the instruction must be single-stepped. * If so, DABR will be populated in single_step_dabr_instruction(). */ - if (current->thread.last_hit_ubp != bp) - __set_breakpoint(info); + if (!single_step_pending()) + __set_breakpoint(i, info); return 0; } @@ -79,15 +101,248 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct arch_hw_breakpoint null_brk = {0}; + struct perf_event **slot; + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + slot = this_cpu_ptr(&bp_per_reg[i]); + if (*slot == bp) { + *slot = NULL; + break; + } + } - if (*slot != bp) { - WARN_ONCE(1, "Can't find the breakpoint"); + if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot")) return; + + __set_breakpoint(i, &null_brk); +} + +static bool is_ptrace_bp(struct perf_event *bp) +{ + return bp->overflow_handler == ptrace_triggered; +} + +struct breakpoint { + struct list_head list; + struct perf_event *bp; + bool ptrace_bp; +}; + +static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); +static LIST_HEAD(task_bps); + +static struct breakpoint *alloc_breakpoint(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + tmp->bp = bp; + tmp->ptrace_bp = is_ptrace_bp(bp); + return tmp; +} + +static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event 
*bp2) +{ + __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; + + bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); + bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE); + bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); + bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE); + + return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); +} + +static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) +{ + return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp; +} + +static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) +{ + return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); +} + +static int task_bps_add(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + list_add(&tmp->list, &task_bps); + return 0; +} + +static void task_bps_remove(struct perf_event *bp) +{ + struct list_head *pos, *q; + + list_for_each_safe(pos, q, &task_bps) { + struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); + + if (tmp->bp == bp) { + list_del(&tmp->list); + kfree(tmp); + break; + } } +} - *slot = NULL; - hw_breakpoint_disable(); +/* + * If any task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. + */ +static bool all_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (!can_co_exist(tmp, bp)) + return true; + } + return false; +} + +/* + * If same task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. 
+ */ +static bool same_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (tmp->bp->hw.target == bp->hw.target && + !can_co_exist(tmp, bp)) + return true; + } + return false; +} + +static int cpu_bps_add(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + struct breakpoint *tmp; + int i = 0; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) { + cpu_bp[i] = tmp; + break; + } + } + return 0; +} + +static void cpu_bps_remove(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i = 0; + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) + continue; + + if (cpu_bp[i]->bp == bp) { + kfree(cpu_bp[i]); + cpu_bp[i] = NULL; + break; + } + } +} + +static bool cpu_bps_check(int cpu, struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i; + + cpu_bp = per_cpu_ptr(cpu_bps, cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) + return true; + } + return false; +} + +static bool all_cpu_bps_check(struct perf_event *bp) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (cpu_bps_check(cpu, bp)) + return true; + } + return false; +} + +/* + * We don't use any locks to serialize accesses to cpu_bps or task_bps + * because are already inside nr_bp_mutex. 
+ */ +int arch_reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + /* ptrace breakpoint */ + if (is_ptrace_bp(bp)) { + if (all_cpu_bps_check(bp)) + return -ENOSPC; + + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } + + /* perf breakpoint */ + if (is_kernel_addr(bp->attr.bp_addr)) + return 0; + + if (bp->hw.target && bp->cpu == -1) { + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } else if (!bp->hw.target && bp->cpu != -1) { + if (all_task_bps_check(bp)) + return -ENOSPC; + + return cpu_bps_add(bp); + } + + if (same_task_bps_check(bp)) + return -ENOSPC; + + ret = cpu_bps_add(bp); + if (ret) + return ret; + ret = task_bps_add(bp); + if (ret) + cpu_bps_remove(bp); + + return ret; +} + +void arch_release_bp_slot(struct perf_event *bp) +{ + if (!is_kernel_addr(bp->attr.bp_addr)) { + if (bp->hw.target) + task_bps_remove(bp); + if (bp->cpu != -1) + cpu_bps_remove(bp); + } } /* @@ -102,8 +357,14 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp) * restoration variables to prevent dangling pointers. * FIXME, this should not be using bp->ctx at all! Sayeth peterz. */ - if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) - bp->ctx->task->thread.last_hit_ubp = NULL; + if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) { + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (bp->ctx->task->thread.last_hit_ubp[i] == bp) + bp->ctx->task->thread.last_hit_ubp[i] = NULL; + } + } } /* @@ -140,10 +401,10 @@ int arch_bp_generic_fields(int type, int *gen_bp_type) * <---8 bytes---> * * In this case, we should configure hw as: - * start_addr = address & ~HW_BREAKPOINT_ALIGN + * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1) * len = 16 bytes * - * @start_addr and @end_addr are inclusive. + * @start_addr is inclusive but @end_addr is exclusive. 
*/ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) { @@ -151,14 +412,14 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) u16 hw_len; unsigned long start_addr, end_addr; - start_addr = hw->address & ~HW_BREAKPOINT_ALIGN; - end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN; - hw_len = end_addr - start_addr + 1; + start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE); + end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE); + hw_len = end_addr - start_addr; if (dawr_enabled()) { max_len = DAWR_MAX_LEN; /* DAWR region can't cross 512 bytes boundary */ - if ((start_addr >> 9) != (end_addr >> 9)) + if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M)) return -EINVAL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { /* 8xx can setup a range without limitation */ @@ -215,90 +476,209 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { struct arch_hw_breakpoint *info; + int i; - if (likely(!tsk->thread.last_hit_ubp)) - return; + for (i = 0; i < nr_wp_slots(); i++) { + if (unlikely(tsk->thread.last_hit_ubp[i])) + goto reset; + } + return; - info = counter_arch_bp(tsk->thread.last_hit_ubp); +reset: regs->msr &= ~MSR_SE; - __set_breakpoint(info); - tsk->thread.last_hit_ubp = NULL; + for (i = 0; i < nr_wp_slots(); i++) { + info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); + __set_breakpoint(i, info); + tsk->thread.last_hit_ubp[i] = NULL; + } } -static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info) +static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) { return ((info->address <= dar) && (dar - info->address < info->len)); } -static bool -dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info) +static bool dar_user_range_overlaps(unsigned long dar, int size, + struct arch_hw_breakpoint *info) +{ + return ((dar < info->address + info->len) && + (dar + size > 
info->address)); +} + +static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) +{ + unsigned long hw_start_addr, hw_end_addr; + + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + + return ((hw_start_addr <= dar) && (hw_end_addr > dar)); +} + +static bool dar_hw_range_overlaps(unsigned long dar, int size, + struct arch_hw_breakpoint *info) { - return ((dar <= info->address + info->len - 1) && - (dar + size - 1 >= info->address)); + unsigned long hw_start_addr, hw_end_addr; + + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + + return ((dar < hw_end_addr) && (dar + size > hw_start_addr)); } /* - * Handle debug exception notifications. + * If hw has multiple DAWR registers, we also need to check all + * dawrx constraint bits to confirm this is _really_ a valid event. */ -static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp, - struct arch_hw_breakpoint *info) +static bool check_dawrx_constraints(struct pt_regs *regs, int type, + struct arch_hw_breakpoint *info) { - unsigned int instr = 0; - int ret, type, size; - struct instruction_op op; - unsigned long addr = info->address; + if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) + return false; - if (__get_user_inatomic(instr, (unsigned int *)regs->nip)) - goto fail; + if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE)) + return false; - ret = analyse_instr(&op, regs, instr); - type = GETTYPE(op.type); - size = GETSIZE(op.type); + if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) + return false; - if (!ret && (type == LARX || type == STCX)) { - printk_ratelimited("Breakpoint hit on instruction that can't be emulated." 
- " Breakpoint at 0x%lx will be disabled.\n", addr); - goto disable; - } + if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) + return false; + + return true; +} + +/* + * Return true if the event is valid wrt dawr configuration, + * including extraneous exception. Otherwise return false. + */ +static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, + int type, int size, struct arch_hw_breakpoint *info) +{ + bool in_user_range = dar_in_user_range(regs->dar, info); + bool dawrx_constraints; /* - * If it's extraneous event, we still need to emulate/single- - * step the instruction, but we don't generate an event. + * 8xx supports only one breakpoint and thus we can + * unconditionally return true. */ - if (size && !dar_range_overlaps(regs->dar, size, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!in_user_range) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } - /* Do not emulate user-space instructions, instead single-step them */ - if (user_mode(regs)) { - current->thread.last_hit_ubp = bp; - regs->msr |= MSR_SE; + if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { + if (in_user_range) + return true; + + if (dar_in_hw_range(regs->dar, info)) { + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } return false; } - if (!emulate_step(regs, instr)) - goto fail; + dawrx_constraints = check_dawrx_constraints(regs, type, info); - return true; + if (dar_user_range_overlaps(regs->dar, size, info)) + return dawrx_constraints; + + if (dar_hw_range_overlaps(regs->dar, size, info)) { + if (dawrx_constraints) { + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } + } + return false; +} + +static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, + int *type, int *size, bool *larx_stcx) +{ + struct instruction_op op; + + if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + return; + + analyse_instr(&op, regs, *instr); -fail: /* - * We've failed 
in reliably handling the hw-breakpoint. Unregister - * it and throw a warning message to let the user know about it. + * Set size = 8 if analyse_instr() fails. If it's a userspace + * watchpoint(valid or extraneous), we can notify user about it. + * If it's a kernel watchpoint, instruction emulation will fail + * in stepping_handler() and watchpoint will be disabled. */ - WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " - "0x%lx will be disabled.", addr); + *type = GETTYPE(op.type); + *size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8; + *larx_stcx = (*type == LARX || *type == STCX); +} -disable: +/* + * We've failed in reliably handling the hw-breakpoint. Unregister + * it and throw a warning message to let the user know about it. + */ +static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info) +{ + WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.", + info->address); perf_event_disable_inatomic(bp); - return false; +} + +static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info) +{ + printk_ratelimited("Breakpoint hit on instruction that can't be emulated. 
Breakpoint at 0x%lx will be disabled.\n", + info->address); + perf_event_disable_inatomic(bp); +} + +static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, + struct arch_hw_breakpoint **info, int *hit, + struct ppc_inst instr) +{ + int i; + int stepped; + + /* Do not emulate user-space instructions, instead single-step them */ + if (user_mode(regs)) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + current->thread.last_hit_ubp[i] = bp[i]; + info[i] = NULL; + } + regs->msr |= MSR_SE; + return false; + } + + stepped = emulate_step(regs, instr); + if (!stepped) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + handler_error(bp[i], info[i]); + info[i] = NULL; + } + return false; + } + return true; } int hw_breakpoint_handler(struct die_args *args) { + bool err = false; int rc = NOTIFY_STOP; - struct perf_event *bp; + struct perf_event *bp[HBP_NUM_MAX] = { NULL }; struct pt_regs *regs = args->regs; - struct arch_hw_breakpoint *info; + struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL }; + int i; + int hit[HBP_NUM_MAX] = {0}; + int nr_hit = 0; + bool ptrace_bp = false; + struct ppc_inst instr = ppc_inst(0); + int type = 0; + int size = 0; + bool larx_stcx = false; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -311,12 +691,40 @@ int hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __this_cpu_read(bp_per_reg); - if (!bp) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + get_instr_detail(regs, &instr, &type, &size, &larx_stcx); + + for (i = 0; i < nr_wp_slots(); i++) { + bp[i] = __this_cpu_read(bp_per_reg[i]); + if (!bp[i]) + continue; + + info[i] = counter_arch_bp(bp[i]); + info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; + + if (check_constraints(regs, instr, type, size, info[i])) { + if (!IS_ENABLED(CONFIG_PPC_8xx) && + ppc_inst_equal(instr, ppc_inst(0))) { + handler_error(bp[i], info[i]); + info[i] = NULL; + err = 1; + continue; + } + + if (is_ptrace_bp(bp[i])) + 
ptrace_bp = true; + hit[i] = 1; + nr_hit++; + } + } + + if (err) + goto reset; + + if (!nr_hit) { rc = NOTIFY_DONE; goto out; } - info = counter_arch_bp(bp); /* * Return early after invoking user-callback function without restoring @@ -324,29 +732,50 @@ int hw_breakpoint_handler(struct die_args *args) * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal * generated in do_dabr(). */ - if (bp->overflow_handler == ptrace_triggered) { - perf_bp_event(bp, regs); + if (ptrace_bp) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + perf_bp_event(bp[i], regs); + info[i] = NULL; + } rc = NOTIFY_DONE; - goto out; + goto reset; } - info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (IS_ENABLED(CONFIG_PPC_8xx)) { - if (!dar_within_range(regs->dar, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - } else { - if (!stepping_handler(regs, bp, info)) - goto out; + if (!IS_ENABLED(CONFIG_PPC_8xx)) { + if (larx_stcx) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + larx_stcx_err(bp[i], info[i]); + info[i] = NULL; + } + goto reset; + } + + if (!stepping_handler(regs, bp, info, hit, instr)) + goto reset; } /* * As a policy, the callback is invoked in a 'trigger-after-execute' * fashion */ - if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) - perf_bp_event(bp, regs); + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + perf_bp_event(bp[i], regs); + } + +reset: + for (i = 0; i < nr_wp_slots(); i++) { + if (!info[i]) + continue; + __set_breakpoint(i, info[i]); + } - __set_breakpoint(info); out: rcu_read_unlock(); return rc; @@ -361,26 +790,43 @@ static int single_step_dabr_instruction(struct die_args *args) struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; struct arch_hw_breakpoint *info; + int i; + bool found = false; - bp = current->thread.last_hit_ubp; /* * Check if we are single-stepping as a result of a * previous HW Breakpoint exception */ - if 
(!bp) - return NOTIFY_DONE; + for (i = 0; i < nr_wp_slots(); i++) { + bp = current->thread.last_hit_ubp[i]; + + if (!bp) + continue; + + found = true; + info = counter_arch_bp(bp); + + /* + * We shall invoke the user-defined callback function in the + * single stepping handler to confirm to 'trigger-after-execute' + * semantics + */ + if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + perf_bp_event(bp, regs); + current->thread.last_hit_ubp[i] = NULL; + } - info = counter_arch_bp(bp); + if (!found) + return NOTIFY_DONE; - /* - * We shall invoke the user-defined callback function in the single - * stepping handler to confirm to 'trigger-after-execute' semantics - */ - if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) - perf_bp_event(bp, regs); + for (i = 0; i < nr_wp_slots(); i++) { + bp = __this_cpu_read(bp_per_reg[i]); + if (!bp) + continue; - __set_breakpoint(info); - current->thread.last_hit_ubp = NULL; + info = counter_arch_bp(bp); + __set_breakpoint(i, info); + } /* * If the process was being single-stepped by ptrace, let the @@ -419,10 +865,13 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify); */ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { + int i; struct thread_struct *t = &tsk->thread; - unregister_hw_breakpoint(t->ptrace_bps[0]); - t->ptrace_bps[0] = NULL; + for (i = 0; i < nr_wp_slots(); i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } } void hw_breakpoint_pmu_read(struct perf_event *bp) diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 433d97bea1f3..69df840f7253 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -187,6 +187,7 @@ BEGIN_FTR_SECTION mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) b transfer_to_handler_cont +_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) .data diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 308f499e146c..72c85b6f3898 100644 --- 
a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -90,3 +90,4 @@ _GLOBAL(power_save_ppc32_restore) #endif b transfer_to_handler_cont +_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index ca37702bde97..144858027fa3 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -6,14 +6,15 @@ #include <linux/kernel.h> #include <linux/jump_label.h> #include <asm/code-patching.h> +#include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 *addr = (u32 *)(unsigned long)entry->code; + struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code; if (type == JUMP_LABEL_JMP) patch_branch(addr, entry->target, 0); else - patch_instruction(addr, PPC_INST_NOP); + patch_instruction(addr, ppc_inst(PPC_INST_NOP)); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7dd55eb1259d..652b2852bea3 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -26,6 +26,7 @@ #include <asm/debug.h> #include <asm/code-patching.h> #include <linux/slab.h> +#include <asm/inst.h> /* * This table contains the mapping between PowerPC hardware trap types, and @@ -418,13 +419,13 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr; - unsigned int *addr = (unsigned int *)bpt->bpt_addr; + struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; err = probe_kernel_address(addr, instr); if (err) return err; - err = patch_instruction(addr, BREAK_INSTR); + err = patch_instruction(addr, ppc_inst(BREAK_INSTR)); if (err) return -EFAULT; @@ -437,9 +438,9 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr = *(unsigned int *)bpt->saved_instr; - unsigned int *addr = (unsigned int *)bpt->bpt_addr; + struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = patch_instruction(addr, instr); + err = 
patch_instruction(addr, ppc_inst(instr)); if (err) return -EFAULT; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 81efb605113e..6f96f65ebfe8 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -23,6 +23,7 @@ #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> +#include <asm/inst.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -105,7 +106,9 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; - kprobe_opcode_t insn = *p->addr; + struct kprobe *prev; + struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); + struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -113,6 +116,17 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; + } else if (ppc_inst_prefixed(prefix)) { + printk("Cannot register a kprobe on the second word of prefixed instruction\n"); + ret = -EINVAL; + } + preempt_disable(); + prev = get_kprobe(p->addr - 1); + preempt_enable_no_resched(); + if (prev && + ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) { + printk("Cannot register a kprobe on the second word of prefixed instruction\n"); + ret = -EINVAL; } /* insn must be on a special executable page on ppc64. 
This is @@ -124,11 +138,8 @@ int arch_prepare_kprobe(struct kprobe *p) } if (!ret) { - memcpy(p->ainsn.insn, p->addr, - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; - flush_icache_range((unsigned long)p->ainsn.insn, - (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + patch_instruction((struct ppc_inst *)p->ainsn.insn, insn); + p->opcode = ppc_inst_val(insn); } p->ainsn.boostable = 0; @@ -138,13 +149,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, BREAKPOINT_INSTRUCTION); + patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, p->opcode); + patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode)); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -216,7 +227,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - unsigned int insn = *p->ainsn.insn; + struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); @@ -233,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... */ - printk("Can't step on instruction %x\n", insn); + printk("Can't step on instruction %x\n", ppc_inst_val(insn)); BUG(); } else { /* @@ -276,14 +287,18 @@ int kprobe_handler(struct pt_regs *regs) p = get_kprobe(addr); if (!p) { - if (*addr != BREAKPOINT_INSTRUCTION) { + unsigned int instr; + + if (probe_kernel_address(addr, instr)) + goto no_kprobe; + + if (instr != BREAKPOINT_INSTRUCTION) { /* * PowerPC has multiple variants of the "trap" * instruction. 
If the current instruction is a * trap variant, it could belong to someone else */ - kprobe_opcode_t cur_insn = *addr; - if (is_trap(cur_insn)) + if (is_trap(instr)) goto no_kprobe; /* * The breakpoint instruction was removed right @@ -464,14 +479,16 @@ NOKPROBE_SYMBOL(trampoline_probe_handler); */ int kprobe_post_handler(struct pt_regs *regs) { + int len; struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); if (!cur || user_mode(regs)) return 0; + len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn)); /* make sure we got here for instruction we have a kprobe on */ - if (((unsigned long)cur->ainsn.insn + 4) != regs->nip) + if (((unsigned long)cur->ainsn.insn + len) != regs->nip) return 0; if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { @@ -480,7 +497,7 @@ int kprobe_post_handler(struct pt_regs *regs) } /* Adjust nip to after the single-stepped instruction */ - regs->nip = (unsigned long)cur->addr + 4; + regs->nip = (unsigned long)cur->addr + len; regs->msr |= kcb->kprobe_saved_msr; /*Restore back the original saved kprobes variables and continue. */ diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2020d255585f..5f07aa5e9851 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -455,5 +455,6 @@ _GLOBAL(__inval_enable_L1) sync blr +_ASM_NOKPROBE_SYMBOL(__inval_enable_L1) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 8077b5fb18a7..fd90c0eda229 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -16,6 +16,7 @@ #include <linux/export.h> #include <linux/irq_work.h> #include <linux/extable.h> +#include <linux/ftrace.h> #include <asm/machdep.h> #include <asm/mce.h> @@ -571,9 +572,16 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); * * regs->nip and regs->msr contains srr0 and ssr1. 
*/ -long machine_check_early(struct pt_regs *regs) +long notrace machine_check_early(struct pt_regs *regs) { long handled = 0; + bool nested = in_nmi(); + u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); + + this_cpu_set_ftrace_enabled(0); + + if (!nested) + nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -582,6 +590,12 @@ long machine_check_early(struct pt_regs *regs) */ if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); + + if (!nested) + nmi_exit(); + + this_cpu_set_ftrace_enabled(ftrace_enabled); + return handled; } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 067b094bfeff..c32af49a5138 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -20,6 +20,7 @@ #include <asm/sstep.h> #include <asm/exception-64s.h> #include <asm/extable.h> +#include <asm/inst.h> /* * Convert an address related to an mm to a PFN. NOTE: we are in real @@ -27,7 +28,7 @@ */ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) { - pte_t *ptep; + pte_t *ptep, pte; unsigned int shift; unsigned long pfn, flags; struct mm_struct *mm; @@ -39,19 +40,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) local_irq_save(flags); ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift); + if (!ptep) { + pfn = ULONG_MAX; + goto out; + } + pte = READ_ONCE(*ptep); - if (!ptep || pte_special(*ptep)) { + if (!pte_present(pte) || pte_special(pte)) { pfn = ULONG_MAX; goto out; } if (shift <= PAGE_SHIFT) - pfn = pte_pfn(*ptep); + pfn = pte_pfn(pte); else { unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; - pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask))); + pfn = pte_pfn(__pte(pte_val(pte) | (addr & rpnmask))); } - out: local_irq_restore(flags); return pfn; @@ -365,7 +370,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, * in real-mode is tricky and can lead to recursive * faults */ - int instr; + struct ppc_inst instr; unsigned long pfn, 
instr_addr; struct instruction_op op; struct pt_regs tmp = *regs; @@ -373,7 +378,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, pfn = addr_to_pfn(regs, regs->nip); if (pfn != ULONG_MAX) { instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); - instr = *(unsigned int *)(instr_addr); + instr = ppc_inst_read((struct ppc_inst *)instr_addr); if (!analyse_instr(&op, &tmp, instr)) { pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 65f9f731c229..5be96feccb55 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -36,6 +36,8 @@ _GLOBAL(add_reloc_offset) add r3,r3,r5 mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(reloc_offset) +_ASM_NOKPROBE_SYMBOL(add_reloc_offset) .align 3 2: PPC_LONG 1b diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index d80212be8698..b24f866fef81 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -246,6 +246,7 @@ _GLOBAL(real_readb) sync isync blr +_ASM_NOKPROBE_SYMBOL(real_readb) /* * Do an IO access in real mode @@ -263,6 +264,7 @@ _GLOBAL(real_writeb) sync isync blr +_ASM_NOKPROBE_SYMBOL(real_writeb) #endif /* CONFIG_40x */ @@ -274,17 +276,8 @@ _GLOBAL(real_writeb) #ifndef CONFIG_PPC_8xx _GLOBAL(flush_instruction_cache) #if defined(CONFIG_4xx) -#ifdef CONFIG_403GCX - li r3, 512 - mtctr r3 - lis r4, KERNELBASE@h -1: iccci 0, r4 - addi r4, r4, 16 - bdnz 1b -#else lis r3, KERNELBASE@h iccci 0,r3 -#endif #elif defined(CONFIG_FSL_BOOKE) #ifdef CONFIG_E200 mfspr r3,SPRN_L1CSR0 diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index d7134c614c16..c27b8687b82a 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -67,21 +67,6 @@ static int relacmp(const void *_x, const void *_y) return 0; } -static void relaswap(void *_x, void *_y, int size) -{ - uint32_t *x, *y, tmp; - int i; - - y = (uint32_t *)_x; - x = (uint32_t 
*)_y; - - for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) { - tmp = x[i]; - x[i] = y[i]; - y[i] = tmp; - } -} - /* Get the potential trampolines size required of the init and non-init sections */ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, @@ -118,7 +103,7 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, */ sort((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf32_Rela), - sizeof(Elf32_Rela), relacmp, relaswap); + sizeof(Elf32_Rela), relacmp, NULL); ret += count_relocs((void *)hdr + sechdrs[i].sh_offset, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 007606a48fd9..f4c2fa190192 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -20,6 +20,7 @@ #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> +#include <asm/inst.h> /* FIXME: We don't do .init separately. To do this, we'd need to have a separate r2 value in the init and core section, and stub between @@ -144,42 +145,6 @@ static u32 ppc64_stub_insns[] = { PPC_INST_BCTR, }; -#ifdef CONFIG_DYNAMIC_FTRACE -int module_trampoline_target(struct module *mod, unsigned long addr, - unsigned long *target) -{ - struct ppc64_stub_entry *stub; - func_desc_t funcdata; - u32 magic; - - if (!within_module_core(addr, mod)) { - pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - stub = (struct ppc64_stub_entry *)addr; - - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { - pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - if (magic != STUB_MAGIC) { - pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { - pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - *target = stub_func_addr(funcdata); - - return 
0; -} -#endif - /* Count how many different 24-bit relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) @@ -226,21 +191,6 @@ static int relacmp(const void *_x, const void *_y) return 0; } -static void relaswap(void *_x, void *_y, int size) -{ - uint64_t *x, *y, tmp; - int i; - - y = (uint64_t *)_x; - x = (uint64_t *)_y; - - for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) { - tmp = x[i]; - x[i] = y[i]; - y[i] = tmp; - } -} - /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs) @@ -264,7 +214,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, */ sort((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size / sizeof(Elf64_Rela), - sizeof(Elf64_Rela), relacmp, relaswap); + sizeof(Elf64_Rela), relacmp, NULL); relocs += count_relocs((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size @@ -384,6 +334,92 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return 0; } +#ifdef CONFIG_MPROFILE_KERNEL + +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +/* + * ld r12,PACATOC(r13) + * addis r12,r12,<high> + * addi r12,r12,<low> + * mtctr r12 + * bctr + */ +static u32 stub_insns[] = { + PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, + PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_MTCTR | __PPC_RS(R12), + PPC_INST_BCTR, +}; + +/* + * For mprofile-kernel we use a special stub for ftrace_caller() because we + * can't rely on r2 containing this module's TOC when we enter the stub. + * + * That can happen if the function calling us didn't need to use the toc. In + * that case it won't have setup r2, and the r2 value will be either the + * kernel's toc, or possibly another modules toc. + * + * To deal with that this stub uses the kernel toc, which is always accessible + * via the paca (in r13). 
The target (ftrace_caller()) is responsible for + * saving and restoring the toc before returning. + */ +static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, + unsigned long addr, + struct module *me) +{ + long reladdr; + + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + + /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ + entry->funcdata = func_desc(addr); + entry->magic = STUB_MAGIC; + + return 1; +} + +static bool is_mprofile_ftrace_call(const char *name) +{ + if (!strcmp("_mcount", name)) + return true; +#ifdef CONFIG_DYNAMIC_FTRACE + if (!strcmp("ftrace_caller", name)) + return true; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!strcmp("ftrace_regs_caller", name)) + return true; +#endif +#endif + + return false; +} +#else +static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, + unsigned long addr, + struct module *me) +{ + return 0; +} + +static bool is_mprofile_ftrace_call(const char *name) +{ + return false; +} +#endif + /* * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the * value maximum span in an instruction which uses a signed offset). Round down @@ -399,10 +435,14 @@ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) static inline int create_stub(const Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, unsigned long addr, - struct module *me) + struct module *me, + const char *name) { long reladdr; + if (is_mprofile_ftrace_call(name)) + return create_ftrace_stub(entry, addr, me); + memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); /* Stub uses address relative to r2. 
*/ @@ -426,7 +466,8 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, stub to set up the TOC ptr (r2) for the function. */ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, unsigned long addr, - struct module *me) + struct module *me, + const char *name) { struct ppc64_stub_entry *stubs; unsigned int i, num_stubs; @@ -443,62 +484,19 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } - if (!create_stub(sechdrs, &stubs[i], addr, me)) + if (!create_stub(sechdrs, &stubs[i], addr, me, name)) return 0; return (unsigned long)&stubs[i]; } -#ifdef CONFIG_MPROFILE_KERNEL -static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) -{ - if (strcmp("_mcount", name)) - return false; - - /* - * Check if this is one of the -mprofile-kernel sequences. - */ - if (instruction[-1] == PPC_INST_STD_LR && - instruction[-2] == PPC_INST_MFLR) - return true; - - if (instruction[-1] == PPC_INST_MFLR) - return true; - - return false; -} - -/* - * In case of _mcount calls, do not save the current callee's TOC (in r2) into - * the original caller's stack frame. If we did we would clobber the saved TOC - * value of the original caller. - */ -static void squash_toc_save_inst(const char *name, unsigned long addr) -{ - struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; - - /* Only for calls to _mcount */ - if (strcmp("_mcount", name) != 0) - return; - - stub->jump[2] = PPC_INST_NOP; -} -#else -static void squash_toc_save_inst(const char *name, unsigned long addr) { } - -static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) -{ - return false; -} -#endif - /* We expect a noop next: if it is, replace it with instruction to restore r2. 
*/ static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; - if (is_mprofile_mcount_callsite(name, prev_insn)) + if (is_mprofile_ftrace_call(name)) return 1; /* @@ -506,7 +504,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) * "link" branches and they don't return, so they don't need the r2 * restore afterwards. */ - if (!instr_is_relative_link_branch(*prev_insn)) + if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 1; if (*instruction != PPC_INST_NOP) { @@ -636,14 +634,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_LIVEPATCH) { /* External: go via stub */ - value = stub_for_addr(sechdrs, value, me); + value = stub_for_addr(sechdrs, value, me, + strtab + sym->st_name); if (!value) return -ENOENT; if (!restore_r2(strtab + sym->st_name, (u32 *)location + 1, me)) return -ENOEXEC; - - squash_toc_save_inst(strtab + sym->st_name, value); } else value += local_entry_offset(sym); @@ -745,89 +742,51 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE - -#ifdef CONFIG_MPROFILE_KERNEL - -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -/* - * For mprofile-kernel we use a special stub for ftrace_caller() because we - * can't rely on r2 containing this module's TOC when we enter the stub. - * - * That can happen if the function calling us didn't need to use the toc. In - * that case it won't have setup r2, and the r2 value will be either the - * kernel's toc, or possibly another modules toc. - * - * To deal with that this stub uses the kernel toc, which is always accessible - * via the paca (in r13). The target (ftrace_caller()) is responsible for - * saving and restoring the toc before returning. 
- */ -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, - struct module *me, unsigned long addr) +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) { - struct ppc64_stub_entry *entry; - unsigned int i, num_stubs; - /* - * ld r12,PACATOC(r13) - * addis r12,r12,<high> - * addi r12,r12,<low> - * mtctr r12 - * bctr - */ - static u32 stub_insns[] = { - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, - PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, - }; - long reladdr; + struct ppc64_stub_entry *stub; + func_desc_t funcdata; + u32 magic; - num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry); + if (!within_module_core(addr, mod)) { + pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* Find the next available stub entry */ - entry = (void *)sechdrs[me->arch.stubs_section].sh_addr; - for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++); + stub = (struct ppc64_stub_entry *)addr; - if (i >= num_stubs) { - pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name); - return 0; + if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; } - memcpy(entry->jump, stub_insns, sizeof(stub_insns)); - - /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = addr - kernel_toc_addr(); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of %ps out of range of kernel_toc.\n", - me->name, (void *)addr); - return 0; + if (magic != STUB_MAGIC) { + pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; } - entry->jump[1] |= PPC_HA(reladdr); - entry->jump[2] |= PPC_LO(reladdr); + if (probe_kernel_read(&funcdata, &stub->funcdata, 
sizeof(funcdata))) { + pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ - entry->funcdata = func_desc(addr); - entry->magic = STUB_MAGIC; + *target = stub_func_addr(funcdata); - return (unsigned long)entry; -} -#else -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, - struct module *me, unsigned long addr) -{ - return stub_for_addr(sechdrs, addr, me); + return 0; } -#endif int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { - mod->arch.tramp = create_ftrace_stub(sechdrs, mod, - (unsigned long)ftrace_caller); + mod->arch.tramp = stub_for_addr(sechdrs, + (unsigned long)ftrace_caller, + mod, + "ftrace_caller"); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - mod->arch.tramp_regs = create_ftrace_stub(sechdrs, mod, - (unsigned long)ftrace_regs_caller); + mod->arch.tramp_regs = stub_for_addr(sechdrs, + (unsigned long)ftrace_regs_caller, + mod, + "ftrace_regs_caller"); if (!mod->arch.tramp_regs) return -ENOENT; #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 0cd1c88bfc8b..532f22637783 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -852,8 +852,8 @@ loff_t __init nvram_create_partition(const char *name, int sig, BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16); /* Convert sizes from bytes to blocks */ - req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; - min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + req_size = ALIGN(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + min_size = ALIGN(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; /* If no minimum size specified, make it the same as the * requested size diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 024f7aad1952..69bfe96884e2 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -16,6 +16,7 
@@ #include <asm/code-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> +#include <asm/inst.h> #define TMPL_CALL_HDLR_IDX \ (optprobe_template_call_handler - optprobe_template_entry) @@ -99,8 +100,9 @@ static unsigned long can_optimize(struct kprobe *p) * Ensure that the instruction is not a conditional branch, * and that can be emulated. */ - if (!is_conditional_branch(*p->ainsn.insn) && - analyse_instr(&op, &regs, *p->ainsn.insn) == 1) { + if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) && + analyse_instr(&op, &regs, + ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) { emulate_update_regs(&regs, &op); nip = regs.nip; } @@ -147,50 +149,57 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) { /* addis r4,0,(insn)@h */ - patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(4) | - ((val >> 16) & 0xffff)); + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(4) | + ((val >> 16) & 0xffff))); addr++; /* ori r4,r4,(insn)@l */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(4) | - ___PPC_RS(4) | (val & 0xffff)); + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(4) | + ___PPC_RS(4) | (val & 0xffff))); } /* * Generate instructions to load provided immediate 64-bit value - * to register 'r3' and patch these instructions at 'addr'. + * to register 'reg' and patch these instructions at 'addr'.
*/ -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - /* lis r3,(op)@highest */ - patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(3) | - ((val >> 48) & 0xffff)); + /* lis reg,(op)@highest */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | + ((val >> 48) & 0xffff))); addr++; - /* ori r3,r3,(op)@higher */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 32) & 0xffff)); + /* ori reg,reg,(op)@higher */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 32) & 0xffff))); addr++; - /* rldicr r3,r3,32,31 */ - patch_instruction(addr, PPC_INST_RLDICR | ___PPC_RA(3) | - ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31)); + /* rldicr reg,reg,32,31 */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | + ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); addr++; - /* oris r3,r3,(op)@h */ - patch_instruction(addr, PPC_INST_ORIS | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 16) & 0xffff)); + /* oris reg,reg,(op)@h */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 16) & 0xffff))); addr++; - /* ori r3,r3,(op)@l */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | (val & 0xffff)); + /* ori reg,reg,(op)@l */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | (val & 0xffff))); } int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { - kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step; - kprobe_opcode_t *op_callback_addr, *emulate_step_addr; + struct ppc_inst branch_op_callback, branch_emulate_step, temp; + kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; long b_offset; unsigned long nip, size; int rc, i; @@ 
-230,7 +239,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); pr_devel("Copying template to %p, size %lu\n", buff, size); for (i = 0; i < size; i++) { - rc = patch_instruction(buff + i, *(optprobe_template_entry + i)); + rc = patch_instruction((struct ppc_inst *)(buff + i), + ppc_inst(*(optprobe_template_entry + i))); if (rc < 0) goto error; } @@ -239,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. branch to optimized_callback() and emulate_step() @@ -251,29 +261,34 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; } - branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX, - (unsigned long)op_callback_addr, - BRANCH_SET_LINK); + rc = create_branch(&branch_op_callback, + (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), + (unsigned long)op_callback_addr, + BRANCH_SET_LINK); - branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX, - (unsigned long)emulate_step_addr, - BRANCH_SET_LINK); + rc |= create_branch(&branch_emulate_step, + (struct ppc_inst *)(buff + TMPL_EMULATE_IDX), + (unsigned long)emulate_step_addr, + BRANCH_SET_LINK); - if (!branch_op_callback || !branch_emulate_step) + if (rc) goto error; - patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); - patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); + patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), + branch_op_callback); + patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX), + branch_emulate_step); /* * 3. 
load instruction to be emulated into relevant register, and */ - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline */ - patch_branch(buff + TMPL_RET_IDX, (unsigned long)nip, 0); + patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0); flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); @@ -305,6 +320,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) void arch_optimize_kprobes(struct list_head *oplist) { + struct ppc_inst instr; struct optimized_kprobe *op; struct optimized_kprobe *tmp; @@ -315,9 +331,10 @@ void arch_optimize_kprobes(struct list_head *oplist) */ memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); - patch_instruction(op->kp.addr, - create_branch((unsigned int *)op->kp.addr, - (unsigned long)op->optinsn.insn, 0)); + create_branch(&instr, + (struct ppc_inst *)op->kp.addr, + (unsigned long)op->optinsn.insn, 0); + patch_instruction((struct ppc_inst *)op->kp.addr, instr); list_del_init(&op->list); } } diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index cf383520843f..ff8ba4d3824d 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -94,6 +94,9 @@ optprobe_template_insn: /* 2, Pass instruction to be emulated in r4 */ nop nop + nop + nop + nop .global optprobe_template_call_emulate optprobe_template_call_emulate: diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 3f91ccaa9c74..8d96169c597e 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -16,6 +16,7 @@ #include <asm/kexec.h> #include <asm/svm.h> #include <asm/ultravisor.h> +#include <asm/rtas.h> #include "setup.h" @@ -164,6 +165,30 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) #endif 
/* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_PSERIES +/** + * new_rtas_args() - Allocates rtas args + * @cpu: CPU number + * @limit: Memory limit for this allocation + * + * Allocates a struct rtas_args and return it's pointer, + * if not in Hypervisor mode + * + * Return: Pointer to allocated rtas_args + * NULL if CPU in Hypervisor Mode + */ +static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) +{ + limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX); + + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + return NULL; + + return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES, + limit, cpu); +} +#endif /* CONFIG_PPC_PSERIES */ + /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the * hypervisor and Linux. @@ -202,6 +227,10 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int /* For now -- if we have threads this will be adjusted later */ new_paca->tcd_ptr = &new_paca->tcd; #endif + +#ifdef CONFIG_PPC_PSERIES + new_paca->rtas_args_reentrant = NULL; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ @@ -274,6 +303,9 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_BOOK3S_64 paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif +#ifdef CONFIG_PPC_PSERIES + paca->rtas_args_reentrant = new_rtas_args(cpu, limit); +#endif paca_struct_size += sizeof(struct paca_struct); } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index bf83f76563a3..2fc12198ec07 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev) struct pci_controller *phb = pci_bus_to_host(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); - eeh_remove_device(dev); - if (phb->controller_ops.release_device) phb->controller_ops.release_device(dev); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c 
index d9ac980c398c..9312e6eda7ff 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -100,7 +100,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) pci_name(bus->self)); #ifdef CONFIG_PPC_BOOK3S_64 - __flush_hash_table_range(&init_mm, res->start + _IO_BASE, + __flush_hash_table_range(res->start + _IO_BASE, res->end + _IO_BASE + 1); #endif return 0; @@ -154,8 +154,8 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) unsigned long size_page; unsigned long io_virt_offset; - phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); - size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE); + phys_page = ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); + size_page = ALIGN(hose->pci_io_size, PAGE_SIZE); /* Make sure IO area address is clear */ hose->io_base_alloc = NULL; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9c21288f8645..048d64c4e115 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -629,15 +629,12 @@ void do_break (struct pt_regs *regs, unsigned long address, if (debugger_break_match(regs)) return; - /* Clear the breakpoint */ - hw_breakpoint_disable(); - /* Deliver the signal to userspace */ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk); +static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]); #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* @@ -711,21 +708,49 @@ void switch_booke_debug_regs(struct debug_reg *new_debug) EXPORT_SYMBOL_GPL(switch_booke_debug_regs); #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ #ifndef CONFIG_HAVE_HW_BREAKPOINT -static void set_breakpoint(struct arch_hw_breakpoint *brk) +static void set_breakpoint(int i, struct arch_hw_breakpoint *brk) { preempt_disable(); - __set_breakpoint(brk); + __set_breakpoint(i, brk); preempt_enable(); } static void set_debug_reg_defaults(struct thread_struct *thread) { - 
thread->hw_brk.address = 0; - thread->hw_brk.type = 0; - thread->hw_brk.len = 0; - thread->hw_brk.hw_len = 0; - if (ppc_breakpoint_available()) - set_breakpoint(&thread->hw_brk); + int i; + struct arch_hw_breakpoint null_brk = {0}; + + for (i = 0; i < nr_wp_slots(); i++) { + thread->hw_brk[i] = null_brk; + if (ppc_breakpoint_available()) + set_breakpoint(i, &thread->hw_brk[i]); + } +} + +static inline bool hw_brk_match(struct arch_hw_breakpoint *a, + struct arch_hw_breakpoint *b) +{ + if (a->address != b->address) + return false; + if (a->type != b->type) + return false; + if (a->len != b->len) + return false; + /* no need to check hw_len. it's calculated from address and len */ + return true; +} + +static void switch_hw_breakpoint(struct task_struct *new) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (likely(hw_brk_match(this_cpu_ptr(&current_brk[i]), + &new->thread.hw_brk[i]))) + continue; + + __set_breakpoint(i, &new->thread.hw_brk[i]); + } } #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -772,12 +797,12 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW | LCTRL1_CRWF_RW; unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN; - unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN; - unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN; + unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE); + unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE); if (start_addr == 0) lctrl2 |= LCTRL2_LW0LA_F; - else if (end_addr == ~0U) + else if (end_addr == 0) lctrl2 |= LCTRL2_LW0LA_E; else lctrl2 |= LCTRL2_LW0LA_EandF; @@ -793,20 +818,20 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO; mtspr(SPRN_CMPE, start_addr - 1); - mtspr(SPRN_CMPF, end_addr + 1); + mtspr(SPRN_CMPF, end_addr);
mtspr(SPRN_LCTRL1, lctrl1); mtspr(SPRN_LCTRL2, lctrl2); return 0; } -void __set_breakpoint(struct arch_hw_breakpoint *brk) +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk)); + memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk)); if (dawr_enabled()) // Power8 or later - set_dawr(brk); + set_dawr(nr, brk); else if (IS_ENABLED(CONFIG_PPC_8xx)) set_breakpoint_8xx(brk); else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) @@ -829,19 +854,6 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); -static inline bool hw_brk_match(struct arch_hw_breakpoint *a, - struct arch_hw_breakpoint *b) -{ - if (a->address != b->address) - return false; - if (a->type != b->type) - return false; - if (a->len != b->len) - return false; - /* no need to check hw_len. it's calculated from address and len */ - return true; -} - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) @@ -1174,8 +1186,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk))) - __set_breakpoint(&new->thread.hw_brk); + switch_hw_breakpoint(new); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -1228,7 +1239,8 @@ struct task_struct *__switch_to(struct task_struct *prev, * mappings, we must issue a cp_abort to clear any state and * prevent snooping, corruption or a covert channel.
*/ - if (current->thread.used_vas) + if (current->mm && + atomic_read(&current->mm->context.vas_windows)) asm volatile(PPC_CP_ABORT); } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -1412,7 +1424,7 @@ void show_regs(struct pt_regs * regs) print_msr_bits(regs->msr); pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); - if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) + if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -1467,27 +1479,6 @@ void arch_setup_new_exec(void) } #endif -int set_thread_uses_vas(void) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -EINVAL; - - current->thread.used_vas = 1; - - /* - * Even a process that has no foreign real address mapping can use - * an unpaired COPY instruction (to no real effect). Issue CP_ABORT - * to clear any pending COPY and prevent a covert channel. - * - * __switch_to() will issue CP_ABORT on future context switches.
- */ - asm volatile(PPC_CP_ABORT); - -#endif /* CONFIG_PPC_BOOK3S_64 */ - return 0; -} - #ifdef CONFIG_PPC64 /** * Assign a TIDR (thread ID) for task @t and set it in the thread @@ -1610,6 +1601,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; struct thread_info *ti = task_thread_info(p); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int i; +#endif klp_init_thread_info(p); @@ -1669,7 +1663,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, p->thread.ksp_limit = (unsigned long)end_of_stack(p); #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT - p->thread.ptrace_bps[0] = NULL; + for (i = 0; i < nr_wp_slots(); i++) + p->thread.ptrace_bps[i] = NULL; #endif p->thread.fp_save_area = NULL; @@ -1740,7 +1735,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * FULL_REGS(regs) return true. This is necessary to allow * ptrace to examine the thread immediately after exec. 
*/ - regs->trap &= ~1UL; + SET_FULL_REGS(regs); #ifdef CONFIG_PPC32 regs->mq = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6620f37abe73..6a3bac357e24 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -96,8 +96,8 @@ static inline int overlaps_initrd(unsigned long start, unsigned long size) if (!initrd_start) return 0; - return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) && - start <= _ALIGN_UP(initrd_end, PAGE_SIZE); + return (start + size) > ALIGN_DOWN(initrd_start, PAGE_SIZE) && + start <= ALIGN(initrd_end, PAGE_SIZE); #else return 0; #endif @@ -515,9 +515,14 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, size = 0x80000000ul - base; } + if (!validate_mem_limit(base, &size)) + continue; + DBG("Adding: %llx -> %llx\n", base, size); - if (validate_mem_limit(base, &size)) - memblock_add(base, size); + memblock_add(base, size); + + if (lmb->flags & DRCONF_MEM_HOTREMOVABLE) + memblock_mark_hotplug(base, size); } while (--rngs); } #endif /* CONFIG_PPC_PSERIES */ @@ -623,9 +628,9 @@ static void __init early_reserve_mem(void) #ifdef CONFIG_BLK_DEV_INITRD /* Then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) { - memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), - _ALIGN_UP(initrd_end, PAGE_SIZE) - - _ALIGN_DOWN(initrd_start, PAGE_SIZE)); + memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), + ALIGN(initrd_end, PAGE_SIZE) - + ALIGN_DOWN(initrd_start, PAGE_SIZE)); } #endif /* CONFIG_BLK_DEV_INITRD */ @@ -685,6 +690,23 @@ static void __init tm_init(void) static void tm_init(void) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC64 +static void __init save_fscr_to_task(void) +{ + /* + * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we + * have configured via the device tree features or via __init_FSCR(). + * That value will then be propagated to pid 1 (init) and all future + * processes. 
+ */ + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + init_task.thread.fscr = mfspr(SPRN_FSCR); +} +#else +static inline void save_fscr_to_task(void) {}; +#endif + + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -773,6 +795,8 @@ void __init early_init_devtree(void *params) BUG(); } + save_fscr_to_task(); + #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are * NCPUS-1 non-boot CPUs :-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 806be751c336..5f15b10eb007 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -920,7 +920,7 @@ struct option_vector6 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[12]; + struct { u32 mask, val; } pvrs[14]; u8 num_vectors; @@ -974,6 +974,14 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .val = cpu_to_be32(0x004e0000), }, { + .mask = cpu_to_be32(0xffff0000), /* POWER10 */ + .val = cpu_to_be32(0x00800000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */ + .val = cpu_to_be32(0x0f000006), + }, + { .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */ .val = cpu_to_be32(0x0f000005), }, @@ -1002,7 +1010,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .byte1 = 0, .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, - .arch_versions3 = OV1_PPC_3_00, + .arch_versions3 = OV1_PPC_3_00 | OV1_PPC_3_1, }, .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), @@ -1449,18 +1457,18 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) unsigned long addr = 0; if (align) - base = _ALIGN_UP(base, align); + base = ALIGN(base, align); prom_debug("%s(%lx, %lx)\n", __func__, size, align); if (ram_top == 0) prom_panic("alloc_up() called with mem not initialized\n"); if (align) - base = 
_ALIGN_UP(alloc_bottom, align); + base = ALIGN(alloc_bottom, align); else base = alloc_bottom; for(; (base + size) <= alloc_top; - base = _ALIGN_UP(base + 0x100000, align)) { + base = ALIGN(base + 0x100000, align)) { prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) @@ -1500,7 +1508,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, if (highmem) { /* Carve out storage for the TCE table. */ - addr = _ALIGN_DOWN(alloc_top_high - size, align); + addr = ALIGN_DOWN(alloc_top_high - size, align); if (addr <= alloc_bottom) return 0; /* Will we bump into the RMO ? If yes, check out that we @@ -1518,9 +1526,9 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, goto bail; } - base = _ALIGN_DOWN(alloc_top - size, align); + base = ALIGN_DOWN(alloc_top - size, align); for (; base > alloc_bottom; - base = _ALIGN_DOWN(base - 0x100000, align)) { + base = ALIGN_DOWN(base - 0x100000, align)) { prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) @@ -1586,8 +1594,8 @@ static void __init reserve_mem(u64 base, u64 size) * have our terminator with "size" set to 0 since we are * dumb and just copy this entire array to the boot params */ - base = _ALIGN_DOWN(base, PAGE_SIZE); - top = _ALIGN_UP(top, PAGE_SIZE); + base = ALIGN_DOWN(base, PAGE_SIZE); + top = ALIGN(top, PAGE_SIZE); size = top - base; if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) @@ -2426,7 +2434,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, { void *ret; - *mem_start = _ALIGN(*mem_start, align); + *mem_start = ALIGN(*mem_start, align); while ((*mem_start + needed) > *mem_end) { unsigned long room, chunk; @@ -2562,7 +2570,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, *lp++ = *p; } *lp = 0; - *mem_start = _ALIGN((unsigned long)lp + 1, 4); + 
*mem_start = ALIGN((unsigned long)lp + 1, 4); } /* get it again for debugging */ @@ -2608,7 +2616,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* push property content */ valp = make_room(mem_start, mem_end, l, 4); call_prom("getprop", 4, 1, node, pname, valp, l); - *mem_start = _ALIGN(*mem_start, 4); + *mem_start = ALIGN(*mem_start, 4); if (!prom_strcmp(pname, "phandle")) has_phandle = 1; @@ -2667,7 +2675,7 @@ static void __init flatten_device_tree(void) prom_panic ("couldn't get device tree root\n"); /* Build header and make room for mem rsv map */ - mem_start = _ALIGN(mem_start, 4); + mem_start = ALIGN(mem_start, 4); hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); dt_header_start = (unsigned long)hdr; diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index f87e7c5c3bf3..697c7e4b5877 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -44,7 +44,7 @@ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) dbginfo->version = 1; dbginfo->num_instruction_bps = 0; if (ppc_breakpoint_available()) - dbginfo->num_data_bps = 1; + dbginfo->num_data_bps = nr_wp_slots(); else dbginfo->num_data_bps = 0; dbginfo->num_condition_regs = 0; @@ -67,11 +67,16 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, /* We only support one DABR and no IABRS at the moment */ if (addr > 0) return -EINVAL; - dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + dabr_fake = ((child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR)); return put_user(dabr_fake, datalp); } +/* + * ptrace_set_debugreg() fakes DABR and DABR is only one. So even if + * internal hw supports more than one watchpoint, we support only one + * watchpoint with this interface. 
+ */ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -137,7 +142,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l return ret; thread->ptrace_bps[0] = bp; - thread->hw_brk = hw_brk; + thread->hw_brk[0] = hw_brk; return 0; } @@ -159,12 +164,37 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l if (set_bp && (!ppc_breakpoint_available())) return -ENODEV; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - task->thread.hw_brk = hw_brk; + task->thread.hw_brk[0] = hw_brk; return 0; } +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static int find_empty_ptrace_bp(struct thread_struct *thread) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!thread->ptrace_bps[i]) + return i; + } + return -1; +} +#endif + +static int find_empty_hw_brk(struct thread_struct *thread) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!thread->hw_brk[i].address) + return i; + } + return -1; +} + long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) { + int i; #ifdef CONFIG_HAVE_HW_BREAKPOINT int len = 0; struct thread_struct *thread = &child->thread; @@ -186,7 +216,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; + brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); brk.type = HW_BRK_TYPE_TRANSLATE; brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) @@ -200,8 +230,9 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf len = 1; else return -EINVAL; - bp = thread->ptrace_bps[0]; - if (bp) + + i = find_empty_ptrace_bp(thread); + if (i < 0) return -ENOSPC; /* Create a new breakpoint request if one doesn't exist already */ @@ -211,27 +242,28 @@ long ppc_set_hwdebug(struct task_struct *child, struct 
ppc_hw_breakpoint *bp_inf arch_bp_generic_fields(brk.type, &attr.bp_type); bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child); - thread->ptrace_bps[0] = bp; + thread->ptrace_bps[i] = bp; if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; + thread->ptrace_bps[i] = NULL; return PTR_ERR(bp); } - return 1; + return i + 1; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) return -EINVAL; - if (child->thread.hw_brk.address) + i = find_empty_hw_brk(&child->thread); + if (i < 0) return -ENOSPC; if (!ppc_breakpoint_available()) return -ENODEV; - child->thread.hw_brk = brk; + child->thread.hw_brk[i] = brk; - return 1; + return i + 1; } long ppc_del_hwdebug(struct task_struct *child, long data) @@ -241,24 +273,24 @@ long ppc_del_hwdebug(struct task_struct *child, long data) struct thread_struct *thread = &child->thread; struct perf_event *bp; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - if (data != 1) + if (data < 1 || data > nr_wp_slots()) return -EINVAL; #ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; + bp = thread->ptrace_bps[data - 1]; if (bp) { unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; + thread->ptrace_bps[data - 1] = NULL; } else { ret = -ENOENT; } return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk.address == 0) + if (child->thread.hw_brk[data - 1].address == 0) return -ENOENT; - child->thread.hw_brk.address = 0; - child->thread.hw_brk.type = 0; + child->thread.hw_brk[data - 1].address = 0; + child->thread.hw_brk[data - 1].type = 0; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ return 0; diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index d75aff31f637..32d62c606681 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -43,7 +43,7 @@ static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) static int set_user_ckpt_trap(struct task_struct *task, unsigned 
long trap) { - task->thread.ckpt_regs.trap = trap & 0xfff0; + set_trap(&task->thread.ckpt_regs, trap); return 0; } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 15e3b79b6395..caeb5822a8f4 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -149,7 +149,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr) */ static int set_user_trap(struct task_struct *task, unsigned long trap) { - task->thread.regs->trap = trap & 0xfff0; + set_trap(task->thread.regs, trap); return 0; } diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index 7976ddf29c0e..7589a9665ffb 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -259,8 +259,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, ret = put_user(child->thread.debug.dac1, (u32 __user *)data); #else dabr_fake = ( - (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + (child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR)); ret = put_user(dabr_fake, (u32 __user *)data); #endif break; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c5fa251b8950..a09eba03f180 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -41,6 +41,7 @@ #include <asm/time.h> #include <asm/mmu.h> #include <asm/topology.h> +#include <asm/paca.h> /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); @@ -1014,6 +1015,57 @@ out: free_cpumask_var(offline_mask); return atomic_read(&data.error); } + +/** + * rtas_call_reentrant() - Used for reentrant rtas calls + * @token: Token for desired reentrant RTAS call + * @nargs: Number of Input Parameters + * @nret: Number of Output Parameters + * @outputs: Array of outputs + * @...: Inputs 
for desired RTAS call + * + * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off", + * "ibm,get-xive" and "ibm,set-xive" are currently reentrant. + * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but + * PACA one instead. + * + * Return: -1 on error, + * First output value of RTAS call if (nret > 0), + * 0 otherwise, + */ +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) +{ + va_list list; + struct rtas_args *args; + unsigned long flags; + int i, ret = 0; + + if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) + return -1; + + local_irq_save(flags); + preempt_disable(); + + /* We use the per-cpu (PACA) rtas args buffer */ + args = local_paca->rtas_args_reentrant; + + va_start(list, outputs); + va_rtas_call_unlocked(args, token, nargs, nret, list); + va_end(list); + + if (nret > 1 && outputs) + for (i = 0; i < nret - 1; ++i) + outputs[i] = be32_to_cpu(args->rets[i + 1]); + + if (nret > 0) + ret = be32_to_cpu(args->rets[0]); + + local_irq_restore(flags); + preempt_enable(); + + return ret; +} + #else /* CONFIG_PPC_PSERIES */ int rtas_ibm_suspend_me(u64 handle) { diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bd70f5be1c27..d86701ce116b 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -7,6 +7,8 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/device.h> +#include <linux/nospec.h> +#include <linux/prctl.h> #include <linux/seq_buf.h> #include <asm/asm-prototypes.h> @@ -14,6 +16,7 @@ #include <asm/debugfs.h> #include <asm/security_features.h> #include <asm/setup.h> +#include <asm/inst.h> u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; @@ -353,6 +356,40 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * return sprintf(buf, "Vulnerable\n"); } +static int ssb_prctl_get(struct task_struct *task) +{ + if (stf_enabled_flush_types == STF_BARRIER_NONE) + /* + * We don't have an 
explicit signal from firmware that we're + * vulnerable or not, we only have certain CPU revisions that + * are known to be vulnerable. + * + * We assume that if we're on another CPU, where the barrier is + * NONE, then we are not vulnerable. + */ + return PR_SPEC_NOT_AFFECTED; + else + /* + * If we do have a barrier type then we are vulnerable. The + * barrier is not a global or per-process mitigation, so the + * only value we can report here is PR_SPEC_ENABLE, which + * appears as "vulnerable" in /proc. + */ + return PR_SPEC_ENABLE; + + return -EINVAL; +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); + default: + return -ENODEV; + } +} + #ifdef CONFIG_DEBUG_FS static int stf_barrier_set(void *data, u64 val) { @@ -403,9 +440,11 @@ static void toggle_count_cache_flush(bool enable) enable = false; if (!enable) { - patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); + patch_instruction_site(&patch__call_flush_count_cache, + ppc_inst(PPC_INST_NOP)); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP); + patch_instruction_site(&patch__call_kvm_flush_link_stack, + ppc_inst(PPC_INST_NOP)); #endif pr_info("link-stack-flush: software flush disabled.\n"); link_stack_flush_enabled = false; @@ -428,7 +467,8 @@ static void toggle_count_cache_flush(bool enable) // If we just need to flush the link stack, patch an early return if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { - patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR); + patch_instruction_site(&patch__flush_link_stack_return, + ppc_inst(PPC_INST_BLR)); no_count_cache_flush(); return; } @@ -439,7 +479,7 @@ static void toggle_count_cache_flush(bool enable) return; } - patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR); + patch_instruction_site(&patch__flush_count_cache_return, 
ppc_inst(PPC_INST_BLR)); count_cache_flush_type = COUNT_CACHE_FLUSH_HW; pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f9c0d888ce8a..c376a0588039 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -306,10 +306,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) } } else { switch (PVR_VER(pvr)) { - case 0x0020: /* 403 family */ - maj = PVR_MAJ(pvr) + 1; - min = PVR_MIN(pvr); - break; case 0x1008: /* 740P/750P ?? */ maj = ((pvr >> 8) & 0xFF) - 1; min = pvr & 0xFF; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 305ca89d856f..d642e42eabb1 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -74,20 +74,20 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); */ notrace void __init machine_init(u64 dt_ptr) { - unsigned int *addr = (unsigned int *)patch_site_addr(&patch__memset_nocache); - unsigned long insn; + struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache); + struct ppc_inst insn; /* Configure static keys first, now that we're relocated. 
*/ setup_feature_keys(); - early_ioremap_setup(); + early_ioremap_init(); /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction_site(&patch__memcpy_nocache, PPC_INST_NOP); + patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP)); - insn = create_cond_branch(addr, branch_target(addr), 0x820000); + create_cond_branch(&insn, addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ /* Do some early initialization based on the flat device tree */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8105010b0e76..bb47555d48a2 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -711,7 +711,7 @@ void __init exc_lvl_early_init(void) */ void __init emergency_stack_init(void) { - u64 limit; + u64 limit, mce_limit; unsigned int i; /* @@ -728,7 +728,16 @@ void __init emergency_stack_init(void) * initialized in kernel/irq.c. These are initialized here in order * to have emergency stacks available as early as possible. */ - limit = min(ppc64_bolted_size(), ppc64_rma_size); + limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size); + + /* + * Machine check on pseries calls rtas, but can't use the static + * rtas_args due to a machine check hitting while the lock is held. + * rtas args have to be under 4GB, so the machine check stack is + * limited to 4GB so args can be put on stack. + */ + if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G) + mce_limit = SZ_4G; for_each_possible_cpu(i) { paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; @@ -738,7 +747,7 @@ void __init emergency_stack_init(void) paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; /* emergency stack for machine check exception handling. 
*/ - paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; + paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE; #endif } } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a264989626fd..b4143b6ff093 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -198,7 +198,10 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, int restart = 1; /* syscall ? */ - if (TRAP(regs) != 0x0C00) + if (!trap_is_syscall(regs)) + return; + + if (trap_norestart(regs)) return; /* error signalled ? */ @@ -258,19 +261,24 @@ static void do_signal(struct task_struct *tsk) if (ksig.sig <= 0) { /* No signal to deliver -- put the saved sigmask back */ restore_saved_sigmask(); - tsk->thread.regs->trap = 0; + set_trap_norestart(tsk->thread.regs); return; /* no signals delivered */ } -#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* * Reenable the DABR before delivering the signal to * user space. The DABR will have been cleared if it * triggered inside the kernel. 
*/ - if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type) - __set_breakpoint(&tsk->thread.hw_brk); -#endif + if (!IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) { + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (tsk->thread.hw_brk[i].address && tsk->thread.hw_brk[i].type) + __set_breakpoint(i, &tsk->thread.hw_brk[i]); + } + } + /* Re-enable the breakpoints for the signal stack */ thread_change_pc(tsk, tsk->thread.regs); @@ -285,7 +293,7 @@ static void do_signal(struct task_struct *tsk) ret = handle_rt_signal64(&ksig, oldset, tsk); } - tsk->thread.regs->trap = 0; + set_trap_norestart(tsk->thread.regs); signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 4f96d29a22bf..ae3da7440b2f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -500,7 +500,7 @@ static long restore_user_regs(struct pt_regs *regs, if (!sig) save_r2 = (unsigned int)regs->gpr[2]; err = restore_general_regs(regs, sr); - regs->trap = 0; + set_trap_norestart(regs); err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); if (!sig) regs->gpr[2] = (unsigned long) save_r2; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index adfde59cf4ba..77061915897f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -350,8 +350,8 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); - /* skip SOFTE */ - regs->trap = 0; + /* Don't allow userspace to set SOFTE */ + set_trap_norestart(regs); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); @@ -472,10 +472,8 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, 
&sc->gp_regs[PT_XER]); err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); - - /* Don't allow userspace to set the trap value */ - regs->trap = 0; - + /* Don't allow userspace to set SOFTE */ + set_trap_norestart(regs); /* These regs are not checkpointed; they can go in 'regs'. */ err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6d2a3a3666f0..c820c95162ff 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1383,7 +1383,7 @@ void __init smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { - pr_info("Using small cores at SMT level\n"); + pr_info("Big cores detected but using small core scheduling\n"); power9_topology[0].mask = smallcore_smt_mask; powerpc_topology[0].mask = smallcore_smt_mask; } diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index cbdf86228eaa..f73f4d72fea4 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -395,6 +395,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) li r3,0 blr +_ASM_NOKPROBE_SYMBOL(swsusp_arch_resume) /* FIXME:This construct is actually not useful since we don't shut * down the instruction MMU, we could just flip back MSR-DR on. @@ -406,4 +407,5 @@ turn_on_mmu: sync isync rfi +_ASM_NOKPROBE_SYMBOL(turn_on_mmu) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 7b7c89cad901..79edba3ab312 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -102,6 +102,31 @@ notrace long system_call_exception(long r3, long r4, long r5, } /* + * local irqs must be disabled. Returns false if the caller must re-enable + * them, check for new work, and try again. 
+ */ +static notrace inline bool prep_irq_for_enabled_exit(void) +{ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + + /* This pattern matches prep_irq_for_idle */ + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending_nocheck())) { + /* Took an interrupt, may have more exit work to do. */ + __hard_RI_enable(); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + return false; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + + return true; +} + +/* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call * exit assembly should additionally load all GPR registers and CTR and XER @@ -186,21 +211,10 @@ again: } } - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - - /* This pattern matches prep_irq_for_idle */ - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit())) { local_irq_enable(); - /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -228,6 +242,10 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * AMR can only have been unlocked if we interrupted the kernel. 
+ */ kuap_check_amr(); local_irq_save(flags); @@ -264,19 +282,11 @@ again: } } - trace_hardirqs_on(); - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit())) { local_irq_enable(); local_irq_disable(); - /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); #ifdef CONFIG_PPC_BOOK3E if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { @@ -307,13 +317,14 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unsigned long *ti_flagsp = &current_thread_info()->flags; unsigned long flags; unsigned long ret = 0; + unsigned long amr; if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); - kuap_check_amr(); + amr = kuap_get_and_check_amr(); if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); @@ -334,13 +345,7 @@ again: } } - trace_hardirqs_on(); - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - irq_soft_mask_set(IRQS_ALL_DISABLED); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit())) { /* * Can't local_irq_restore to replay if we were in * interrupt context. Must replay directly. @@ -354,8 +359,6 @@ again: /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); } else { /* Returning to a kernel context with local irqs disabled. */ __hard_EE_RI_disable(); @@ -369,10 +372,11 @@ again: #endif /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. 
+ * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr, + * which would cause Read-After-Write stalls. Hence, we take the AMR + * value from the check above. */ - kuap_restore_amr(regs); + kuap_restore_amr(regs, amr); return ret; } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 479c70680b76..571b3259697e 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -19,6 +19,7 @@ #include <asm/smp.h> #include <asm/pmc.h> #include <asm/firmware.h> +#include <asm/idle.h> #include <asm/svm.h> #include "cacheinfo.h" @@ -760,6 +761,74 @@ static void create_svm_file(void) } #endif /* CONFIG_PPC_SVM */ +#ifdef CONFIG_PPC_PSERIES +static void read_idle_purr(void *val) +{ + u64 *ret = val; + + *ret = read_this_idle_purr(); +} + +static ssize_t idle_purr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + u64 val; + + smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1); + return sprintf(buf, "%llx\n", val); +} +static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL); + +static void create_idle_purr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_create_file(s, &dev_attr_idle_purr); +} + +static void remove_idle_purr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_remove_file(s, &dev_attr_idle_purr); +} + +static void read_idle_spurr(void *val) +{ + u64 *ret = val; + + *ret = read_this_idle_spurr(); +} + +static ssize_t idle_spurr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + u64 val; + + smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1); + return sprintf(buf, "%llx\n", val); +} +static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL); + +static void create_idle_spurr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_create_file(s, 
&dev_attr_idle_spurr); +} + +static void remove_idle_spurr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_remove_file(s, &dev_attr_idle_spurr); +} + +#else /* CONFIG_PPC_PSERIES */ +#define create_idle_purr_file(s) +#define remove_idle_purr_file(s) +#define create_idle_spurr_file(s) +#define remove_idle_spurr_file(s) +#endif /* CONFIG_PPC_PSERIES */ + static int register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -823,10 +892,13 @@ static int register_cpu_online(unsigned int cpu) if (!firmware_has_feature(FW_FEATURE_LPAR)) add_write_permission_dev_attr(&dev_attr_purr); device_create_file(s, &dev_attr_purr); + create_idle_purr_file(s); } - if (cpu_has_feature(CPU_FTR_SPURR)) + if (cpu_has_feature(CPU_FTR_SPURR)) { device_create_file(s, &dev_attr_spurr); + create_idle_spurr_file(s); + } if (cpu_has_feature(CPU_FTR_DSCR)) device_create_file(s, &dev_attr_dscr); @@ -910,11 +982,15 @@ static int unregister_cpu_online(unsigned int cpu) device_remove_file(s, &dev_attr_mmcra); #endif /* CONFIG_PMU_SYSFS */ - if (cpu_has_feature(CPU_FTR_PURR)) + if (cpu_has_feature(CPU_FTR_PURR)) { device_remove_file(s, &dev_attr_purr); + remove_idle_purr_file(s); + } - if (cpu_has_feature(CPU_FTR_SPURR)) + if (cpu_has_feature(CPU_FTR_SPURR)) { device_remove_file(s, &dev_attr_spurr); + remove_idle_spurr_file(s); + } if (cpu_has_feature(CPU_FTR_DSCR)) device_remove_file(s, &dev_attr_dscr); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7ea0ca044b65..5e399628f51a 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -27,6 +27,7 @@ #include <asm/code-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> +#include <asm/inst.h> #ifdef CONFIG_DYNAMIC_FTRACE @@ -40,23 +41,23 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static unsigned int +static struct ppc_inst ftrace_call_replace(unsigned long ip, 
unsigned long addr, int link) { - unsigned int op; + struct ppc_inst op; addr = ppc_function_entry((void *)addr); /* if (link) set op to 'bl' else 'b' */ - op = create_branch((unsigned int *)ip, addr, link ? 1 : 0); + create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0); return op; } static int -ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) +ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) { - unsigned int replaced; + struct ppc_inst replaced; /* * Note: @@ -67,18 +68,18 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ /* read the text we want to modify */ - if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read_inst(&replaced, (void *)ip)) return -EFAULT; /* Make sure it is what we expect it to be */ - if (replaced != old) { + if (!ppc_inst_equal(replaced, old)) { pr_err("%p: replaced (%#x) != old (%#x)", - (void *)ip, replaced, old); + (void *)ip, ppc_inst_val(replaced), ppc_inst_val(old)); return -EINVAL; } /* replace the text with the new text */ - if (patch_instruction((unsigned int *)ip, new)) + if (patch_instruction((struct ppc_inst *)ip, new)) return -EPERM; return 0; @@ -89,27 +90,28 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { + struct ppc_inst op; addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ - return create_branch((unsigned int *)ip, addr, 0); + return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0; } -static int is_bl_op(unsigned int op) +static int is_bl_op(struct ppc_inst op) { - return (op & 0xfc000003) == 0x48000001; + return (ppc_inst_val(op) & 0xfc000003) == 0x48000001; } -static int is_b_op(unsigned int op) +static int is_b_op(struct ppc_inst op) { - return (op & 0xfc000003) == 0x48000000; + return (ppc_inst_val(op) & 0xfc000003) == 0x48000000; } 
-static unsigned long find_bl_target(unsigned long ip, unsigned int op) +static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) { int offset; - offset = (op & 0x03fffffc); + offset = (ppc_inst_val(op) & 0x03fffffc); /* make it signed */ if (offset & 0x02000000) offset |= 0xfe000000; @@ -125,17 +127,17 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op, pop; + struct ppc_inst op, pop; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -160,16 +162,18 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; + pop = ppc_inst(PPC_INST_NOP); - if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { + if (probe_kernel_read_inst(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); return -EFAULT; } /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { - pr_err("Unexpected instruction %08x around bl _mcount\n", op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && + !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { + pr_err("Unexpected instruction %08x around bl _mcount\n", + ppc_inst_val(op)); return -EINVAL; } #else @@ -187,24 +191,24 @@ __ftrace_make_nop(struct module *mod, * Use a b +8 to jump over the load. */ - pop = PPC_INST_BRANCH | 8; /* b +8 */ + pop = ppc_inst(PPC_INST_BRANCH | 8); /* b +8 */ /* * Check what is in the next instruction. We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. 
*/ - if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { + if (probe_kernel_read_inst(&op, (void *)(ip + 4))) { pr_err("Fetching op failed.\n"); return -EFAULT; } - if (op != PPC_INST_LD_TOC) { - pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { + pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((unsigned int *)ip, pop)) { + if (patch_instruction((struct ppc_inst *)ip, pop)) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -217,7 +221,7 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + struct ppc_inst op; unsigned int jmp[4]; unsigned long ip = rec->ip; unsigned long tramp; @@ -227,7 +231,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -274,9 +278,9 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - op = PPC_INST_NOP; + op = ppc_inst(PPC_INST_NOP); - if (patch_instruction((unsigned int *)ip, op)) + if (patch_instruction((struct ppc_inst *)ip, op)) return -EPERM; return 0; @@ -287,6 +291,7 @@ __ftrace_make_nop(struct module *mod, static unsigned long find_ftrace_tramp(unsigned long ip) { int i; + struct ppc_inst instr; /* * We have the compiler generated long_branch tramps at the end @@ -295,7 +300,8 @@ static unsigned long find_ftrace_tramp(unsigned long ip) for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) if (!ftrace_tramps[i]) continue; - else if (create_branch((void *)ip, ftrace_tramps[i], 0)) + else if (create_branch(&instr, (void *)ip, + ftrace_tramps[i], 0) == 0) return ftrace_tramps[i]; return 0; @@ -322,8 +328,10 @@ static int add_ftrace_tramp(unsigned long tramp) */ static int 
setup_mcount_compiler_tramp(unsigned long tramp) { - int i, op; + int i; + struct ppc_inst op; unsigned long ptr; + struct ppc_inst instr; static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; /* Is this a known long jump tramp? */ @@ -341,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; /* New trampoline -- read where this goes */ - if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)tramp)) { pr_debug("Fetching opcode failed.\n"); return -1; } @@ -366,13 +374,13 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) #else ptr = ppc_global_function_entry((void *)ftrace_caller); #endif - if (!create_branch((void *)tramp, ptr, 0)) { + if (create_branch(&instr, (void *)tramp, ptr, 0)) { pr_debug("%ps is not reachable from existing mcount tramp\n", (void *)ptr); return -1; } - if (patch_branch((unsigned int *)tramp, ptr, 0)) { + if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { pr_debug("REL24 out of range!\n"); return -1; } @@ -388,17 +396,17 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; - unsigned int op; + struct ppc_inst op; /* Read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -416,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) { + if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -428,7 +436,7 @@ int 
ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -438,7 +446,7 @@ int ftrace_make_nop(struct module *mod, if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_call_replace(ip, addr, 1); - new = PPC_INST_NOP; + new = ppc_inst(PPC_INST_NOP); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) return __ftrace_make_nop_kernel(rec, addr); @@ -481,7 +489,7 @@ int ftrace_make_nop(struct module *mod, */ #ifndef CONFIG_MPROFILE_KERNEL static int -expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* * We expect to see: @@ -492,16 +500,17 @@ expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. 
*/ - if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) + if (!ppc_inst_equal(op0, ppc_inst(0x48000008)) || + (ppc_inst_val(op1) & 0xffff0000) != 0xe8410000) return 0; return 1; } #else static int -expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ - if (op0 != PPC_INST_NOP) + if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP))) return 0; return 1; } @@ -510,18 +519,22 @@ expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op[2]; + struct ppc_inst op[2]; + struct ppc_inst instr; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; /* read where this goes */ - if (probe_kernel_read(op, ip, sizeof(op))) + if (probe_kernel_read_inst(op, ip)) + return -EFAULT; + + if (probe_kernel_read_inst(op + 1, ip + 4)) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { pr_err("Unexpected call sequence at %p: %x %x\n", - ip, op[0], op[1]); + ip, ppc_inst_val(op[0]), ppc_inst_val(op[1])); return -EINVAL; } @@ -557,7 +570,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* Ensure branch is within 24 bits */ - if (!create_branch(ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&instr, ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } @@ -574,16 +587,17 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + int err; + struct ppc_inst op; unsigned long ip = rec->ip; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read_inst(&op, (void *)ip)) return -EFAULT; /* It should be pointing to a nop */ - if (op != PPC_INST_NOP) { - pr_err("Expected NOP but 
have %x\n", op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + pr_err("Expected NOP but have %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -594,16 +608,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* create the branch to the trampoline */ - op = create_branch((unsigned int *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); - if (!op) { + err = create_branch(&op, (struct ppc_inst *)ip, + rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; } pr_devel("write to %lx\n", rec->ip); - if (patch_instruction((unsigned int *)ip, op)) + if (patch_instruction((struct ppc_inst *)ip, op)) return -EPERM; return 0; @@ -613,7 +627,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + struct ppc_inst op; void *ip = (void *)rec->ip; unsigned long tramp, entry, ptr; @@ -634,13 +648,13 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } /* Make sure we have a nop */ - if (probe_kernel_read(&op, ip, sizeof(op))) { + if (probe_kernel_read_inst(&op, ip)) { pr_err("Unable to read ftrace location %p\n", ip); return -EFAULT; } - if (op != PPC_INST_NOP) { - pr_err("Unexpected call sequence at %p: %x\n", ip, op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op)); return -EINVAL; } @@ -661,7 +675,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -670,7 +684,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) */ if (test_24bit_addr(ip, addr)) { /* within range */ - old = PPC_INST_NOP; + old = ppc_inst(PPC_INST_NOP); new = 
ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) @@ -700,7 +714,7 @@ static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - unsigned int op; + struct ppc_inst op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -712,14 +726,14 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); return -EINVAL; } @@ -748,7 +762,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* The new target may be within range */ if (test_24bit_addr(ip, addr)) { /* within range */ - if (patch_branch((unsigned int *)ip, addr, BRANCH_SET_LINK)) { + if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -776,12 +790,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* Ensure branch is within 24 bits */ - if (!create_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -794,7 +808,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -834,10 +848,10 
@@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - unsigned int old, new; + struct ppc_inst old, new; int ret; - old = *(unsigned int *)&ftrace_call; + old = ppc_inst_read((struct ppc_inst *)&ftrace_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); @@ -845,7 +859,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); - old = *(unsigned int *)&ftrace_regs_call; + old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); } @@ -919,7 +933,7 @@ int ftrace_enable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - unsigned int old, new; + struct ppc_inst old, new; old = ftrace_call_replace(ip, stub, 0); new = ftrace_call_replace(ip, addr, 0); @@ -932,7 +946,7 @@ int ftrace_disable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - unsigned int old, new; + struct ppc_inst old, new; old = ftrace_call_replace(ip, addr, 0); new = ftrace_call_replace(ip, stub, 0); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b44dd75de517..067e501f2202 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -442,6 +442,9 @@ void system_reset_exception(struct pt_regs *regs) { unsigned long hsrr0, hsrr1; bool saved_hsrrs = false; + u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); + + this_cpu_set_ftrace_enabled(0); nmi_enter(); @@ -504,11 +507,11 @@ out: #ifdef 
CONFIG_PPC_BOOK3S_64 BUG_ON(get_paca()->in_nmi == 0); if (get_paca()->in_nmi > 1) - nmi_panic(regs, "Unrecoverable nested System Reset"); + die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable System Reset"); + die("Unrecoverable System Reset", regs, SIGABRT); if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); @@ -517,6 +520,8 @@ out: nmi_exit(); + this_cpu_set_ftrace_enabled(ftrace_enabled); + /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -576,6 +581,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR #define REASON_TRAP ESR_PTR +#define REASON_PREFIXED 0 +#define REASON_BOUNDARY 0 /* single-step stuff */ #define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC) @@ -590,12 +597,16 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_ILLEGAL SRR1_PROGILL #define REASON_PRIVILEGED SRR1_PROGPRIV #define REASON_TRAP SRR1_PROGTRAP +#define REASON_PREFIXED SRR1_PREFIXED +#define REASON_BOUNDARY SRR1_BOUNDARY #define single_stepping(regs) ((regs)->msr & MSR_SE) #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) #define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) #endif +#define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4) + #if defined(CONFIG_E500) int machine_check_e500mc(struct pt_regs *regs) { @@ -817,7 +828,19 @@ void machine_check_exception(struct pt_regs *regs) { int recover = 0; - nmi_enter(); + /* + * BOOK3S_64 does not call this handler as a non-maskable interrupt + * (it uses its own early real-mode handler to handle the MCE proper + * and then raises irq_work to call this handler when interrupts are + * enabled). + * + * This is silly. The BOOK3S_64 should just call a different function + * rather than expecting semantics to magically change. 
Something + * like 'non_nmi_machine_check_exception()', perhaps? + */ + const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64); + + if (nmi) nmi_enter(); __this_cpu_inc(irq_stat.mce_exceptions); @@ -843,18 +866,18 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - nmi_exit(); + if (nmi) nmi_exit(); die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable Machine check"); + die("Unrecoverable Machine check", regs, SIGBUS); return; bail: - nmi_exit(); + if (nmi) nmi_exit(); } void SMIException(struct pt_regs *regs) @@ -1583,11 +1606,20 @@ void alignment_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; + unsigned long reason; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); + reason = get_reason(regs); + + if (reason & REASON_BOUNDARY) { + sig = SIGBUS; + code = BUS_ADRALN; + goto bad; + } + if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) goto bail; @@ -1596,7 +1628,8 @@ void alignment_exception(struct pt_regs *regs) fixed = fix_alignment(regs); if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ + /* skip over emulated instruction */ + regs->nip += inst_length(reason); emulate_single_step(regs); goto bail; } @@ -1609,6 +1642,7 @@ void alignment_exception(struct pt_regs *regs) sig = SIGBUS; code = BUS_ADRALN; } +bad: if (user_mode(regs)) _exception(sig, regs, code, regs->dar); else @@ -1710,6 +1744,7 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TAR_LG] = "TAR", [FSCR_MSGP_LG] = "MSGP", [FSCR_SCV_LG] = "SCV", + [FSCR_PREFIX_LG] = "PREFIX", }; char *facility = "unknown"; u64 value; diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 1cfef0e5fec5..d200e7df7167 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -14,6 
+14,7 @@ #include <linux/kdebug.h> #include <asm/sstep.h> +#include <asm/inst.h> #define UPROBE_TRAP_NR UINT_MAX @@ -111,7 +112,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = utask->vaddr + MAX_UINSN_BYTES; + regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn); user_disable_single_step(current); return 0; @@ -173,7 +174,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, auprobe->insn); + ret = emulate_step(regs, ppc_inst_read(&auprobe->insn)); if (ret > 0) return true; diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index 4acd3fb2b38e..ae632569446f 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -10,6 +10,7 @@ #include <asm/processor.h> #include <asm/switch_to.h> #include <linux/uaccess.h> +#include <asm/inst.h> /* Functions in vector.S */ extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); @@ -260,21 +261,24 @@ static unsigned int rfin(unsigned int x) int emulate_altivec(struct pt_regs *regs) { - unsigned int instr, i; + struct ppc_inst instr; + unsigned int i, word; unsigned int va, vb, vc, vd; vector128 *vrs; - if (get_user(instr, (unsigned int __user *) regs->nip)) + if (get_user_instr(instr, (void __user *)regs->nip)) return -EFAULT; - if ((instr >> 26) != 4) + + word = ppc_inst_val(instr); + if (ppc_inst_primary_opcode(instr) != 4) return -EINVAL; /* not an altivec instruction */ - vd = (instr >> 21) & 0x1f; - va = (instr >> 16) & 0x1f; - vb = (instr >> 11) & 0x1f; - vc = (instr >> 6) & 0x1f; + vd = (word >> 21) & 0x1f; + va = (word >> 16) & 0x1f; + vb = (word >> 11) & 0x1f; + vc = (word >> 6) & 0x1f; vrs = current->thread.vr_state.vr; - switch 
(instr & 0x3f) { + switch (word & 0x3f) { case 10: switch (vc) { case 0: /* vaddfp */ diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index d20c5e79e03c..efc5b52f95d2 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -89,6 +89,7 @@ _GLOBAL(load_up_altivec) REST_32VRS(0,r4,r6) /* restore registers and return */ blr +_ASM_NOKPROBE_SYMBOL(load_up_altivec) /* * save_altivec(tsk) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a1706b63b82d..326e113d2e45 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -15,7 +15,6 @@ #include <asm/thread_info.h> #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT) -#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT) ENTRY(_stext) @@ -117,7 +116,7 @@ SECTIONS } :text - . = ALIGN(ETEXT_ALIGN_SIZE); + . = ALIGN(PAGE_SIZE); _etext = .; PROVIDE32 (etext = .); diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 078fe3d76feb..56da5eb2b923 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -115,11 +115,12 @@ void machine_kexec(struct kimage *image) void __init reserve_crashkernel(void) { - unsigned long long crash_size, crash_base; + unsigned long long crash_size, crash_base, total_mem_sz; int ret; + total_mem_sz = memory_limit ? 
memory_limit : memblock_phys_mem_size(); /* use common parsing */ - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; @@ -178,6 +179,7 @@ void __init reserve_crashkernel(void) /* Crash kernel trumps memory limit */ if (memory_limit && memory_limit <= crashk_res.end) { memory_limit = crashk_res.end + 1; + total_mem_sz = memory_limit; printk("Adjusted memory limit for crashkernel, now 0x%llx\n", memory_limit); } @@ -186,7 +188,7 @@ void __init reserve_crashkernel(void) "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crashk_res.start >> 20), - (unsigned long)(memblock_phys_mem_size() >> 20)); + (unsigned long)(total_mem_sz >> 20)); if (!memblock_is_region_memory(crashk_res.start, crash_size) || memblock_reserve(crashk_res.start, crash_size)) { diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index d488311efab1..c9a889880214 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -311,6 +311,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); + /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ + printk_nmi_enter(); + /* * This function is only called after the system * has panicked or is otherwise in a critical state. 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2b35f9bcf892..18aed9775a3c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -281,11 +281,10 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, { long ret; - /* Protect linux PTE lookup from page table destruction */ - rcu_read_lock_sched(); /* this disables preemption too */ + preempt_disable(); ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, kvm->mm->pgd, false, pte_idx_ret); - rcu_read_unlock_sched(); + preempt_enable(); if (ret == H_TOO_HARD) { /* this can't happen */ pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); @@ -602,12 +601,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * Read the PTE from the process' radix tree and use that * so we get the shift and attribute bits. */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + spin_lock(&kvm->mmu_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); pte = __pte(0); if (ptep) - pte = *ptep; - local_irq_enable(); + pte = READ_ONCE(*ptep); + spin_unlock(&kvm->mmu_lock); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index d605ed0bb2e7..02219e28b1e4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -739,7 +739,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, return ret; } -bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, +bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, unsigned long gpa, unsigned int lpid) { unsigned long pgflags; @@ -754,12 +754,12 @@ bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, pgflags = _PAGE_ACCESSED; if (writing) pgflags |= _PAGE_DIRTY; - /* - * We are walking the secondary (partition-scoped) page table here. - * We can do this without disabling irq because the Linux MM - * subsystem doesn't do THP splits and collapses on this tree. - */ - ptep = __find_linux_pte(pgtable, gpa, NULL, &shift); + + if (nested) + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); + else + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); + if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) { kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); return true; @@ -817,12 +817,12 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, * Read the PTE from the process' radix tree and use that * so we get the shift and attribute bits. */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + spin_lock(&kvm->mmu_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); pte = __pte(0); if (ptep) - pte = *ptep; - local_irq_enable(); + pte = READ_ONCE(*ptep); + spin_unlock(&kvm->mmu_lock); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. 
@@ -953,8 +953,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Failed to set the reference/change bits */ if (dsisr & DSISR_SET_RC) { spin_lock(&kvm->mmu_lock); - if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, - writing, gpa, kvm->arch.lpid)) + if (kvmppc_hv_handle_set_rc(kvm, false, writing, + gpa, kvm->arch.lpid)) dsisr &= ~DSISR_SET_RC; spin_unlock(&kvm->mmu_lock); @@ -985,11 +985,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return 0; } - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); - return 0; + return 0; } /* Called with kvm->mmu_lock held */ @@ -1005,7 +1005,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -1032,7 +1032,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -1044,7 +1044,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, { unsigned long gfn = memslot->base_gfn + pagenum; unsigned long gpa = gfn << PAGE_SHIFT; - pte_t *ptep; + pte_t *ptep, pte; unsigned int shift; int ret = 0; unsigned long old, *rmapp; @@ -1052,12 +1052,35 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ret; - ptep = __find_linux_pte(kvm->arch.pgtable, 
gpa, NULL, &shift); - if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { - ret = 1; - if (shift) - ret = 1 << (shift - PAGE_SHIFT); + /* + * For performance reasons we don't hold kvm->mmu_lock while walking the + * partition scoped table. + */ + ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift); + if (!ptep) + return 0; + + pte = READ_ONCE(*ptep); + if (pte_present(pte) && pte_dirty(pte)) { spin_lock(&kvm->mmu_lock); + /* + * Recheck the pte again + */ + if (pte_val(pte) != pte_val(*ptep)) { + /* + * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can + * only find PAGE_SIZE pte entries here. We can continue + * to use the pte addr returned by above page table + * walk. + */ + if (!pte_present(*ptep) || !pte_dirty(*ptep)) { + spin_unlock(&kvm->mmu_lock); + return 0; + } + } + + ret = 1; + VM_BUG_ON(shift); old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid); @@ -1113,7 +1136,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, gpa = memslot->base_gfn << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); for (n = memslot->npages; n; --n) { - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 6fcaf1fa8e02..ac6ac192b8bb 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -74,8 +74,8 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, EXPORT_SYMBOL_GPL(kvmppc_find_table); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, - unsigned long *ua, unsigned long **prmap) +static long kvmppc_rm_tce_to_ua(struct kvm *kvm, + unsigned long tce, unsigned long *ua) { unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; @@ 
-87,9 +87,6 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, *ua = __gfn_to_hva_memslot(memslot, gfn) | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); - if (prmap) - *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; - return 0; } @@ -116,7 +113,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_PARAMETER; - if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -208,7 +205,7 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, idx = (ioba >> stt->page_shift) - stt->offset; sttpage = idx / TCES_PER_PAGE; - sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / + sttpages = ALIGN(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / TCES_PER_PAGE; for (i = sttpage; i < sttpage + sttpages; ++i) if (!stt->pages[i]) @@ -411,7 +408,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; dir = iommu_tce_direction(tce); - if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) + if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) return H_PARAMETER; entry = ioba >> stt->page_shift; @@ -437,8 +434,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return H_SUCCESS; } -static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, - unsigned long ua, unsigned long *phpa) +static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long ua, unsigned long *phpa) { pte_t *ptep, pte; unsigned shift = 0; @@ -452,10 +449,17 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, * to exit which will agains result in the below page table walk * to finish. */ - ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); - if (!ptep || !pte_present(*ptep)) + /* an rmap lock won't make it safe. 
because that just ensure hash + * page table entries are removed with rmap lock held. After that + * mmu notifier returns and we go ahead and removing ptes from Qemu page table. + */ + ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift); + if (!ptep) + return -ENXIO; + + pte = READ_ONCE(*ptep); + if (!pte_present(pte)) return -ENXIO; - pte = *ptep; if (!shift) shift = PAGE_SHIFT; @@ -477,10 +481,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce_list, unsigned long npages) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; long i, ret = H_SUCCESS; unsigned long tces, entry, ua = 0; - unsigned long *rmap = NULL; + unsigned long mmu_seq; bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; @@ -488,6 +493,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (kvm_is_radix(vcpu->kvm)) return H_TOO_HARD; + /* + * used to check for invalidations in progress + */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -515,7 +526,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ struct mm_iommu_table_group_mem_t *mem; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); @@ -531,23 +542,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, * We do not require memory to be preregistered in this case * so lock rmap and do __find_linux_pte_or_hugepte(). */ - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) - return H_TOO_HARD; - - rmap = (void *) vmalloc_to_phys(rmap); - if (WARN_ON_ONCE_RM(!rmap)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; - /* - * Synchronize with the MMU notifier callbacks in - * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.). 
- * While we have the rmap lock, code running on other CPUs - * cannot finish unmapping the host real page that backs - * this guest real page, so we are OK to access the host - * real page. - */ - lock_rmap(rmap); - if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) { ret = H_TOO_HARD; goto unlock_exit; } @@ -565,7 +564,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; goto invalidate_exit; } @@ -590,9 +589,8 @@ invalidate_exit: iommu_tce_kill_rm(stit->tbl, entry, npages); unlock_exit: - if (rmap) - unlock_rmap(rmap); - + if (!prereg) + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7f59c47a5b9d..a07e12ed9f5a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -342,9 +342,6 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } -/* Dummy value used in computing PCR value below */ -#define PCR_ARCH_300 (PCR_ARCH_207 << 1) - static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; @@ -3390,8 +3387,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, int trap; unsigned long host_hfscr = mfspr(SPRN_HFSCR); unsigned long host_ciabr = mfspr(SPRN_CIABR); - unsigned long host_dawr = mfspr(SPRN_DAWR); - unsigned long host_dawrx = mfspr(SPRN_DAWRX); + unsigned long host_dawr = mfspr(SPRN_DAWR0); + unsigned long host_dawrx = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); @@ -3420,8 +3417,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, 
mtspr(SPRN_SPURR, vcpu->arch.spurr); if (dawr_enabled()) { - mtspr(SPRN_DAWR, vcpu->arch.dawr); - mtspr(SPRN_DAWRX, vcpu->arch.dawrx); + mtspr(SPRN_DAWR0, vcpu->arch.dawr); + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx); } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3473,8 +3470,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR, host_dawr); - mtspr(SPRN_DAWRX, host_dawrx); + mtspr(SPRN_DAWR0, host_dawr); + mtspr(SPRN_DAWRX0, host_dawrx); mtspr(SPRN_PID, host_pidr); /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index dc97e5be76f6..66c38ee37fd5 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -750,6 +750,23 @@ static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid) return kvm->arch.nested_guests[lpid]; } +pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, + unsigned long ea, unsigned *hshift) +{ + struct kvm_nested_guest *gp; + pte_t *pte; + + gp = kvmhv_find_nested(kvm, lpid); + if (!gp) + return NULL; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift); + + return pte; +} + static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2) { return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK | @@ -792,19 +809,15 @@ static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap, unsigned long clr, unsigned long set, unsigned long hpa, unsigned long mask) { - struct kvm_nested_guest *gp; unsigned long gpa; unsigned int shift, lpid; pte_t *ptep; gpa = n_rmap & RMAP_NESTED_GPA_MASK; lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT; - gp = kvmhv_find_nested(kvm, lpid); - if (!gp) - return; /* Find the pte */ - ptep = 
__find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* * If the pte is present and the pfn is still the same, update the pte. * If the pfn has changed then this is a stale rmap entry, the nested @@ -854,7 +867,7 @@ static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap, return; /* Find and invalidate the pte */ - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* Don't spuriously invalidate ptes if the pfn has changed */ if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid); @@ -921,7 +934,7 @@ static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu, int shift; spin_lock(&kvm->mmu_lock); - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (ptep && pte_present(*ptep)) { @@ -1169,7 +1182,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { /* Can we execute? 
*/ if (!gpte_p->may_execute) { - flags |= SRR1_ISI_N_OR_G; + flags |= SRR1_ISI_N_G_OR_CIP; goto forward_to_l1; } } else { @@ -1212,16 +1225,16 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */ - ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing, - gpte.raddr, kvm->arch.lpid); + ret = kvmppc_hv_handle_set_rc(kvm, false, writing, + gpte.raddr, kvm->arch.lpid); if (!ret) { ret = -EINVAL; goto out_unlock; } /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */ - ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa, - gp->shadow_lpid); + ret = kvmppc_hv_handle_set_rc(kvm, true, writing, + n_gpa, gp->shadow_lpid); if (!ret) ret = -EINVAL; else @@ -1362,7 +1375,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, /* See if can find translation in our partition scoped tables for L1 */ pte = __pte(0); spin_lock(&kvm->mmu_lock); - pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + pte_p = find_kvm_secondary_pte(kvm, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (pte_p) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 220305454c23..88da2764c1bb 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -210,7 +210,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pte_t *ptep; unsigned int writing; unsigned long mmu_seq; - unsigned long rcbits, irq_flags = 0; + unsigned long rcbits; if (kvm_is_radix(kvm)) return H_FUNCTION; @@ -248,17 +248,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Translate to host virtual address */ hva = __gfn_to_hva_memslot(memslot, gfn); - /* - * If we had a page table table change after lookup, we would - * retry via mmu_notifier_retry. - */ - if (!realmode) - local_irq_save(irq_flags); - /* - * If called in real mode we have MSR_EE = 0. 
Otherwise - * we disable irq above. - */ - ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); + + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; @@ -272,8 +264,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * to <= host page size, if host is using hugepage */ if (host_pte_size < psize) { - if (!realmode) - local_irq_restore(flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return H_PARAMETER; } pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -287,8 +278,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pa |= gpa & ~PAGE_MASK; } } - if (!realmode) - local_irq_restore(irq_flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); ptel |= pa; @@ -888,8 +878,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, return ret; } -static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, - int writing, unsigned long *hpa, +static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long gpa, int writing, unsigned long *hpa, struct kvm_memory_slot **memslot_p) { struct kvm *kvm = vcpu->kvm; @@ -908,7 +898,7 @@ static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, hva = __gfn_to_hva_memslot(memslot, gfn); /* Try to find the host pte for that virtual address */ - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); if (!ptep) return H_TOO_HARD; pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -943,16 +933,11 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); - if (ret != H_SUCCESS) - return ret; + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if 
(mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot); + if (ret != H_SUCCESS) goto out_unlock; - } /* Zero the page */ for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) @@ -960,7 +945,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -976,19 +961,14 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); - if (ret != H_SUCCESS) - return ret; - ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot); if (ret != H_SUCCESS) - return ret; + goto out_unlock; - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if (mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL); + if (ret != H_SUCCESS) goto out_unlock; - } /* Copy the page */ memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); @@ -996,7 +976,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -1260,7 +1240,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ if (!data) { if (gr & (HPTE_R_N | HPTE_R_G)) - return status | SRR1_ISI_N_OR_G; + return status | SRR1_ISI_N_G_OR_CIP; if (!hpte_read_permission(pp, slb_v & key)) return status | SRR1_ISI_PROT; } else if (status & DSISR_ISSTORE) { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 780a499c7114..71943892c81c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -707,8 +707,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION mfspr r5, SPRN_CIABR - mfspr r6, SPRN_DAWR - mfspr r7, SPRN_DAWRX + mfspr r6, SPRN_DAWR0 + mfspr r7, SPRN_DAWRX0 mfspr r8, SPRN_IAMR std r5, STACK_SLOT_CIABR(r1) std r6, STACK_SLOT_DAWR(r1) @@ -803,8 +803,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) beq 1f ld r5, VCPU_DAWR(r4) ld r6, VCPU_DAWRX(r4) - mtspr SPRN_DAWR, r5 - mtspr SPRN_DAWRX, r6 + mtspr SPRN_DAWR0, r5 + mtspr SPRN_DAWRX0, r6 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -1766,8 +1766,8 @@ BEGIN_FTR_SECTION * If the DAWR doesn't work, it's ok to write these here as * this value should always be zero */ - mtspr SPRN_DAWR, r6 - mtspr SPRN_DAWRX, r7 + mtspr SPRN_DAWR0, r6 + mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) @@ -2577,8 +2577,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfmsr r6 andi. r6, r6, MSR_DR /* in real mode? */ bne 4f - mtspr SPRN_DAWR, r4 - mtspr SPRN_DAWRX, r5 + mtspr SPRN_DAWR0, r4 + mtspr SPRN_DAWRX0, r5 4: li r3, 0 blr @@ -2907,6 +2907,11 @@ kvm_cede_exit: beq 4f li r0, 0 stb r0, VCPU_CEDED(r9) + /* + * The escalation interrupts are special as we don't EOI them. + * There is no need to use the load-after-store ordering offset + * to set PQ to 10 as we won't use StoreEOI. 
+ */ li r6, XIVE_ESB_SET_PQ_10 b 5f 4: li r0, 1 @@ -3329,7 +3334,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_AMR, r0 mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_DAWRX0, r0 BEGIN_MMU_FTR_SECTION b 4f diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 6ef0151ff70a..bdea91df1497 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -31,6 +31,12 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) { u64 val; + /* + * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10 + * load operation, so there is no need to enforce load-after-store + * ordering. + */ + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index a8a900ace1e6..4ad3c0279458 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -58,6 +58,9 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset) { u64 val; + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + offset |= XIVE_ESB_LD_ST_MO; + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 1139bc56e004..135d0e686622 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -95,7 +95,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = EMULATE_FAIL; vcpu->arch.regs.msr = vcpu->arch.shared->msr; - if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { + if (analyse_instr(&op, &vcpu->arch.regs, ppc_inst(inst)) == 0) { int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b8de3be10eb4..5e994cda8e40 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile 
@@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 7a59f6863cec..e64546b8875c 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -17,13 +17,19 @@ #include <asm/page.h> #include <asm/code-patching.h> #include <asm/setup.h> +#include <asm/inst.h> -static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, - unsigned int *patch_addr) +static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, + struct ppc_inst *patch_addr) { int err = 0; - __put_user_asm(instr, patch_addr, err, "stw"); + if (!ppc_inst_prefixed(instr)) { + __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); + } else { + __put_user_asm(ppc_inst_as_u64(instr), patch_addr, err, "std"); + } + if (err) return err; @@ -33,7 +39,7 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, return 0; } -int raw_patch_instruction(unsigned int *addr, unsigned int instr) +int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { return __patch_instruction(addr, instr, addr); } @@ -141,10 +147,10 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { int err; - unsigned int *patch_addr = NULL; + struct ppc_inst *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -165,8 +171,7 @@ static int do_patch_instruction(unsigned int *addr, unsigned int instr) goto out; } - patch_addr = (unsigned int 
*)(text_poke_addr) + - ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); + patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK)); __patch_instruction(addr, instr, patch_addr); @@ -181,14 +186,14 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { /* Make sure we aren't patching a freed init section */ if (init_mem_is_free && init_section_contains(addr, 4)) { @@ -199,9 +204,12 @@ int patch_instruction(unsigned int *addr, unsigned int instr) } NOKPROBE_SYMBOL(patch_instruction); -int patch_branch(unsigned int *addr, unsigned long target, int flags) +int patch_branch(struct ppc_inst *addr, unsigned long target, int flags) { - return patch_instruction(addr, create_branch(addr, target, flags)); + struct ppc_inst instr; + + create_branch(&instr, addr, target, flags); + return patch_instruction(addr, instr); } bool is_offset_in_branch_range(long offset) @@ -230,14 +238,14 @@ bool is_offset_in_branch_range(long offset) * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() */ -bool is_conditional_branch(unsigned int instr) +bool is_conditional_branch(struct ppc_inst instr) { - unsigned int opcode = instr >> 26; + unsigned int opcode = ppc_inst_primary_opcode(instr); if (opcode == 16) /* bc, bca, bcl, bcla */ return true; if (opcode == 19) { - switch ((instr >> 1) & 0x3ff) { + switch ((ppc_inst_val(instr) >> 1) & 0x3ff) { case 16: /* bclr, bclrl */ case 528: /* bcctr, bcctrl */ case 560: /* bctar, bctarl */ @@ -248,30 +256,30 @@ bool is_conditional_branch(unsigned int instr) } NOKPROBE_SYMBOL(is_conditional_branch); -unsigned int 
create_branch(const unsigned int *addr, - unsigned long target, int flags) +int create_branch(struct ppc_inst *instr, + const struct ppc_inst *addr, + unsigned long target, int flags) { - unsigned int instruction; long offset; + *instr = ppc_inst(0); offset = target; if (! (flags & BRANCH_ABSOLUTE)) offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ if (!is_offset_in_branch_range(offset)) - return 0; + return 1; /* Mask out the flags and target, so they don't step on each other. */ - instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC); + *instr = ppc_inst(0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC)); - return instruction; + return 0; } -unsigned int create_cond_branch(const unsigned int *addr, - unsigned long target, int flags) +int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, + unsigned long target, int flags) { - unsigned int instruction; long offset; offset = target; @@ -280,104 +288,107 @@ unsigned int create_cond_branch(const unsigned int *addr, /* Check we can represent the target in the instruction format */ if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) - return 0; + return 1; /* Mask out the flags and target, so they don't step on each other. 
*/ - instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC); + *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC)); - return instruction; + return 0; } -static unsigned int branch_opcode(unsigned int instr) +static unsigned int branch_opcode(struct ppc_inst instr) { - return (instr >> 26) & 0x3F; + return ppc_inst_primary_opcode(instr) & 0x3F; } -static int instr_is_branch_iform(unsigned int instr) +static int instr_is_branch_iform(struct ppc_inst instr) { return branch_opcode(instr) == 18; } -static int instr_is_branch_bform(unsigned int instr) +static int instr_is_branch_bform(struct ppc_inst instr) { return branch_opcode(instr) == 16; } -int instr_is_relative_branch(unsigned int instr) +int instr_is_relative_branch(struct ppc_inst instr) { - if (instr & BRANCH_ABSOLUTE) + if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) return 0; return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } -int instr_is_relative_link_branch(unsigned int instr) +int instr_is_relative_link_branch(struct ppc_inst instr) { - return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK); + return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } -static unsigned long branch_iform_target(const unsigned int *instr) +static unsigned long branch_iform_target(const struct ppc_inst *instr) { signed long imm; - imm = *instr & 0x3FFFFFC; + imm = ppc_inst_val(*instr) & 0x3FFFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x2000000) imm -= 0x4000000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -static unsigned long branch_bform_target(const unsigned int *instr) +static unsigned long branch_bform_target(const struct ppc_inst *instr) { signed long imm; - imm = *instr & 0xFFFC; + imm = ppc_inst_val(*instr) & 0xFFFC; /* If the top bit of the immediate value is set this is negative */ if 
(imm & 0x8000) imm -= 0x10000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -unsigned long branch_target(const unsigned int *instr) +unsigned long branch_target(const struct ppc_inst *instr) { - if (instr_is_branch_iform(*instr)) + if (instr_is_branch_iform(ppc_inst_read(instr))) return branch_iform_target(instr); - else if (instr_is_branch_bform(*instr)) + else if (instr_is_branch_bform(ppc_inst_read(instr))) return branch_bform_target(instr); return 0; } -int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr) +int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr) { - if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr)) + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) return branch_target(instr) == addr; return 0; } -unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) +int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, + const struct ppc_inst *src) { unsigned long target; - target = branch_target(src); - if (instr_is_branch_iform(*src)) - return create_branch(dest, target, *src); - else if (instr_is_branch_bform(*src)) - return create_cond_branch(dest, target, *src); + if (instr_is_branch_iform(ppc_inst_read(src))) + return create_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); + else if (instr_is_branch_bform(ppc_inst_read(src))) + return create_cond_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); - return 0; + return 1; } #ifdef CONFIG_PPC_BOOK3E_64 @@ -392,7 +403,7 @@ void __patch_exception(int exc, unsigned long addr) * instruction of the exception, not the first one */ - patch_branch(ibase + (exc / 4) + 1, addr, 0); + patch_branch((struct ppc_inst *)(ibase + (exc / 4) + 1), addr, 0); } #endif @@ -408,165 +419,171 @@ static void __init test_trampoline(void) static 
void __init test_branch_iform(void) { - unsigned int instr; + int err; + struct ppc_inst instr; unsigned long addr; addr = (unsigned long)&instr; /* The simplest case, branch to self, no flags */ - check(instr_is_branch_iform(0x48000000)); + check(instr_is_branch_iform(ppc_inst(0x48000000))); /* All bits of target set, and flags */ - check(instr_is_branch_iform(0x4bffffff)); + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_iform(0xcbffffff)); + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_iform(0x7bffffff)); + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); /* Simplest case, branch to self with link */ - check(instr_is_branch_iform(0x48000001)); + check(instr_is_branch_iform(ppc_inst(0x48000001))); /* All bits of targets set */ - check(instr_is_branch_iform(0x4bfffffd)); + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); /* Some bits of targets set */ - check(instr_is_branch_iform(0x4bff00fd)); + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); /* Must be a valid branch to start with */ - check(!instr_is_branch_iform(0x7bfffffd)); + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); /* Absolute branch to 0x100 */ - instr = 0x48000103; + instr = ppc_inst(0x48000103); check(instr_is_branch_to_addr(&instr, 0x100)); /* Absolute branch to 0x420fc */ - instr = 0x480420ff; + instr = ppc_inst(0x480420ff); check(instr_is_branch_to_addr(&instr, 0x420fc)); /* Maximum positive relative branch, + 20MB - 4B */ - instr = 0x49fffffc; + instr = ppc_inst(0x49fffffc); check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); /* Smallest negative relative branch, - 4B */ - instr = 0x4bfffffc; + instr = ppc_inst(0x4bfffffc); check(instr_is_branch_to_addr(&instr, addr - 4)); /* Largest negative relative branch, - 32 MB */ - instr = 0x4a000000; + instr = ppc_inst(0x4a000000); check(instr_is_branch_to_addr(&instr, addr - 
0x2000000)); /* Branch to self, with link */ - instr = create_branch(&instr, addr, BRANCH_SET_LINK); + err = create_branch(&instr, &instr, addr, BRANCH_SET_LINK); check(instr_is_branch_to_addr(&instr, addr)); /* Branch to self - 0x100, with link */ - instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK); + err = create_branch(&instr, &instr, addr - 0x100, BRANCH_SET_LINK); check(instr_is_branch_to_addr(&instr, addr - 0x100)); /* Branch to self + 0x100, no link */ - instr = create_branch(&instr, addr + 0x100, 0); + err = create_branch(&instr, &instr, addr + 0x100, 0); check(instr_is_branch_to_addr(&instr, addr + 0x100)); /* Maximum relative negative offset, - 32 MB */ - instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK); + err = create_branch(&instr, &instr, addr - 0x2000000, BRANCH_SET_LINK); check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); /* Out of range relative negative offset, - 32 MB + 4*/ - instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK); - check(instr == 0); + err = create_branch(&instr, &instr, addr - 0x2000004, BRANCH_SET_LINK); + check(err); /* Out of range relative positive offset, + 32 MB */ - instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK); - check(instr == 0); + err = create_branch(&instr, &instr, addr + 0x2000000, BRANCH_SET_LINK); + check(err); /* Unaligned target */ - instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK); - check(instr == 0); + err = create_branch(&instr, &instr, addr + 3, BRANCH_SET_LINK); + check(err); /* Check flags are masked correctly */ - instr = create_branch(&instr, addr, 0xFFFFFFFC); + err = create_branch(&instr, &instr, addr, 0xFFFFFFFC); check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x48000000); + check(ppc_inst_equal(instr, ppc_inst(0x48000000))); } static void __init test_create_function_call(void) { - unsigned int *iptr; + struct ppc_inst *iptr; unsigned long dest; + struct ppc_inst instr; /* Check we can create a function call */ - 
iptr = (unsigned int *)ppc_function_entry(test_trampoline); + iptr = (struct ppc_inst *)ppc_function_entry(test_trampoline); dest = ppc_function_entry(test_create_function_call); - patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK)); + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); + patch_instruction(iptr, instr); check(instr_is_branch_to_addr(iptr, dest)); } static void __init test_branch_bform(void) { + int err; unsigned long addr; - unsigned int *iptr, instr, flags; + struct ppc_inst *iptr, instr; + unsigned int flags; iptr = &instr; addr = (unsigned long)iptr; /* The simplest case, branch to self, no flags */ - check(instr_is_branch_bform(0x40000000)); + check(instr_is_branch_bform(ppc_inst(0x40000000))); /* All bits of target set, and flags */ - check(instr_is_branch_bform(0x43ffffff)); + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_bform(0xc3ffffff)); + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_bform(0x7bffffff)); + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); /* Absolute conditional branch to 0x100 */ - instr = 0x43ff0103; + instr = ppc_inst(0x43ff0103); check(instr_is_branch_to_addr(&instr, 0x100)); /* Absolute conditional branch to 0x20fc */ - instr = 0x43ff20ff; + instr = ppc_inst(0x43ff20ff); check(instr_is_branch_to_addr(&instr, 0x20fc)); /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = 0x43ff7ffc; + instr = ppc_inst(0x43ff7ffc); check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); /* Smallest negative relative conditional branch, - 4B */ - instr = 0x43fffffc; + instr = ppc_inst(0x43fffffc); check(instr_is_branch_to_addr(&instr, addr - 4)); /* Largest negative relative conditional branch, - 32 KB */ - instr = 0x43ff8000; + instr = ppc_inst(0x43ff8000); check(instr_is_branch_to_addr(&instr, addr - 0x8000)); /* All condition code bits set & 
link */ flags = 0x3ff000 | BRANCH_SET_LINK; /* Branch to self */ - instr = create_cond_branch(iptr, addr, flags); + err = create_cond_branch(&instr, iptr, addr, flags); check(instr_is_branch_to_addr(&instr, addr)); /* Branch to self - 0x100 */ - instr = create_cond_branch(iptr, addr - 0x100, flags); + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); check(instr_is_branch_to_addr(&instr, addr - 0x100)); /* Branch to self + 0x100 */ - instr = create_cond_branch(iptr, addr + 0x100, flags); + err = create_cond_branch(&instr, iptr, addr + 0x100, flags); check(instr_is_branch_to_addr(&instr, addr + 0x100)); /* Maximum relative negative offset, - 32 KB */ - instr = create_cond_branch(iptr, addr - 0x8000, flags); + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); check(instr_is_branch_to_addr(&instr, addr - 0x8000)); /* Out of range relative negative offset, - 32 KB + 4*/ - instr = create_cond_branch(iptr, addr - 0x8004, flags); - check(instr == 0); + err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); + check(err); /* Out of range relative positive offset, + 32 KB */ - instr = create_cond_branch(iptr, addr + 0x8000, flags); - check(instr == 0); + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); + check(err); /* Unaligned target */ - instr = create_cond_branch(iptr, addr + 3, flags); - check(instr == 0); + err = create_cond_branch(&instr, iptr, addr + 3, flags); + check(err); /* Check flags are masked correctly */ - instr = create_cond_branch(iptr, addr, 0xFFFFFFFC); + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x43FF0000); + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); } static void __init test_translate_branch(void) { unsigned long addr; - unsigned int *p, *q; + void *p, *q; + struct ppc_inst instr; void *buf; buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); @@ -579,8 +596,9 @@ static void __init test_translate_branch(void) addr = (unsigned 
long)p; patch_branch(p, addr, 0); check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); + q = p + 4; + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(q, addr)); /* Maximum negative case, move b . to addr + 32 MB */ @@ -588,27 +606,30 @@ static void __init test_translate_branch(void) addr = (unsigned long)p; patch_branch(p, addr, 0); q = buf + 0x2000000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x4a000000); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); /* Maximum positive case, move x to x - 32 MB + 4 */ p = buf + 0x2000000; addr = (unsigned long)p; patch_branch(p, addr, 0); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x49fffffc); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); /* Jump to x + 16 MB moved to x + 20 MB */ p = buf; addr = 0x1000000 + (unsigned long)buf; patch_branch(p, addr, BRANCH_SET_LINK); q = buf + 0x1400000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); @@ -617,7 +638,8 @@ static void __init test_translate_branch(void) addr = 0x2000000 + (unsigned long)buf; patch_branch(p, addr, 0); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); @@ -627,47 +649,57 @@ static void __init test_translate_branch(void) /* Simple case, branch to self moved a little */ p = buf; addr = (unsigned 
long)p; - patch_instruction(p, create_cond_branch(p, addr, 0)); + create_cond_branch(&instr, p, addr, 0); + patch_instruction(p, instr); check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); + q = buf + 4; + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(q, addr)); /* Maximum negative case, move b . to addr + 32 KB */ p = buf; addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + patch_instruction(p, instr); q = buf + 0x8000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff8000); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); /* Maximum positive case, move x to x - 32 KB + 4 */ p = buf + 0x8000; addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + patch_instruction(p, instr); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff7ffc); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); /* Jump to x + 12 KB moved to x + 20 KB */ p = buf; addr = 0x3000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK)); + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); + patch_instruction(p, instr); q = buf + 0x5000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); /* Jump to x + 8 KB moved to x - 8 KB + 4 */ p = buf + 0x2000; addr = 0x4000 + (unsigned long)buf; 
- patch_instruction(p, create_cond_branch(p, addr, 0)); + create_cond_branch(&instr, p, addr, 0); + patch_instruction(p, instr); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); @@ -675,6 +707,26 @@ static void __init test_translate_branch(void) vfree(buf); } +#ifdef CONFIG_PPC64 +static void __init test_prefixed_patching(void) +{ + extern unsigned int code_patching_test1[]; + extern unsigned int code_patching_test1_expected[]; + extern unsigned int end_code_patching_test1[]; + + __patch_instruction((struct ppc_inst *)code_patching_test1, + ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), + (struct ppc_inst *)code_patching_test1); + + check(!memcmp(code_patching_test1, + code_patching_test1_expected, + sizeof(unsigned int) * + (end_code_patching_test1 - code_patching_test1))); +} +#else +static inline void test_prefixed_patching(void) {} +#endif + static int __init test_code_patching(void) { printk(KERN_DEBUG "Running code patching self-tests ...\n"); @@ -683,6 +735,7 @@ static int __init test_code_patching(void) test_branch_bform(); test_create_function_call(); test_translate_branch(); + test_prefixed_patching(); return 0; } diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S index b12168c2447a..480172fbd024 100644 --- a/arch/powerpc/lib/feature-fixups-test.S +++ b/arch/powerpc/lib/feature-fixups-test.S @@ -7,6 +7,7 @@ #include <asm/ppc_asm.h> #include <asm/synch.h> #include <asm/asm-compat.h> +#include <asm/ppc-opcode.h> .text @@ -791,3 +792,71 @@ globl(lwsync_fixup_test_expected_SYNC) 1: or 1,1,1 sync +globl(ftr_fixup_prefix1) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix1) + +globl(ftr_fixup_prefix1_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix1_expected) + or 1,1,1 + nop + 
nop + or 2,2,2 + +globl(ftr_fixup_prefix2) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix2) + +globl(ftr_fixup_prefix2_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix2_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + +globl(ftr_fixup_prefix2_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + or 2,2,2 + +globl(ftr_fixup_prefix3) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 +globl(end_ftr_fixup_prefix3) + +globl(ftr_fixup_prefix3_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 + +globl(ftr_fixup_prefix3_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + nop + +globl(ftr_fixup_prefix3_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + nop + or 3,3,3 diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4ba634b89ce5..4c0a7ee9fa00 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -21,6 +21,7 @@ #include <asm/setup.h> #include <asm/security_features.h> #include <asm/firmware.h> +#include <asm/inst.h> struct fixup_entry { unsigned long mask; @@ -31,30 +32,31 @@ struct fixup_entry { long alt_end_off; }; -static unsigned int *calc_addr(struct fixup_entry *fcur, long offset) +static struct ppc_inst *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. 
*/ - return (unsigned int *)((unsigned long)fcur + offset); + return (struct ppc_inst *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(unsigned int *src, unsigned int *dest, - unsigned int *alt_start, unsigned int *alt_end) +static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, + struct ppc_inst *alt_start, struct ppc_inst *alt_end) { - unsigned int instr; + int err; + struct ppc_inst instr; - instr = *src; + instr = ppc_inst_read(src); if (instr_is_relative_branch(*src)) { - unsigned int *target = (unsigned int *)branch_target(src); + struct ppc_inst *target = (struct ppc_inst *)branch_target(src); /* Branch within the section doesn't need translating */ if (target < alt_start || target > alt_end) { - instr = translate_branch(dest, src); - if (!instr) + err = translate_branch(&instr, dest, src); + if (err) return 1; } } @@ -66,7 +68,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) { - unsigned int *start, *end, *alt_start, *alt_end, *src, *dest; + struct ppc_inst *start, *end, *alt_start, *alt_end, *src, *dest, nop; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -82,13 +84,15 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) src = alt_start; dest = start; - for (; src < alt_end; src++, dest++) { + for (; src < alt_end; src = ppc_inst_next(src, src), + dest = ppc_inst_next(dest, dest)) { if (patch_alt_instruction(src, dest, alt_start, alt_end)) return 1; } - for (; dest < end; dest++) - raw_patch_instruction(dest, PPC_INST_NOP); + nop = ppc_inst(PPC_INST_NOP); + for (; dest < end; dest = ppc_inst_next(dest, &nop)) + raw_patch_instruction(dest, nop); return 0; } @@ -145,15 +149,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instrs[0]); + 
patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); if (types & STF_BARRIER_FALLBACK) - patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); else - patch_instruction(dest + 1, instrs[1]); + patch_instruction((struct ppc_inst *)(dest + 1), + ppc_inst(instrs[1])); - patch_instruction(dest + 2, instrs[2]); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); } printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, @@ -206,12 +212,12 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); - patch_instruction(dest + 3, instrs[3]); - patch_instruction(dest + 4, instrs[4]); - patch_instruction(dest + 5, instrs[5]); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + patch_instruction((struct ppc_inst *)(dest + 4), ppc_inst(instrs[4])); + patch_instruction((struct ppc_inst *)(dest + 5), ppc_inst(instrs[5])); } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? 
"no" : @@ -259,9 +265,9 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); } printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, @@ -294,7 +300,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instr)); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -337,8 +343,8 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr[0]); - patch_instruction(dest + 1, instr[1]); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instr[0])); + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instr[1])); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -352,7 +358,7 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction(start, PPC_INST_NOP); + patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_INST_NOP)); } } @@ -371,7 +377,7 @@ void do_btb_flush_fixups(void) void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - unsigned int *dest; + struct ppc_inst *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -381,27 +387,27 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) 
for (; start < end; start++) { dest = (void *)start + *start; - raw_patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC)); } } static void do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - int *src, *dest; - unsigned long length; + struct ppc_inst inst, *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (int *)(KERNELBASE + PHYSICAL_START); - dest = (int *)KERNELBASE; - length = (__end_interrupts - _stext) / sizeof(int); + src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START); + dest = (struct ppc_inst *)KERNELBASE; + end = (void *)src + (__end_interrupts - _stext); - while (length--) { - raw_patch_instruction(dest, *src); - src++; - dest++; + while (src < end) { + inst = ppc_inst_read(src); + raw_patch_instruction(dest, inst); + src = ppc_inst_next(src, src); + dest = ppc_inst_next(dest, dest); } #endif } @@ -684,6 +690,78 @@ static void test_lwsync_macros(void) } } +#ifdef CONFIG_PPC64 +static void __init test_prefix_patching(void) +{ + extern unsigned int ftr_fixup_prefix1[]; + extern unsigned int end_ftr_fixup_prefix1[]; + extern unsigned int ftr_fixup_prefix1_orig[]; + extern unsigned int ftr_fixup_prefix1_expected[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3); + fixup.alt_start_off = fixup.alt_end_off = 0; + + /* Sanity check */ + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0); +} + +static void __init test_prefix_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix2[]; + extern unsigned int end_ftr_fixup_prefix2[]; + extern unsigned int ftr_fixup_prefix2_orig[]; + 
extern unsigned int ftr_fixup_prefix2_expected[]; + extern unsigned int ftr_fixup_prefix2_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0); +} + +static void __init test_prefix_word_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix3[]; + extern unsigned int end_ftr_fixup_prefix3[]; + extern unsigned int ftr_fixup_prefix3_orig[]; + extern unsigned int ftr_fixup_prefix3_expected[]; + extern unsigned int ftr_fixup_prefix3_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0); + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0); +} +#else +static inline void test_prefix_patching(void) {} +static inline void test_prefix_alt_patching(void) {} +static inline void test_prefix_word_alt_patching(void) {} +#endif /* CONFIG_PPC64 */ + static int __init test_feature_fixups(void) { 
printk(KERN_DEBUG "Running feature fixup self-tests ...\n"); @@ -698,6 +776,9 @@ static int __init test_feature_fixups(void) test_cpu_macros(); test_fw_macros(); test_lwsync_macros(); + test_prefix_patching(); + test_prefix_alt_patching(); + test_prefix_word_alt_patching(); return 0; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c new file mode 100644 index 000000000000..aedfd6e31e53 --- /dev/null +++ b/arch/powerpc/lib/inst.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020, IBM Corporation. + */ + +#include <linux/uaccess.h> +#include <asm/disassemble.h> +#include <asm/inst.h> +#include <asm/ppc-opcode.h> + +#ifdef CONFIG_PPC64 +int probe_user_read_inst(struct ppc_inst *inst, + struct ppc_inst __user *nip) +{ + unsigned int val, suffix; + int err; + + err = probe_user_read(&val, nip, sizeof(val)); + if (err) + return err; + if (get_op(val) == OP_PREFIX) { + err = probe_user_read(&suffix, (void __user *)nip + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} + +int probe_kernel_read_inst(struct ppc_inst *inst, + struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = probe_kernel_read(&val, src, sizeof(val)); + if (err) + return err; + if (get_op(val) == OP_PREFIX) { + err = probe_kernel_read(&suffix, (void *)src + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} +#else /* !CONFIG_PPC64 */ +int probe_user_read_inst(struct ppc_inst *inst, + struct ppc_inst __user *nip) +{ + unsigned int val; + int err; + + err = probe_user_read(&val, nip, sizeof(val)); + if (!err) + *inst = ppc_inst(val); + + return err; +} + +int probe_kernel_read_inst(struct ppc_inst *inst, + struct ppc_inst *src) +{ + unsigned int val; + int err; + + err = probe_kernel_read(&val, src, sizeof(val)); + if (!err) + *inst = ppc_inst(val); + + return err; +} +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/lib/sstep.c 
b/arch/powerpc/lib/sstep.c index 5f3a7bd9d90d..5abe98216dc2 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -13,6 +13,7 @@ #include <linux/uaccess.h> #include <asm/cpu_has_feature.h> #include <asm/cputable.h> +#include <asm/disassemble.h> extern char system_call_common[]; @@ -188,6 +189,44 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr, } /* + * Calculate effective address for a MLS:D-form / 8LS:D-form + * prefixed instruction + */ +static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, + unsigned int suffix, + const struct pt_regs *regs) +{ + int ra, prefix_r; + unsigned int dd; + unsigned long ea, d0, d1, d; + + prefix_r = instr & (1ul << 20); + ra = (suffix >> 16) & 0x1f; + + d0 = instr & 0x3ffff; + d1 = suffix & 0xffff; + d = (d0 << 16) | d1; + + /* + * sign extend a 34 bit number + */ + dd = (unsigned int)(d >> 2); + ea = (signed int)dd; + ea = (ea << 2) | (d & 0x3); + + if (!prefix_r && ra) + ea += regs->gpr[ra]; + else if (!prefix_r && !ra) + ; /* Leave ea as is */ + else if (prefix_r && !ra) + ea += regs->nip; + else if (prefix_r && ra) + ; /* Invalid form. Should already be checked for by caller! */ + + return ea; +} + +/* * Return the largest power of 2, not greater than sizeof(unsigned long), * such that x is a multiple of it. */ @@ -1163,32 +1202,39 @@ static nokprobe_inline int trap_compare(long v1, long v2) * otherwise. 
*/ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - unsigned int instr) + struct ppc_inst instr) { +#ifdef CONFIG_PPC64 + unsigned int suffixopcode, prefixtype, prefix_r; +#endif unsigned int opcode, ra, rb, rc, rd, spr, u; unsigned long int imm; unsigned long int val, val2; unsigned int mb, me, sh; + unsigned int word, suffix; long ival; + word = ppc_inst_val(instr); + suffix = ppc_inst_suffix(instr); + op->type = COMPUTE; - opcode = instr >> 26; + opcode = ppc_inst_primary_opcode(instr); switch (opcode) { case 16: /* bc */ op->type = BRANCH; - imm = (signed short)(instr & 0xfffc); - if ((instr & 2) == 0) + imm = (signed short)(word & 0xfffc); + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ - if ((instr & 0xfe2) == 2) + if ((word & 0xfe2) == 2) op->type = SYSCALL; else op->type = UNKNOWN; @@ -1196,21 +1242,21 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 18: /* b */ op->type = BRANCH | BRTAKEN; - imm = instr & 0x03fffffc; + imm = word & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; - if ((instr & 2) == 0) + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; return 1; case 19: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 0: /* mcrf */ op->type = COMPUTE + SETCC; - rd = 7 - ((instr >> 23) & 0x7); - ra = 7 - ((instr >> 18) & 0x7); + rd = 7 - ((word >> 23) & 0x7); + ra = 7 - ((word >> 18) & 0x7); rd *= 4; ra *= 4; val = (regs->ccr >> ra) & 0xf; @@ -1220,11 +1266,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 16: /* bclr */ case 528: /* bcctr */ op->type = BRANCH; - imm = (instr & 0x400)? 
regs->ctr: regs->link; + imm = (word & 0x400)? regs->ctr: regs->link; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; @@ -1247,23 +1293,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 417: /* crorc */ case 449: /* cror */ op->type = COMPUTE + SETCC; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rd = (instr >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rd = (word >> 21) & 0x1f; ra = (regs->ccr >> (31 - ra)) & 1; rb = (regs->ccr >> (31 - rb)) & 1; - val = (instr >> (6 + ra * 2 + rb)) & 1; + val = (word >> (6 + ra * 2 + rb)) & 1; op->ccval = (regs->ccr & ~(1UL << (31 - rd))) | (val << (31 - rd)); return 1; } break; case 31: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 598: /* sync */ op->type = BARRIER + BARRIER_SYNC; #ifdef __powerpc64__ - switch ((instr >> 21) & 3) { + switch ((word >> 21) & 3) { case 1: /* lwsync */ op->type = BARRIER + BARRIER_LWSYNC; break; @@ -1285,20 +1331,40 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (!FULL_REGS(regs)) return -1; - rd = (instr >> 21) & 0x1f; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rc = (instr >> 6) & 0x1f; + rd = (word >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rc = (word >> 6) & 0x1f; switch (opcode) { #ifdef __powerpc64__ + case 1: + prefix_r = word & (1ul << 20); + ra = (suffix >> 16) & 0x1f; + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 2: + if (prefix_r && ra) + return 0; + switch (suffixopcode) { + case 14: /* paddi */ + op->type = COMPUTE | PREFIXED; + op->val = mlsd_8lsd_ea(word, suffix, regs); + goto compute_done; + } + } + break; case 2: /* tdi */ - if (rd 
& trap_compare(regs->gpr[ra], (short) instr)) + if (rd & trap_compare(regs->gpr[ra], (short) word)) goto trap; return 1; #endif case 3: /* twi */ - if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + if (rd & trap_compare((int)regs->gpr[ra], (short) word)) goto trap; return 1; @@ -1307,7 +1373,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -1; - switch (instr & 0x3f) { + switch (word & 0x3f) { case 48: /* maddhd */ asm volatile(PPC_MADDHD(%0, %1, %2, %3) : "=r" (op->val) : "r" (regs->gpr[ra]), @@ -1335,16 +1401,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 7: /* mulli */ - op->val = regs->gpr[ra] * (short) instr; + op->val = regs->gpr[ra] * (short) word; goto compute_done; case 8: /* subfic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1); return 1; case 10: /* cmpli */ - imm = (unsigned short) instr; + imm = (unsigned short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1354,7 +1420,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 11: /* cmpi */ - imm = (short) instr; + imm = (short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1364,35 +1430,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 12: /* addic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); return 1; case 13: /* addic. 
*/ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); set_cr0(regs, op); return 1; case 14: /* addi */ - imm = (short) instr; + imm = (short) word; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 15: /* addis */ - imm = ((short) instr) << 16; + imm = ((short) word) << 16; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 19: - if (((instr >> 1) & 0x1f) == 2) { + if (((word >> 1) & 0x1f) == 2) { /* addpcis */ - imm = (short) (instr & 0xffc1); /* d0 + d2 fields */ - imm |= (instr >> 15) & 0x3e; /* d1 field */ + imm = (short) (word & 0xffc1); /* d0 + d2 fields */ + imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; goto compute_done; } @@ -1400,65 +1466,65 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 20: /* rlwimi */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); imm = MASK32(mb, me); op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); goto logical_done; case 21: /* rlwinm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 23: /* rlwnm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; rb = regs->gpr[rb] & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 24: /* ori */ - op->val = regs->gpr[rd] | (unsigned short) instr; + op->val = regs->gpr[rd] | (unsigned short) word; goto logical_done_nocc; case 25: /* oris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] | (imm << 16); goto logical_done_nocc; case 26: /* xori */ - op->val = regs->gpr[rd] ^ (unsigned short) instr; + op->val = regs->gpr[rd] ^ 
(unsigned short) word; goto logical_done_nocc; case 27: /* xoris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] ^ (imm << 16); goto logical_done_nocc; case 28: /* andi. */ - op->val = regs->gpr[rd] & (unsigned short) instr; + op->val = regs->gpr[rd] & (unsigned short) word; set_cr0(regs, op); goto logical_done_nocc; case 29: /* andis. */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] & (imm << 16); set_cr0(regs, op); goto logical_done_nocc; #ifdef __powerpc64__ case 30: /* rld* */ - mb = ((instr >> 6) & 0x1f) | (instr & 0x20); + mb = ((word >> 6) & 0x1f) | (word & 0x20); val = regs->gpr[rd]; - if ((instr & 0x10) == 0) { - sh = rb | ((instr & 2) << 4); + if ((word & 0x10) == 0) { + sh = rb | ((word & 2) << 4); val = ROTATE(val, sh); - switch ((instr >> 2) & 3) { + switch ((word >> 2) & 3) { case 0: /* rldicl */ val &= MASK64_L(mb); break; @@ -1478,7 +1544,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } else { sh = regs->gpr[rb] & 0x3f; val = ROTATE(val, sh); - switch ((instr >> 1) & 7) { + switch ((word >> 1) & 7) { case 0: /* rldcl */ op->val = val & MASK64_L(mb); goto logical_done; @@ -1493,8 +1559,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 31: /* isel occupies 32 minor opcodes */ - if (((instr >> 1) & 0x1f) == 15) { - mb = (instr >> 6) & 0x1f; /* bc field */ + if (((word >> 1) & 0x1f) == 15) { + mb = (word >> 6) & 0x1f; /* bc field */ val = (regs->ccr >> (31 - mb)) & 1; val2 = (ra) ? 
regs->gpr[ra] : 0; @@ -1502,7 +1568,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 4: /* tw */ if (rd == 0x1f || (rd & trap_compare((int)regs->gpr[ra], @@ -1536,17 +1602,17 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->reg = rd; /* only MSR_EE and MSR_RI get changed if bit 15 set */ /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL; op->val = imm; return 0; #endif case 19: /* mfcr */ imm = 0xffffffffUL; - if ((instr >> 20) & 1) { + if ((word >> 20) & 1) { imm = 0xf0000000UL; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) break; imm >>= 4; } @@ -1560,7 +1626,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, val = regs->gpr[rd]; op->ccval = regs->ccr; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) op->ccval = (op->ccval & ~imm) | (val & imm); imm >>= 4; @@ -1568,7 +1634,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 339: /* mfspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MFSPR; op->reg = rd; op->spr = spr; @@ -1578,7 +1644,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 467: /* mtspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MTSPR; op->val = regs->gpr[rd]; op->spr = spr; @@ -1948,7 +2014,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 826: /* sradi with sh_5 = 0 */ case 827: /* sradi with sh_5 = 1 */ op->type = COMPUTE + SETREG + SETXER; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); 
ival = (signed long int) regs->gpr[rd]; op->val = ival >> sh; op->xerval = regs->xer; @@ -1964,7 +2030,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -1; op->type = COMPUTE + SETREG; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; if (sh) op->val = ROTATE(val, sh) & MASK64(0, 63 - sh); @@ -1979,34 +2045,34 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, */ case 54: /* dcbst */ op->type = MKOP(CACHEOP, DCBST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 86: /* dcbf */ op->type = MKOP(CACHEOP, DCBF, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 246: /* dcbtst */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 278: /* dcbt */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 982: /* icbi */ op->type = MKOP(CACHEOP, ICBI, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 1014: /* dcbz */ op->type = MKOP(CACHEOP, DCBZ, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; } break; @@ -2019,14 +2085,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->update_reg = ra; op->reg = rd; op->val = regs->gpr[rd]; - u = (instr >> 20) & UPDATE; + u = (word >> 20) & UPDATE; op->vsx_flags = 0; switch (opcode) { case 31: - u = instr & UPDATE; - op->ea = xform_ea(instr, regs); - switch ((instr >> 1) & 0x3ff) { + u = word & UPDATE; + op->ea = xform_ea(word, regs); + switch ((word >> 1) & 0x3ff) { case 20: /* lwarx */ op->type = MKOP(LARX, 0, 4); break; @@ -2271,25 +2337,25 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 12: /* lxsiwzx */ - op->reg 
= rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; break; case 76: /* lxsiwax */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, SIGNEXT, 4); op->element_size = 8; break; case 140: /* stxsiwx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; break; case 268: /* lxvx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2298,33 +2364,33 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(LOAD_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } case 332: /* lxvdsx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_SPLAT; break; case 364: /* lxvwsx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC; break; case 396: /* stxvx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2333,118 +2399,118 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->ea = ra ? 
regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(STORE_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } case 524: /* lxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 588: /* lxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; break; case 652: /* stxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 716: /* stxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; break; case 780: /* lxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 4; break; case 781: /* lxsibzx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 812: /* lxvh8x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 813: /* lxsihzx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 844: /* lxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 8; break; case 876: /* lxvb16x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; 
op->vsx_flags = VSX_CHECK_VEC; break; case 908: /* stxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 4; break; case 909: /* stxsibx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 940: /* stxvh8x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 941: /* stxsihx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 972: /* stxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 8; break; case 1004: /* stxvb16x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 1; op->vsx_flags = VSX_CHECK_VEC; @@ -2457,80 +2523,80 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 32: /* lwz */ case 33: /* lwzu */ op->type = MKOP(LOAD, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 34: /* lbz */ case 35: /* lbzu */ op->type = MKOP(LOAD, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 36: /* stw */ case 37: /* stwu */ op->type = MKOP(STORE, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 38: /* stb */ case 39: /* stbu */ op->type = MKOP(STORE, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 40: /* lhz */ case 41: /* lhzu */ op->type = MKOP(LOAD, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 42: /* lha */ case 43: /* lhau */ op->type = MKOP(LOAD, SIGNEXT 
| u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 44: /* sth */ case 45: /* sthu */ op->type = MKOP(STORE, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 46: /* lmw */ if (ra >= rd) break; /* invalid form, ra in range to load */ op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 47: /* stmw */ op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ op->type = MKOP(LOAD_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 50: /* lfd */ case 51: /* lfdu */ op->type = MKOP(LOAD_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 52: /* stfs */ case 53: /* stfsu */ op->type = MKOP(STORE_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 54: /* stfd */ case 55: /* stfdu */ op->type = MKOP(STORE_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #endif @@ -2538,14 +2604,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 56: /* lq */ if (!((rd & 1) || (rd == ra))) op->type = MKOP(LOAD, 0, 16); - op->ea = dqform_ea(instr, regs); + op->ea = dqform_ea(word, regs); break; #endif #ifdef CONFIG_VSX case 57: /* lfdp, lxsd, lxssp */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* lfdp */ if (rd & 1) break; /* reg must be even */ @@ -2569,8 +2635,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 58: /* ld[u], lwa */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* ld */ op->type = MKOP(LOAD, 0, 8); break; @@ 
-2586,16 +2652,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ - switch (instr & 7) { + switch (word & 7) { case 0: /* stfdp with LSB of DS field = 0 */ case 4: /* stfdp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->type = MKOP(STORE_FP, 0, 16); break; case 1: /* lxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2604,7 +2670,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; @@ -2613,7 +2679,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; @@ -2621,8 +2687,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2634,8 +2700,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 62: /* std[u] */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* std */ op->type = MKOP(STORE, 0, 8); break; @@ -2648,6 +2714,124 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } break; + case 1: /* 
Prefixed instructions */ + prefix_r = word & (1ul << 20); + ra = (suffix >> 16) & 0x1f; + op->update_reg = ra; + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 0: /* Type 00 Eight-Byte Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 41: /* plwa */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); + break; + case 42: /* plxsd */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 43: /* plxssp */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 46: /* pstxsd */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 47: /* pstxssp */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 51: /* plxv1 */ + op->reg += 32; + fallthrough; + case 50: /* plxv0 */ + op->type = MKOP(LOAD_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 55: /* pstxv1 */ + op->reg = rd + 32; + fallthrough; + case 54: /* pstxv0 */ + op->type = MKOP(STORE_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 56: /* plq */ + op->type = MKOP(LOAD, PREFIXED, 16); + break; + case 57: /* pld */ + op->type = MKOP(LOAD, PREFIXED, 8); + break; + case 60: /* stq */ + op->type = MKOP(STORE, PREFIXED, 16); + break; + case 61: /* pstd */ + op->type = MKOP(STORE, PREFIXED, 8); + break; + } + break; + case 1: /* Type 01 Eight-Byte Register-to-Register */ + break; + case 2: /* Type 10 Modified Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, 
regs); + switch (suffixopcode) { + case 32: /* plwz */ + op->type = MKOP(LOAD, PREFIXED, 4); + break; + case 34: /* plbz */ + op->type = MKOP(LOAD, PREFIXED, 1); + break; + case 36: /* pstw */ + op->type = MKOP(STORE, PREFIXED, 4); + break; + case 38: /* pstb */ + op->type = MKOP(STORE, PREFIXED, 1); + break; + case 40: /* plhz */ + op->type = MKOP(LOAD, PREFIXED, 2); + break; + case 42: /* plha */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2); + break; + case 44: /* psth */ + op->type = MKOP(STORE, PREFIXED, 2); + break; + case 48: /* plfs */ + op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4); + break; + case 50: /* plfd */ + op->type = MKOP(LOAD_FP, PREFIXED, 8); + break; + case 52: /* pstfs */ + op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4); + break; + case 54: /* pstfd */ + op->type = MKOP(STORE_FP, PREFIXED, 8); + break; + } + break; + case 3: /* Type 11 Modified Register-to-Register */ + break; + } #endif /* __powerpc64__ */ } @@ -2663,7 +2847,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; logical_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); logical_done_nocc: op->reg = ra; @@ -2671,7 +2855,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; arith_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); compute_done: op->reg = rd; @@ -2756,7 +2940,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) { unsigned long next_pc; - next_pc = truncate_if_32bit(regs->msr, regs->nip + 4); + next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type)); switch (GETTYPE(op->type)) { case COMPUTE: if (op->type & SETREG) @@ -3101,7 +3285,7 @@ NOKPROBE_SYMBOL(emulate_loadstore); * or -1 if the instruction is one that should not be stepped, * such as an rfid, or a mtmsrd that would clear MSR_RI. 
*/ -int emulate_step(struct pt_regs *regs, unsigned int instr) +int emulate_step(struct pt_regs *regs, struct ppc_inst instr) { struct instruction_op op; int r, err, type; @@ -3201,7 +3385,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) return 0; instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + regs->nip = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)); return 1; } NOKPROBE_SYMBOL(emulate_step); diff --git a/arch/powerpc/lib/test_code-patching.S b/arch/powerpc/lib/test_code-patching.S new file mode 100644 index 000000000000..a9be6107844e --- /dev/null +++ b/arch/powerpc/lib/test_code-patching.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 IBM Corporation + */ +#include <asm/ppc-opcode.h> + + .text + +#define globl(x) \ + .globl x; \ +x: + +globl(code_patching_test1) + nop + nop +globl(end_code_patching_test1) + +globl(code_patching_test1_expected) + .long OP_PREFIX << 26 + .long 0x0000000 diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 53df4146dd32..46af80279ebc 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -11,6 +11,7 @@ #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/code-patching.h> +#include <asm/inst.h> #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) @@ -19,40 +20,40 @@ * Defined with TEST_ prefix so it does not conflict with other * definitions. 
*/ -#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ +#define TEST_LD(r, base, i) ppc_inst(PPC_INST_LD | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ +#define TEST_LWZ(r, base, i) ppc_inst(PPC_INST_LWZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ +#define TEST_LWZX(t, a, b) ppc_inst(PPC_INST_LWZX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ +#define TEST_STD(r, base, i) ppc_inst(PPC_INST_STD | ___PPC_RS(r) | \ ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ +#define TEST_LDARX(t, a, b, eh) ppc_inst(PPC_INST_LDARX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b) | \ __PPC_EH(eh)) -#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ +#define TEST_STDCX(s, a, b) ppc_inst(PPC_INST_STDCX | ___PPC_RS(s) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ +#define TEST_LFSX(t, a, b) ppc_inst(PPC_INST_LFSX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ +#define TEST_STFSX(s, a, b) ppc_inst(PPC_INST_STFSX | ___PPC_RS(s) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ +#define TEST_LFDX(t, a, b) ppc_inst(PPC_INST_LFDX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ +#define TEST_STFDX(s, a, b) ppc_inst(PPC_INST_STFDX | ___PPC_RS(s) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ +#define TEST_LVX(t, a, b) ppc_inst(PPC_INST_LVX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ +#define TEST_STVX(s, a, b) ppc_inst(PPC_INST_STVX | ___PPC_RS(s) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LXVD2X(s, a, 
b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ +#define TEST_LXVD2X(s, a, b) ppc_inst(PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) ppc_inst(PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_ADD(t, a, b) ppc_inst(PPC_INST_ADD | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ +#define TEST_ADD_DOT(t, a, b) ppc_inst(PPC_INST_ADD | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define TEST_ADDC(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ +#define TEST_ADDC(t, a, b) ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADDC_DOT(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ +#define TEST_ADDC_DOT(t, a, b) ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define MAX_SUBTESTS 16 @@ -461,7 +462,7 @@ struct compute_test { struct { char *descr; unsigned long flags; - unsigned int instr; + struct ppc_inst instr; struct pt_regs regs; } subtests[MAX_SUBTESTS + 1]; }; @@ -472,7 +473,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "R0 = LONG_MAX", - .instr = PPC_INST_NOP, + .instr = ppc_inst(PPC_INST_NOP), .regs = { .gpr[0] = LONG_MAX, } @@ -842,16 +843,16 @@ static struct compute_test compute_tests[] = { }; static int __init emulate_compute_instr(struct pt_regs *regs, - unsigned int instr) + struct ppc_inst instr) { struct instruction_op op; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; if (analyse_instr(&op, regs, instr) != 1 || GETTYPE(op.type) != COMPUTE) { - pr_info("emulation failed, instruction = 0x%08x\n", instr); + pr_info("emulation failed, instruction = 0x%08x\n", ppc_inst_val(instr)); return -EFAULT; } @@ -860,18 +861,18 @@ static int __init emulate_compute_instr(struct pt_regs *regs, } static int __init 
execute_compute_instr(struct pt_regs *regs, - unsigned int instr) + struct ppc_inst instr) { extern int exec_instr(struct pt_regs *regs); extern s32 patch__exec_instr; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = 0x%08x\n", instr); + pr_info("execution failed, instruction = 0x%08x\n", ppc_inst_val(instr)); return -EFAULT; } @@ -891,7 +892,8 @@ static void __init run_tests_compute(void) unsigned long flags; struct compute_test *test; struct pt_regs *regs, exp, got; - unsigned int i, j, k, instr; + unsigned int i, j, k; + struct ppc_inst instr; bool ignore_gpr, ignore_xer, ignore_ccr, passed; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 877d880890fe..2702e8762c0d 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -81,7 +81,7 @@ _GLOBAL(hash_page) rlwinm. r8,r8,0,0,20 /* extract pt base address */ #endif #ifdef CONFIG_SMP - beq- hash_page_out /* return if no mapping */ + beq- .Lhash_page_out /* return if no mapping */ #else /* XXX it seems like the 601 will give a machine fault on the rfi if its alignment is wrong (bottom 4 bits of address are @@ -109,11 +109,11 @@ _GLOBAL(hash_page) #if (PTE_FLAGS_OFFSET != 0) addi r8,r8,PTE_FLAGS_OFFSET #endif -retry: +.Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ andc. r5,r3,r6 /* check access & ~permission */ #ifdef CONFIG_SMP - bne- hash_page_out /* return if access not permitted */ + bne- .Lhash_page_out /* return if access not permitted */ #else bnelr- #endif @@ -128,7 +128,7 @@ retry: #endif /* CONFIG_SMP */ #endif /* CONFIG_PTE_64BIT */ stwcx. 
r5,0,r8 /* attempt to update PTE */ - bne- retry /* retry if someone got there first */ + bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ #ifndef CONFIG_VMAP_STACK @@ -156,13 +156,14 @@ retry: #endif #ifdef CONFIG_SMP -hash_page_out: +.Lhash_page_out: eieio lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) blr #endif /* CONFIG_SMP */ +_ASM_NOKPROBE_SYMBOL(hash_page) /* * Add an entry for a particular page to the hash table. @@ -267,6 +268,7 @@ _GLOBAL(add_hash_page) lwz r0,4(r1) mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(add_hash_page) /* * This routine adds a hardware PTE to the hash table. @@ -360,7 +362,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */ CMPPTE 0,r6,r5 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_slot + beq+ .Lfound_slot patch_site 0f, patch__hash_page_B /* Search the secondary PTEG for a matching PTE */ @@ -372,7 +374,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 2: LDPTEu r6,HPTE_SIZE(r4) CMPPTE 0,r6,r5 bdnzf 2,2b - beq+ found_slot + beq+ .Lfound_slot xori r5,r5,PTE_H /* clear H bit again */ /* Search the primary PTEG for an empty slot */ @@ -381,7 +383,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */ TST_V(r6) /* test valid bit */ bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_empty + beq+ .Lfound_empty /* update counter of times that the primary PTEG is full */ lis r4, (primary_pteg_full - PAGE_OFFSET)@ha @@ -399,7 +401,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 2: LDPTEu r6,HPTE_SIZE(r4) TST_V(r6) bdnzf 2,2b - beq+ found_empty + beq+ .Lfound_empty xori r5,r5,PTE_H /* clear H bit again */ /* @@ -437,9 +439,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) #ifndef CONFIG_SMP /* Store PTE in PTEG */ -found_empty: +.Lfound_empty: STPTE r5,0(r4) -found_slot: +.Lfound_slot: STPTE r8,HPTE_SIZE/2(r4) #else /* CONFIG_SMP */ @@ -460,8 
+462,8 @@ found_slot: * We do however have to make sure that the PTE is never in an invalid * state with the V bit set. */ -found_empty: -found_slot: +.Lfound_empty: +.Lfound_slot: CLR_V(r5,r0) /* clear V (valid) bit in PTE */ STPTE r5,0(r4) sync @@ -474,6 +476,7 @@ found_slot: sync /* make sure pte updates get to memory */ blr +_ASM_NOKPROBE_SYMBOL(create_hpte) .section .bss .align 2 @@ -630,6 +633,7 @@ _GLOBAL(flush_hash_pages) isync blr EXPORT_SYMBOL(flush_hash_pages) +_ASM_NOKPROBE_SYMBOL(flush_hash_pages) /* * Flush an entry from the TLB @@ -667,6 +671,7 @@ _GLOBAL(_tlbie) sync #endif /* CONFIG_SMP */ blr +_ASM_NOKPROBE_SYMBOL(_tlbie) /* * Flush the entire TLB. 603/603e only @@ -708,3 +713,4 @@ _GLOBAL(_tlbia) isync #endif /* CONFIG_SMP */ blr +_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 39ba53ca5bb5..a6dcc708eee3 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -170,6 +170,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pr_debug("RAM mapped without BATs\n"); return base; } + if (debug_pagealloc_enabled()) { + if (base >= border) + return base; + if (top >= border) + top = border; + } if (!strict_kernel_rwx_enabled() || base >= border || top <= border) return __mmu_mapin_ram(base, top); @@ -187,6 +193,7 @@ void mmu_mark_initmem_nx(void) int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) @@ -201,9 +208,10 @@ void mmu_mark_initmem_nx(void) size = block_size(base, top); size = max(size, 128UL << 10); if ((top - base) > size) { - if (strict_kernel_rwx_enabled()) - pr_warn("Kernel _etext not properly aligned\n"); size <<= 1; + if (strict_kernel_rwx_enabled() && base + size > border) + pr_warn("Some RW data is getting mapped X. 
" + "Adjust CONFIG_DATA_SHIFT to avoid that.\n"); } setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 9cd15937e88a..8b4b0a602158 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -365,17 +365,6 @@ pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, * hash fault look at them. */ memset(pgtable, 0, PTE_FRAG_SIZE); - /* - * Serialize against find_current_mm_pte variants which does lock-less - * lookup in page tables with local interrupts disabled. For huge pages - * it casts pmd_t to pte_t. Since format of pte_t is different from - * pmd_t we want to prevent transit from pmd pointing to page table - * to pmd pointing to huge page (and back) while interrupts are disabled. - * We clear pmd to possibly replace it with page table pointer in - * different code paths. So make sure we wait for the parallel - * find_curren_mm_pte to finish. - */ - serialize_against_pte_lookup(mm); return old_pmd; } diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c index 4a70d8dd39cd..0fbf3dc9f2c2 100644 --- a/arch/powerpc/mm/book3s64/hash_tlb.c +++ b/arch/powerpc/mm/book3s64/hash_tlb.c @@ -176,7 +176,6 @@ void hash__tlb_flush(struct mmu_gather *tlb) * from the hash table (and the TLB). But keeps * the linux PTEs intact. * - * @mm : mm_struct of the target address space (generally init_mm) * @start : starting address * @end : ending address (not included in the flush) * @@ -189,17 +188,14 @@ void hash__tlb_flush(struct mmu_gather *tlb) * Because of that usage pattern, it is implemented for small size rather * than speed. 
*/ -void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +void __flush_hash_table_range(unsigned long start, unsigned long end) { - bool is_thp; int hugepage_shift; unsigned long flags; - start = _ALIGN_DOWN(start, PAGE_SIZE); - end = _ALIGN_UP(end, PAGE_SIZE); + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); - BUG_ON(!mm->pgd); /* * Note: Normally, we should only ever use a batch within a @@ -212,21 +208,15 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, local_irq_save(flags); arch_enter_lazy_mmu_mode(); for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp, - &hugepage_shift); + pte_t *ptep = find_init_mm_pte(start, &hugepage_shift); unsigned long pte; if (ptep == NULL) continue; pte = pte_val(*ptep); - if (is_thp) - trace_hugepage_invalidate(start, pte); if (!(pte & H_PAGE_HASHPTE)) continue; - if (unlikely(is_thp)) - hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); - else - hpte_need_flush(mm, start, ptep, pte, hugepage_shift); + hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift); } arch_leave_lazy_mmu_mode(); local_irq_restore(flags); @@ -238,7 +228,7 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) pte_t *start_pte; unsigned long flags; - addr = _ALIGN_DOWN(addr, PMD_SIZE); + addr = ALIGN_DOWN(addr, PMD_SIZE); /* * Note: Normally, we should only ever use a batch within a * PTE locked section. This violates the rule, but will work diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 8ed2411c3f39..0124003e60d0 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -66,6 +66,9 @@ #include <mm/mmu_decl.h> +#include "internal.h" + + #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) #else @@ -870,6 +873,9 @@ static void __init htab_initialize(void) printk(KERN_INFO "Using 1TB segments\n"); } + if (stress_slb_enabled) + static_branch_enable(&stress_slb_key); + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. @@ -1350,8 +1356,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, goto bail; } - /* Add _PAGE_PRESENT to the required access perm */ - access |= _PAGE_PRESENT; + /* + * Add _PAGE_PRESENT to the required access perm. If there are parallel + * updates to the pte that can possibly clear _PAGE_PTE, catch that too. + * + * We can safely use the return pte address in rest of the function + * because we do set H_PAGE_BUSY which prevents further updates to pte + * from generic code. + */ + access |= _PAGE_PRESENT | _PAGE_PTE; /* * Pre-check access permissions (will be re-checked atomically @@ -1539,14 +1552,11 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) } #endif -static void hash_preload(struct mm_struct *mm, unsigned long ea, +static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, bool is_exec, unsigned long trap) { - int hugepage_shift; unsigned long vsid; pgd_t *pgdir; - pte_t *ptep; - unsigned long flags; int rc, ssize, update_flags = 0; unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0); @@ -1568,30 +1578,18 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea, vsid = get_user_vsid(&mm->context, ea, ssize); if (!vsid) return; - /* - * Hash doesn't like irqs. Walking linux page table with irq disabled - * saves us from holding multiple locks. - */ - local_irq_save(flags); - /* - * THP pages use update_mmu_cache_pmd. We don't do - * hash preload there. 
Hence can ignore THP here - */ - ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift); - if (!ptep) - goto out_exit; - - WARN_ON(hugepage_shift); #ifdef CONFIG_PPC_64K_PAGES /* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on * a 64K kernel), then we don't preload, hash_page() will take * care of it once we actually try to access the page. * That way we don't have to duplicate all of the logic for segment * page size demotion here + * Called with PTL held, hence can be sure the value won't change in + * between. */ if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep)) - goto out_exit; + return; #endif /* CONFIG_PPC_64K_PAGES */ /* Is that local to this CPU ? */ @@ -1616,8 +1614,6 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea, mm_ctx_user_psize(&mm->context), mm_ctx_user_psize(&mm->context), pte_val(*ptep)); -out_exit: - local_irq_restore(flags); } /* @@ -1638,10 +1634,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, unsigned long trap; bool is_exec; - if (radix_enabled()) { - prefetch((void *)address); + if (radix_enabled()) return; - } /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(*ptep) || address >= TASK_SIZE) @@ -1668,32 +1662,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, return; } - hash_preload(vma->vm_mm, address, is_exec, trap); -} - -#ifdef CONFIG_PPC_MEM_KEYS -/* - * Return the protection key associated with the given address and the - * mm_struct. 
- */ -u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) -{ - pte_t *ptep; - u16 pkey = 0; - unsigned long flags; - - if (!mm || !mm->pgd) - return 0; - - local_irq_save(flags); - ptep = find_linux_pte(mm->pgd, address, NULL, NULL); - if (ptep) - pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep))); - local_irq_restore(flags); - - return pkey; + hash_preload(vma->vm_mm, ptep, address, is_exec, trap); } -#endif /* CONFIG_PPC_MEM_KEYS */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline void tm_flush_hash_page(int local) diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h new file mode 100644 index 000000000000..7eda0d30d765 --- /dev/null +++ b/arch/powerpc/mm/book3s64/internal.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H +#define ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H + +#include <linux/jump_label.h> + +extern bool stress_slb_enabled; + +DECLARE_STATIC_KEY_FALSE(stress_slb_key); + +static inline bool stress_slb(void) +{ + return static_branch_unlikely(&stress_slb_key); +} + +#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index e0bb69c616e4..c58ad1049909 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -109,15 +109,25 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return __pmd(old_pmd); +} + +pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, int full) +{ + pmd_t pmd; + VM_BUG_ON(addr & ~HPAGE_PMD_MASK); + VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)) || !pmd_present(*pmdp)); + pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); /* - * This ensures that generic code 
that rely on IRQ disabling - * to prevent a parallel THP split work as expected. - * - * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires - * a special case check in pmd_access_permitted. + * If it is not a fullmm flush, then we can possibly end up converting + * this PMD pte entry to a regular level 0 PTE by a parallel page fault. + * Make sure we flush the tlb in this case. */ - serialize_against_pte_lookup(vma->vm_mm); - return __pmd(old_pmd); + if (!full) + flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE); + return pmd; } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) @@ -146,19 +156,6 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) pmdv &= _HPAGE_CHG_MASK; return pmd_set_protbits(__pmd(pmdv), newprot); } - -/* - * This is called at the end of handling a user page fault, when the - * fault has been handled by updating a HUGE PMD entry in the linux page tables. - * We use it to preload an HPTE into the hash table corresponding to - * the updated linux HUGE PMD entry. - */ -void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd) -{ - if (radix_enabled()) - prefetch((void *)addr); -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* For use by kexec */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 97891ca0d428..8acb96de0e48 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -267,7 +267,7 @@ static int __meminit create_physical_mapping(unsigned long start, pgprot_t prot; int psize; - start = _ALIGN_UP(start, PAGE_SIZE); + start = ALIGN(start, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { unsigned long gap, previous_size; int rc; @@ -970,7 +970,13 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre pmd = *pmdp; pmd_clear(pmdp); - /*FIXME!! 
Verify whether we need this kick below */ + /* + * pmdp collapse_flush needs to ensure that there are no parallel gup + * walks after this call. This is needed so that we can have a stable + * page ref count when collapsing a page. We don't allow collapsing a page + * if a gup reference has been taken on the page. We can ensure that by sending an IPI + * because the gup walk happens with IRQs disabled. + */ serialize_against_pte_lookup(vma->vm_mm); radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); @@ -1031,17 +1037,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0); old_pmd = __pmd(old); - /* - * Serialize against find_current_mm_pte which does lock-less - * lookup in page tables with local interrupts disabled. For huge pages - * it casts pmd_t to pte_t. Since format of pte_t is different from - * pmd_t we want to prevent transit from pmd pointing to page table - * to pmd pointing to huge page (and back) while interrupts are disabled. - * We clear pmd to possibly replace it with page table pointer in - * different code paths. So make sure we wait for the parallel - * find_current_mm_pte to finish. 
- */ - serialize_against_pte_lookup(mm); return old_pmd; } diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 758ade2c2b6e..b5cc9b23cf02 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -884,9 +884,7 @@ is_local: if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { hstart = (start + PMD_SIZE - 1) & PMD_MASK; hend = end & PMD_MASK; - if (hstart == hend) - hflush = false; - else + if (hstart < hend) hflush = true; } diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 716204aee3da..8141e8b40ee5 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -25,6 +25,9 @@ #include <asm/udbg.h> #include <asm/code-patching.h> +#include "internal.h" + + enum slb_index { LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ KSTACK_INDEX = 1, /* Kernel stack map */ @@ -54,6 +57,17 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } +bool stress_slb_enabled __initdata; + +static int __init parse_stress_slb(char *p) +{ + stress_slb_enabled = true; + return 0; +} +early_param("stress_slb", parse_stress_slb); + +__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key); + static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM @@ -68,7 +82,7 @@ static void assert_slb_presence(bool present, unsigned long ea) * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware * ignores all other bits from 0-27, so just clear them all. 
*/ - ea &= ~((1UL << 28) - 1); + ea &= ~((1UL << SID_SHIFT) - 1); asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); @@ -153,14 +167,42 @@ void slb_flush_all_realmode(void) asm volatile("slbmte %0,%0; slbia" : : "r" (0)); } +static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside) +{ + struct slb_shadow *p = get_slb_shadow(); + unsigned long ksp_esid_data, ksp_vsid_data; + u32 ih; + + /* + * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside + * information created with Class=0 entries, which we use for kernel + * SLB entries (the SLB entries themselves are still invalidated). + * + * Older processors will ignore this optimisation. Over-invalidation + * is fine because we never rely on lookaside information existing. + */ + if (preserve_kernel_lookaside) + ih = 1; + else + ih = 0; + + ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid); + ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid); + + asm volatile(PPC_SLBIA(%0)" \n" + "slbmte %1, %2 \n" + :: "i" (ih), + "r" (ksp_vsid_data), + "r" (ksp_esid_data) + : "memory"); +} + /* * This flushes non-bolted entries, it can be run in virtual mode. Must * be called with interrupts disabled. 
*/ void slb_flush_and_restore_bolted(void) { - struct slb_shadow *p = get_slb_shadow(); - BUILD_BUG_ON(SLB_NUM_BOLTED != 2); WARN_ON(!irqs_disabled()); @@ -171,13 +213,10 @@ void slb_flush_and_restore_bolted(void) */ hard_irq_disable(); - asm volatile("isync\n" - "slbia\n" - "slbmte %0, %1\n" - "isync\n" - :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), - "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) - : "memory"); + isync(); + __slb_flush_and_restore_bolted(false); + isync(); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -400,6 +439,30 @@ void preload_new_slb_context(unsigned long start, unsigned long sp) local_irq_enable(); } +static void slb_cache_slbie_kernel(unsigned int index) +{ + unsigned long slbie_data = get_paca()->slb_cache[index]; + unsigned long ksp = get_paca()->kstack; + + slbie_data <<= SID_SHIFT; + slbie_data |= 0xc000000000000000ULL; + if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data) + return; + slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT; + + asm volatile("slbie %0" : : "r" (slbie_data)); +} + +static void slb_cache_slbie_user(unsigned int index) +{ + unsigned long slbie_data = get_paca()->slb_cache[index]; + + slbie_data <<= SID_SHIFT; + slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT; + slbie_data |= SLBIE_C; /* user slbs have C=1 */ + + asm volatile("slbie %0" : : "r" (slbie_data)); +} /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) @@ -414,8 +477,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * which would update the slb_cache/slb_cache_ptr fields in the PACA. 
*/ hard_irq_disable(); - asm volatile("isync" : : : "memory"); - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + isync(); + if (stress_slb()) { + __slb_flush_and_restore_bolted(false); + isync(); + get_paca()->slb_cache_ptr = 0; + get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; + + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { /* * SLBIA IH=3 invalidates all Class=1 SLBEs and their * associated lookaside structures, which matches what @@ -423,47 +492,29 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * cache. */ asm volatile(PPC_SLBIA(3)); + } else { unsigned long offset = get_paca()->slb_cache_ptr; if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { - unsigned long slbie_data = 0; - - for (i = 0; i < offset; i++) { - unsigned long ea; - - ea = (unsigned long) - get_paca()->slb_cache[i] << SID_SHIFT; - /* - * Could assert_slb_presence(true) here, but - * hypervisor or machine check could have come - * in and removed the entry at this point. - */ - - slbie_data = ea; - slbie_data |= user_segment_size(slbie_data) - << SLBIE_SSIZE_SHIFT; - slbie_data |= SLBIE_C; /* user slbs have C=1 */ - asm volatile("slbie %0" : : "r" (slbie_data)); - } + /* + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. 
+ */ + + for (i = 0; i < offset; i++) + slb_cache_slbie_user(i); /* Workaround POWER5 < DD2.1 issue */ if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1) - asm volatile("slbie %0" : : "r" (slbie_data)); + slb_cache_slbie_user(0); } else { - struct slb_shadow *p = get_slb_shadow(); - unsigned long ksp_esid_data = - be64_to_cpu(p->save_area[KSTACK_INDEX].esid); - unsigned long ksp_vsid_data = - be64_to_cpu(p->save_area[KSTACK_INDEX].vsid); - - asm volatile(PPC_SLBIA(1) "\n" - "slbmte %0,%1\n" - "isync" - :: "r"(ksp_vsid_data), - "r"(ksp_esid_data)); + /* Flush but retain kernel lookaside information */ + __slb_flush_and_restore_bolted(true); + isync(); get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; } @@ -503,7 +554,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * address accesses by the kernel (user mode won't happen until * rfid, which is safe). */ - asm volatile("isync" : : : "memory"); + isync(); } void slb_set_size(u16 size) @@ -571,6 +622,9 @@ static void slb_cache_update(unsigned long esid_data) if (cpu_has_feature(CPU_FTR_ARCH_300)) return; /* ISAv3.0B and later does not use slb_cache */ + if (stress_slb()) + return; + /* * Now update slb cache entries */ @@ -580,7 +634,7 @@ static void slb_cache_update(unsigned long esid_data) * We have space in slb cache for optimized switch_slb(). * Top 36 bits from esid_data as per ISA */ - local_paca->slb_cache[slb_cache_index++] = esid_data >> 28; + local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT; local_paca->slb_cache_ptr++; } else { /* @@ -671,6 +725,28 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * accesses user memory before it returns to userspace with rfid. */ assert_slb_presence(false, ea); + if (stress_slb()) { + int slb_cache_index = local_paca->slb_cache_ptr; + + /* + * stress_slb() does not use slb cache, repurpose as a + * cache of inserted (non-bolted) kernel SLB entries. 
All + * non-bolted kernel entries are flushed on any user fault, + * or if there are already 3 non-bolted kernel entries. + */ + BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3); + if (!kernel || slb_cache_index == 3) { + int i; + + for (i = 0; i < slb_cache_index; i++) + slb_cache_slbie_kernel(i); + slb_cache_index = 0; + } + + if (kernel) + local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT; + local_paca->slb_cache_ptr = slb_cache_index; + } asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 84af6c8eecf7..2393ed9d84bb 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -41,18 +41,19 @@ #include <asm/siginfo.h> #include <asm/debug.h> #include <asm/kup.h> +#include <asm/inst.h> /* * Check whether the instruction inst is a store using * an update addressing form which will update r1. */ -static bool store_updates_sp(unsigned int inst) +static bool store_updates_sp(struct ppc_inst inst) { /* check for 1 in the rA field */ - if (((inst >> 16) & 0x1f) != 1) + if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1) return false; /* check major opcode */ - switch (inst >> 26) { + switch (ppc_inst_primary_opcode(inst)) { case OP_STWU: case OP_STBU: case OP_STHU: @@ -60,10 +61,10 @@ static bool store_updates_sp(unsigned int inst) case OP_STFDU: return true; case OP_STD: /* std or stdu */ - return (inst & 3) == 1; + return (ppc_inst_val(inst) & 3) == 1; case OP_31: /* check minor opcode */ - switch ((inst >> 1) & 0x3ff) { + switch ((ppc_inst_val(inst) >> 1) & 0x3ff) { case OP_31_XOP_STDUX: case OP_31_XOP_STWUX: case OP_31_XOP_STBUX: @@ -118,9 +119,34 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, - int pkey) +#ifdef CONFIG_PPC_MEM_KEYS +static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long
address, + struct vm_area_struct *vma) { + struct mm_struct *mm = current->mm; + int pkey; + + /* + * We don't try to fetch the pkey from page table because reading + * page table without locking doesn't guarantee stable pte value. + * Hence the pkey value that we return to userspace can be different + * from the pkey that actually caused access error. + * + * It does *not* guarantee that the VMA we find here + * was the one that we faulted on. + * + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); + * 2. T1 : set AMR to deny access to pkey=4, touches page + * 3. T1 : faults... + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); + * 5. T1 : enters fault handler, takes mmap_sem, etc... + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ + pkey = vma_pkey(vma); + + up_read(&mm->mmap_sem); + /* * If we are in kernel mode, bail out with a SEGV, this will * be caught by the assembly which will restore the non-volatile @@ -133,6 +159,7 @@ static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, return 0; } +#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -255,7 +282,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, * expand to 1MB without further checks.
*/ if (address + 0x100000 < vma->vm_end) { - unsigned int __user *nip = (unsigned int __user *)regs->nip; + struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip; /* get user regs even if this fault is in kernel mode */ struct pt_regs *uregs = current->thread.regs; if (uregs == NULL) @@ -278,9 +305,9 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && access_ok(nip, sizeof(*nip))) { - unsigned int inst; + struct ppc_inst inst; - if (!probe_user_read(&inst, nip, sizeof(inst))) + if (!probe_user_read_inst(&inst, nip)) return !store_updates_sp(inst); *must_retry = true; } @@ -289,8 +316,23 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, return false; } -static bool access_error(bool is_write, bool is_exec, - struct vm_area_struct *vma) +#ifdef CONFIG_PPC_MEM_KEYS +static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, + struct vm_area_struct *vma) +{ + /* + * Make sure to check the VMA so that we do not perform + * faults just to hit a pkey fault as soon as we fill in a + * page. 
Only called for current mm, hence foreign == 0 + */ + if (!arch_vma_access_permitted(vma, is_write, is_exec, 0)) + return true; + + return false; +} +#endif + +static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { /* * Allow execution from readable areas if the MMU does not @@ -483,10 +525,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (error_code & DSISR_KEYFAULT) - return bad_key_fault_exception(regs, address, - get_mm_addr_key(mm, address)); - /* * We want to do this outside mmap_sem, because reading code around nip * can result in fault, which will cause a deadlock when called with @@ -555,6 +593,13 @@ retry: return bad_area(regs, address); good_area: + +#ifdef CONFIG_PPC_MEM_KEYS + if (unlikely(access_pkey_error(is_write, is_exec, + (error_code & DSISR_KEYFAULT), vma))) + return bad_access_pkey(regs, address, vma); +#endif /* CONFIG_PPC_MEM_KEYS */ + if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); @@ -565,21 +610,6 @@ good_area: */ fault = handle_mm_fault(vma, address, flags); -#ifdef CONFIG_PPC_MEM_KEYS - /* - * we skipped checking for access error due to key earlier. - * Check that using handle_mm_fault error return. 
- */ - if (unlikely(fault & VM_FAULT_SIGSEGV) && - !arch_vma_access_permitted(vma, is_write, is_exec, 0)) { - - int pkey = vma_pkey(vma); - - up_read(&mm->mmap_sem); - return bad_key_fault_exception(regs, address, pkey); - } -#endif /* CONFIG_PPC_MEM_KEYS */ - major |= fault & VM_FAULT_MAJOR; if (fault_signal_pending(fault, regs)) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f122d0f2c295..5b3d01404266 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -30,7 +30,8 @@ bool hugetlb_disabled = false; #define hugepd_none(hpd) (hpd_val(hpd) == 0) -#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \ + __builtin_ffs(sizeof(void *))) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { @@ -53,24 +54,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); - new = NULL; - } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = NULL; - num_hugepd = 1; - new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; - new = NULL; } - if (!cachep && !new) { + if (!cachep) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - if (cachep) - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -101,10 +95,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - if (cachep) - kmem_cache_free(cachep, new); - else - pte_free(mm, new); + kmem_cache_free(cachep, new); } else { kmemleak_ignore(new); } @@ -190,6 +181,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long 
addr, unsigned long sz if (!hpdp) return NULL; + if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K) + return pte_alloc_map(mm, (pmd_t *)hpdp, addr); + BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, @@ -255,7 +249,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h) struct hugepd_freelist { struct rcu_head rcu; unsigned int index; - void *ptes[0]; + void *ptes[]; }; static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); @@ -332,13 +326,20 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); } +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) +{ + pgtable_t token = pmd_pgtable(*pmd); + + pmd_clear(pmd); + pte_free_tlb(tlb, token, addr); + mm_dec_nr_ptes(tlb->mm); +} + static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -354,11 +355,17 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { + if (pmd_none_or_clear_bad(pmd)) + continue; + /* * if it is not hugepd pointer, we should already find * it cleared. 
*/ - WARN_ON(!pmd_none_or_clear_bad(pmd)); + WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); + + hugetlb_free_pte_range(tlb, pmd, addr); + continue; } /* diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 872df48ae41b..36c39bd37256 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -96,11 +96,13 @@ static void __init MMU_setup(void) if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } - if (debug_pagealloc_enabled()) { - __map_without_bats = 1; + if (IS_ENABLED(CONFIG_PPC_8xx)) + return; + + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; - } - if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx)) + + if (strict_kernel_rwx_enabled()) __map_without_ltlbs = 1; } @@ -170,8 +172,6 @@ void __init MMU_init(void) btext_unmap(); #endif - kasan_mmu_init(); - setup_kup(); /* Shortly after that, the entire linear mapping will be available */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4002ced3596f..c7ce4ec5060e 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -203,7 +203,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; /* Align to the page size of the linear mapping. 
*/ - start = _ALIGN_DOWN(start, page_size); + start = ALIGN_DOWN(start, page_size); pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); @@ -292,7 +292,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, unsigned long alt_start = ~0, alt_end = ~0; unsigned long base_pfn; - start = _ALIGN_DOWN(start, page_size); + start = ALIGN_DOWN(start, page_size); if (altmap) { alt_start = altmap->base_pfn; alt_end = altmap->base_pfn + altmap->reserve + diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c new file mode 100644 index 000000000000..db4ef44af22f --- /dev/null +++ b/arch/powerpc/mm/kasan/8xx.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/memblock.h> +#include <linux/hugetlb.h> +#include <asm/pgalloc.h> + +static int __init +kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block) +{ + pmd_t *pmd = pmd_ptr_k(k_start); + unsigned long k_cur, k_next; + + for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) { + pte_basic_t *new; + + k_next = pgd_addr_end(k_cur, k_end); + k_next = pgd_addr_end(k_next, k_end); + if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) + continue; + + new = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + if (!new) + return -ENOMEM; + + *new = pte_val(pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block)), PAGE_KERNEL))); + + hugepd_populate_kernel((hugepd_t *)pmd, (pte_t *)new, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmd + 1, (pte_t *)new, PAGE_SHIFT_8M); + } + return 0; +} + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur; + int ret; + void *block; + + block = memblock_alloc(k_end - k_start, SZ_8M); + if (!block) + return -ENOMEM; + + if (IS_ALIGNED(k_start, SZ_8M)) { + 
kasan_init_shadow_8M(k_start, ALIGN_DOWN(k_end, SZ_8M), block); + k_cur = ALIGN_DOWN(k_end, SZ_8M); + if (k_cur == k_end) + goto finish; + } else { + k_cur = k_start; + } + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + for (; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_ptr_k(k_cur); + void *va = block + k_cur - k_start; + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + if (k_cur < ALIGN_DOWN(k_end, SZ_512K)) + pte = pte_mkhuge(pte); + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } +finish: + flush_tlb_kernel_range(k_start, k_end); + return 0; +} diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 6577897673dd..bb1a5408b86b 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -3,3 +3,5 @@ KASAN_SANITIZE := n obj-$(CONFIG_PPC32) += kasan_init_32.o +obj-$(CONFIG_PPC_8xx) += 8xx.o +obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c new file mode 100644 index 000000000000..4bc491a4a1fd --- /dev/null +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/memblock.h> +#include <asm/pgalloc.h> +#include <mm/mmu_decl.h> + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur = k_start; + int k_size = k_end - k_start; + int k_size_base = 1 << (ffs(k_size) - 1); + int ret; + void *block; + + block = memblock_alloc(k_size, k_size_base); + + if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { + int k_size_more = 1 << (ffs(k_size - k_size_base) - 1); + + setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); + if (k_size_more >= SZ_128K) + 
setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, + k_size_more, PAGE_KERNEL); + if (v_block_mapped(k_start)) + k_cur = k_start + k_size_base; + if (v_block_mapped(k_start + k_size_base)) + k_cur = k_start + k_size_base + k_size_more; + + update_bats(); + } + + if (!block) + block = memblock_alloc(k_size, PAGE_SIZE); + if (!block) + return -ENOMEM; + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + kasan_update_early_region(k_start, k_cur, __pte(0)); + + for (; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_ptr_k(k_cur); + void *va = block + k_cur - k_start; + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } + flush_tlb_kernel_range(k_start, k_end); + return 0; +} diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index c99aa8cbaac5..c42085801c04 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -5,9 +5,7 @@ #include <linux/kasan.h> #include <linux/printk.h> #include <linux/memblock.h> -#include <linux/moduleloader.h> #include <linux/sched/task.h> -#include <linux/vmalloc.h> #include <asm/pgalloc.h> #include <asm/code-patching.h> #include <mm/mmu_decl.h> @@ -30,40 +28,31 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); } -static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) { pmd_t *pmd; unsigned long k_cur, k_next; - pte_t *new = NULL; pmd = pmd_ptr_k(k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { + pte_t *new; + k_next = pgd_addr_end(k_cur, k_end); if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) continue; - if (!new) - new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); + new = 
memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); if (!new) return -ENOMEM; kasan_populate_pte(new, PAGE_KERNEL); - - smp_wmb(); /* See comment in __pte_alloc */ - - spin_lock(&init_mm.page_table_lock); - /* Has another populated it ? */ - if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) { - pmd_populate_kernel(&init_mm, pmd, new); - new = NULL; - } - spin_unlock(&init_mm.page_table_lock); + pmd_populate_kernel(&init_mm, pmd, new); } return 0; } -static int __init kasan_init_region(void *start, size_t size) +int __init __weak kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); @@ -76,75 +65,63 @@ static int __init kasan_init_region(void *start, size_t size) return ret; block = memblock_alloc(k_end - k_start, PAGE_SIZE); + if (!block) + return -ENOMEM; for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_ptr_k(k_cur); void *va = block + k_cur - k_start; pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); - if (!va) - return -ENOMEM; - __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); } flush_tlb_kernel_range(k_start, k_end); return 0; } -static void __init kasan_remap_early_shadow_ro(void) +void __init +kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { - pgprot_t prot = kasan_prot_ro(); - unsigned long k_start = KASAN_SHADOW_START; - unsigned long k_end = KASAN_SHADOW_END; unsigned long k_cur; phys_addr_t pa = __pa(kasan_early_shadow_page); - kasan_populate_pte(kasan_early_shadow_pte, prot); - - for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_ptr_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) continue; - __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); + 
__set_pte_at(&init_mm, k_cur, ptep, pte, 0); } - flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + + flush_tlb_kernel_range(k_start, k_end); } -static void __init kasan_unmap_early_shadow_vmalloc(void) +static void __init kasan_remap_early_shadow_ro(void) { - unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START); - unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); - unsigned long k_cur; + pgprot_t prot = kasan_prot_ro(); phys_addr_t pa = __pa(kasan_early_shadow_page); - for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_ptr_k(k_cur); - pte_t *ptep = pte_offset_kernel(pmd, k_cur); + kasan_populate_pte(kasan_early_shadow_pte, prot); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) - continue; + kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END, + pfn_pte(PHYS_PFN(pa), prot)); +} - __set_pte_at(&init_mm, k_cur, ptep, __pte(0), 0); - } - flush_tlb_kernel_range(k_start, k_end); +static void __init kasan_unmap_early_shadow_vmalloc(void) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); + + kasan_update_early_region(k_start, k_end, __pte(0)); } -void __init kasan_mmu_init(void) +static void __init kasan_mmu_init(void) { int ret; struct memblock_region *reg; - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); - - if (ret) - panic("kasan: kasan_init_shadow_page_tables() failed"); - } - for_each_memblock(memory, reg) { phys_addr_t base = reg->base; phys_addr_t top = min(base + reg->size, total_lowmem); @@ -156,10 +133,21 @@ void __init kasan_mmu_init(void) if (ret) panic("kasan: kasan_init_region() failed"); } + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || + IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + ret = 
kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + } void __init kasan_init(void) { + kasan_mmu_init(); + kasan_remap_early_shadow_ro(); clear_page(kasan_early_shadow_page); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 7cebb9c818d3..5f7fe13211e9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -32,6 +32,7 @@ #include <linux/vmalloc.h> #include <linux/memremap.h> #include <linux/dma-direct.h> +#include <linux/kprobes.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -465,6 +466,7 @@ static void flush_dcache_icache_phys(unsigned long physaddr) : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) : "ctr", "memory"); } +NOKPROBE_SYMBOL(flush_dcache_icache_phys) #endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) /* diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7097e07a209a..1b6d39e9baed 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -182,6 +182,10 @@ static inline void mmu_mark_initmem_nx(void) { } static inline void mmu_mark_rodata_ro(void) { } #endif +#ifdef CONFIG_PPC_8xx +void __init mmu_mapin_immr(void); +#endif + #ifdef CONFIG_PPC_DEBUG_WX void ptdump_check_wx(void); #else diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index 82862723ab42..4eaf462cda30 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -102,7 +102,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) while (s >= LARGE_PAGE_SIZE_16M) { pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; + unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_RW; pmdp = pmd_ptr_k(v); *pmdp++ = __pmd(val); @@ -117,7 +117,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) while (s >= LARGE_PAGE_SIZE_4M) { pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | 
_PAGE_HWWRITE; + unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_RW; pmdp = pmd_ptr_k(v); *pmdp = __pmd(val); diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index d83a12c5bc7f..286441bbbe49 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -9,8 +9,11 @@ #include <linux/memblock.h> #include <linux/mmu_context.h> +#include <linux/hugetlb.h> #include <asm/fixmap.h> #include <asm/code-patching.h> +#include <asm/inst.h> +#include <asm/pgalloc.h> #include <mm/mmu_decl.h> @@ -54,158 +57,148 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -#define LARGE_PAGE_SIZE_8M (1<<23) - -/* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. - */ -void __init MMU_init_hw(void) +static pte_t __init *early_hugepd_alloc_kernel(hugepd_t *pmdp, unsigned long va) { - /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ - if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { - unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; - int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 
29 : 28; - unsigned long addr = 0; - unsigned long mem = total_lowmem; - - for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { - mtspr(SPRN_MD_CTR, ctr | (i << 8)); - mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); - mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); - addr += LARGE_PAGE_SIZE_8M; - mem -= LARGE_PAGE_SIZE_8M; - } + if (hpd_val(*pmdp) == 0) { + pte_t *ptep = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + + if (!ptep) + return NULL; + + hugepd_populate_kernel((hugepd_t *)pmdp, ptep, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmdp + 1, ptep, PAGE_SHIFT_8M); } + return hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); } -static void __init mmu_mapin_immr(void) +static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, + pgprot_t prot, int psize, bool new) { - unsigned long p = PHYS_IMMR_BASE; - unsigned long v = VIRT_IMMR_BASE; - int offset; + pmd_t *pmdp = pmd_ptr_k(va); + pte_t *ptep; + + if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M)) + return -EINVAL; + + if (new) { + if (WARN_ON(slab_is_available())) + return -EINVAL; + + if (psize == MMU_PAGE_512K) + ptep = early_pte_alloc_kernel(pmdp, va); + else + ptep = early_hugepd_alloc_kernel((hugepd_t *)pmdp, va); + } else { + if (psize == MMU_PAGE_512K) + ptep = pte_offset_kernel(pmdp, va); + else + ptep = hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); + } + + if (WARN_ON(!ptep)) + return -ENOMEM; - for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); + /* The PTE should never be already present */ + if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot))) + return -EINVAL; + + set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot))); + + return 0; } -static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped) +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. 
+ */ +void __init MMU_init_hw(void) { - modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); } -static void mmu_patch_addis(s32 *site, long simm) +static bool immr_is_mapped __initdata; + +void __init mmu_mapin_immr(void) { - unsigned int instr = *(unsigned int *)patch_site_addr(site); + if (immr_is_mapped) + return; + + immr_is_mapped = true; - instr &= 0xffff0000; - instr |= ((unsigned long)simm) >> 16; - patch_instruction_site(site, instr); + __early_map_kernel_hugepage(VIRT_IMMR_BASE, PHYS_IMMR_BASE, + PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } -static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) +static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, + pgprot_t prot, bool new) { - unsigned long s = offset; - unsigned long v = PAGE_OFFSET + s; - phys_addr_t p = memstart_addr + s; - - for (; s < top; s += PAGE_SIZE) { - map_kernel_page(v, p, prot); - v += PAGE_SIZE; - p += PAGE_SIZE; - } + unsigned long v = PAGE_OFFSET + offset; + unsigned long p = offset; + + WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K)); + + for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); + for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + + if (!new) + flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top); } unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { - unsigned long mapped; - - if (__map_without_ltlbs) { - mapped = 0; - mmu_mapin_immr(); - if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) - patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); + unsigned long etext8 = 
ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + unsigned long boundary = strict_boundary ? sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + WARN_ON(top < einittext8); + + mmu_mapin_immr(); + + if (__map_without_ltlbs) + return 0; + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); + if (debug_pagealloc_enabled()) { + top = boundary; } else { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - - mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8); - - /* - * Populate page tables to: - * - have them appear in /sys/kernel/debug/kernel_page_tables - * - allow the BDI to find the pages when they are not PINNED - */ - mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X); - mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL); - mmu_mapin_immr(); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); } - mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); - mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); + if (top > SZ_32M) + memblock_set_current_limit(top); - /* If the size of RAM is not an exact power of two, we may not - * have covered RAM in its entirety with 8 MiB - * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" - * coverage with normal-sized pages (or other reasons) do not - * attempt to allocate outside the allowed range. 
- */ - if (mapped) - memblock_set_current_limit(mapped); + block_mapped_ram = top; - block_mapped_ram = mapped; - - return mapped; + return top; } void mmu_mark_initmem_nx(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) - mmu_patch_addis(&patch__itlbmiss_linmem_top8, - -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); - unsigned long etext = __pa(_etext); - - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); - - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, einittext8, __pgprot(0)); - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL); - } else { - mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL); - } - } - _tlbil_all(); + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + unsigned long boundary = strict_kernel_rwx_enabled() ? 
sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + + if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) { unsigned long sinittext = __pa(_sinittext); - unsigned long etext = __pa(_etext); - - if (CONFIG_DATA_SHIFT < 23) - mmu_patch_addis(&patch__dtlbmiss_romem_top8, - -__pa(((unsigned long)_sinittext) & - ~(LARGE_PAGE_SIZE_8M - 1))); - mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); - - _tlbil_all(); - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, sinittext, __pgprot(0)); - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX); - mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO); + mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); + if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) + mmu_pin_tlb(block_mapped_ram, true); } #endif @@ -218,7 +211,7 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, BUG_ON(first_memblock_base != 0); /* 8xx can only access 32MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000)); + memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } /* diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c2499271f6c1..cea5b4e25a24 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -100,7 +100,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; } * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. */ -static pte_t set_pte_filter(pte_t pte) +static inline pte_t set_pte_filter(pte_t pte) { struct page *pg; @@ -249,16 +249,42 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, #else /* - * Not used on non book3s64 platforms. 
But 8xx - * can possibly use tsize derived from hstate. + * Not used on non book3s64 platforms. + * 8xx compares it with mmu_virtual_psize to + * know if it is a huge page or not. */ - psize = 0; + psize = MMU_PAGE_COUNT; #endif __ptep_set_access_flags(vma, ptep, pte, addr, psize); } return changed; #endif } + +#if defined(CONFIG_PPC_8xx) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + pmd_t *pmd = pmd_ptr(mm, addr); + pte_basic_t val; + pte_basic_t *entry = &ptep->pte; + int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K; + int i; + + /* + * Make sure hardware valid bit is not set. We don't do + * tlb flush for this update. + */ + VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); + + pte = pte_mkpte(pte); + + pte = set_pte_filter(pte); + + val = pte_val(pte); + for (i = 0; i < num; i++, entry++, val += SZ_4K) + *entry = val; +} +#endif #endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_DEBUG_VM diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f62de06e3d07..05902bbff8d6 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -29,11 +29,27 @@ #include <asm/fixmap.h> #include <asm/setup.h> #include <asm/sections.h> +#include <asm/early_ioremap.h> #include <mm/mmu_decl.h> extern char etext[], _stext[], _sinittext[], _einittext[]; +static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data; + +notrace void __init early_ioremap_init(void) +{ + unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE); + pte_t *ptep = (pte_t *)early_fixmap_pagetable; + pmd_t *pmdp = pmd_ptr_k(addr); + + for (; (s32)(FIXADDR_TOP - addr) > 0; + addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++) + pmd_populate_kernel(&init_mm, pmdp, ptep); + + early_ioremap_setup(); +} + static void __init *early_alloc_pgtable(unsigned long size) { void *ptr = memblock_alloc(size, size); @@ -45,7 +61,7 @@ static void __init *early_alloc_pgtable(unsigned long size) return ptr; } -static 
pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) +pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) { if (pmd_none(*pmdp)) { pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE); @@ -169,7 +185,7 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_stext + 1)) + if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else change_page_attr(page, numpages, PAGE_KERNEL); @@ -181,7 +197,7 @@ void mark_rodata_ro(void) struct page *page; unsigned long numpages; - if (v_block_mapped((unsigned long)_sinittext)) { + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); return; diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 9e2d8e847d6e..4bc350736c1d 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -11,6 +11,11 @@ static const struct flag_info flag_array[] = { { + .mask = _PAGE_HUGE, + .val = _PAGE_HUGE, + .set = "huge", + .clear = " ", + }, { .mask = _PAGE_SH, .val = 0, .set = "user", diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index d3a5d6b318d1..cebb58c7e289 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -10,15 +10,17 @@ #include <asm/pgtable.h> #include <asm/cpu_has_feature.h> +#include "ptdump.h" + static char *pp_601(int k, int pp) { if (pp == 0) - return k ? "NA" : "RWX"; + return k ? " " : "rwx"; if (pp == 1) - return k ? "ROX" : "RWX"; + return k ? "r x" : "rwx"; if (pp == 2) - return k ? "RWX" : "RWX"; - return k ? 
"ROX" : "ROX"; + return "rwx"; + return "r x"; } static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) @@ -42,15 +44,13 @@ static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) #else seq_printf(m, "0x%08x ", pbn); #endif + pt_dump_size(m, size); seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); seq_puts(m, "\n"); } @@ -88,6 +88,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool #else seq_printf(m, "0x%08x ", brpn); #endif + pt_dump_size(m, size); if (k == 1) seq_puts(m, "User "); @@ -97,20 +98,16 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool seq_puts(m, "Kernel/User "); if (lower & BPP_RX) - seq_puts(m, is_d ? "RO " : "EXEC "); + seq_puts(m, is_d ? "r " : " x "); else if (lower & BPP_RW) - seq_puts(m, is_d ? "RW " : "EXEC "); + seq_puts(m, is_d ? "rw " : " x "); else - seq_puts(m, is_d ? "NA " : "NX "); - - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); - if (lower & _PAGE_GUARDED) - seq_puts(m, "guarded "); + seq_puts(m, is_d ? " " : " "); + + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); + seq_puts(m, lower & _PAGE_GUARDED ? 
"g " : " "); seq_puts(m, "\n"); } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index b3fead0230c1..3209f78297ad 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -23,6 +23,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/pgalloc.h> +#include <asm/hugetlb.h> #include <mm/mmu_decl.h> @@ -60,6 +61,7 @@ struct pg_state { unsigned long start_address; unsigned long start_pa; unsigned long last_pa; + unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -112,6 +114,19 @@ static struct addr_marker address_markers[] = { seq_putc(m, c); \ }) +void pt_dump_size(struct seq_file *m, unsigned long size) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + + /* Work out what appropriate unit to use */ + while (!(size & 1023) && unit[1]) { + size >>= 10; + unit++; + } + pt_dump_seq_printf(m, "%9lu%c ", size, *unit); +} + static void dump_flag_info(struct pg_state *st, const struct flag_info *flag, u64 pte, int num) { @@ -146,8 +161,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info static void dump_addr(struct pg_state *st, unsigned long addr) { - static const char units[] = "KMGTPE"; - const char *unit = units; unsigned long delta; #ifdef CONFIG_PPC64 @@ -157,20 +170,14 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { + if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = PAGE_SIZE >> 10; + delta = st->page_size >> 10; } else { pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; } - /* Work out what appropriate unit to use */ - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - 
pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit); - + pt_dump_size(st->seq, delta); } static void note_prot_wx(struct pg_state *st, unsigned long addr) @@ -190,7 +197,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) + unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -202,6 +209,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: @@ -213,7 +221,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ } else if (flag != st->current_flags || level != st->level || addr >= st->marker[1].start_address || - (pa != st->last_pa + PAGE_SIZE && + (pa != st->last_pa + st->page_size && (pa != st->start_pa || st->start_pa != st->last_pa))) { /* Check the PTE flags */ @@ -241,6 +249,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; st->current_flags = flag; st->level = level; } else { @@ -256,11 +265,31 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) for (i = 0; i < PTRS_PER_PTE; i++, pte++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); } } +static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start, + int pdshift, int level) +{ +#ifdef CONFIG_ARCH_HAS_HUGEPD + unsigned int i; + int shift = hugepd_shift(*phpd); + int ptrs_per_hpd = pdshift - shift > 0 ? 
1 << (pdshift - shift) : 1; + + if (start & ((1 << shift) - 1)) + return; + + for (i = 0; i < ptrs_per_hpd; i++) { + unsigned long addr = start + (i << shift); + pte_t *pte = hugepte_offset(*phpd, addr, pdshift); + + note_page(st, addr, level + 1, pte_val(*pte), 1 << shift); + } +#endif +} + static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) { pmd_t *pmd = pmd_offset(pud, 0); @@ -273,7 +302,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) /* pmd exists */ walk_pte(st, pmd, addr); else - note_page(st, addr, 3, pmd_val(*pmd)); + note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); } } @@ -289,7 +318,7 @@ static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) /* pud exists */ walk_pmd(st, pud, addr); else - note_page(st, addr, 2, pud_val(*pud)); + note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); } } @@ -306,11 +335,13 @@ static void walk_pagetables(struct pg_state *st) for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { p4d_t *p4d = p4d_offset(pgd, 0); - if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) - /* pgd exists */ - walk_pud(st, p4d, addr); + if (p4d_none(*p4d) || p4d_is_leaf(*p4d)) + note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE); + else if (is_hugepd(__hugepd(p4d_val(*p4d)))) + walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1); else - note_page(st, addr, 1, p4d_val(*p4d)); + /* p4d exists */ + walk_pud(st, p4d, addr); } } @@ -365,7 +396,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0); + note_page(&st, 0, 0, 0, 0); return 0; } diff --git a/arch/powerpc/mm/ptdump/ptdump.h b/arch/powerpc/mm/ptdump/ptdump.h index 5d513636de73..154efae96ae0 100644 --- a/arch/powerpc/mm/ptdump/ptdump.h +++ b/arch/powerpc/mm/ptdump/ptdump.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/types.h> +#include <linux/seq_file.h> struct flag_info { u64 mask; @@ -17,3 +18,5 @@ 
struct pgtable_level { }; extern struct pgtable_level pg_level[5]; + +void pt_dump_size(struct seq_file *m, unsigned long delta); diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f7ed2f187cb0..784f8df17f73 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -31,6 +31,11 @@ static const struct flag_info flag_array[] = { .set = "present", .clear = " ", }, { + .mask = _PAGE_COHERENT, + .val = _PAGE_COHERENT, + .set = "coherent", + .clear = " ", + }, { .mask = _PAGE_GUARDED, .val = _PAGE_GUARDED, .set = "guarded", diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index dffe1a45b6ed..82b45b1cb973 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -478,7 +478,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* If hint, make sure it matches our alignment restrictions */ if (!fixed && addr) { - addr = _ALIGN_UP(addr, page_size); + addr = ALIGN(addr, page_size); slice_dbg(" aligned addr=%lx\n", addr); /* Ignore hint if it's too large or overlaps a VMA */ if (addr > high_limit - len || addr < mmap_min_addr || diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 1ad03c55c88c..e53c3c161257 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -15,6 +15,7 @@ #include <asm/firmware.h> #include <asm/ptrace.h> #include <asm/code-patching.h> +#include <asm/inst.h> #define PERF_8xx_ID_CPU_CYCLES 1 #define PERF_8xx_ID_HW_INSTRUCTIONS 2 @@ -99,9 +100,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) unsigned long target = patch_site_addr(&patch__itlbmiss_perf); patch_branch_site(&patch__itlbmiss_exit_1, target, 0); -#ifndef CONFIG_PIN_TLB_TEXT - patch_branch_site(&patch__itlbmiss_exit_2, target, 0); -#endif } val = itlb_miss_counter; break; @@ -110,8 +108,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) unsigned long target = patch_site_addr(&patch__dtlbmiss_perf); 
patch_branch_site(&patch__dtlbmiss_exit_1, target, 0); - patch_branch_site(&patch__dtlbmiss_exit_2, target, 0); - patch_branch_site(&patch__dtlbmiss_exit_3, target, 0); } val = dtlb_miss_counter; break; @@ -170,24 +166,19 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_dec_return(&itlb_miss_ref) == 0) { /* mfspr r10, SPRN_SPRG_SCRATCH0 */ - unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_SPRG_SCRATCH0); + struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_SPRG_SCRATCH0)); patch_instruction_site(&patch__itlbmiss_exit_1, insn); -#ifndef CONFIG_PIN_TLB_TEXT - patch_instruction_site(&patch__itlbmiss_exit_2, insn); -#endif } break; case PERF_8xx_ID_DTLB_LOAD_MISS: if (atomic_dec_return(&dtlb_miss_ref) == 0) { /* mfspr r10, SPRN_DAR */ - unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_DAR); + struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_DAR)); patch_instruction_site(&patch__dtlbmiss_exit_1, insn); - patch_instruction_site(&patch__dtlbmiss_exit_2, insn); - patch_instruction_site(&patch__dtlbmiss_exit_3, insn); } break; } diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index df1ffd8b20f2..b63086b663ef 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -26,43 +26,25 @@ */ int read_user_stack_slow(void __user *ptr, void *buf, int nb) { - int ret = -EFAULT; - pgd_t *pgdir; - pte_t *ptep, pte; - unsigned int shift; + unsigned long addr = (unsigned long) ptr; unsigned long offset; - unsigned long pfn, flags; + struct page *page; + int nrpages; void *kaddr; - pgdir = current->mm->pgd; - if (!pgdir) - return -EFAULT; + nrpages = __get_user_pages_fast(addr, 1, 1, &page); + if (nrpages == 1) { + kaddr = page_address(page); + + /* align address to page boundary */ + offset = addr & ~PAGE_MASK; - local_irq_save(flags); - ptep = 
find_current_mm_pte(pgdir, addr, NULL, &shift); - if (!ptep) - goto err_out; - if (!shift) - shift = PAGE_SHIFT; - - /* align address to page boundary */ - offset = addr & ((1UL << shift) - 1); - - pte = READ_ONCE(*ptep); - if (!pte_present(pte) || !pte_user(pte)) - goto err_out; - pfn = pte_pfn(pte); - if (!page_is_ram(pfn)) - goto err_out; - - /* no highmem to worry about here */ - kaddr = pfn_to_kaddr(pfn); - memcpy(buf, kaddr + offset, nb); - ret = 0; -err_out: - local_irq_restore(flags); - return ret; + memcpy(buf, kaddr + offset, nb); + put_page(page); + return 0; + } + return -EFAULT; } static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3dcfecf858f3..13b9dd5e4a76 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -421,14 +421,14 @@ static __u64 power_pmu_bhrb_to(u64 addr) if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) return 0; - return branch_target(&instr); + return branch_target((struct ppc_inst *)&instr); } /* Userspace: need copy instruction here then translate it */ if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) return 0; - target = branch_target(&instr); + target = branch_target((struct ppc_inst *)&instr); if ((!target) || (instr & BRANCH_ABSOLUTE)) return target; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 573e0b309c0c..db213eb7cb02 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -20,6 +20,7 @@ #include <asm/io.h> #include <linux/byteorder/generic.h> +#include <asm/rtas.h> #include "hv-24x7.h" #include "hv-24x7-catalog.h" #include "hv-common.h" @@ -57,6 +58,65 @@ static bool is_physical_domain(unsigned domain) } } +/* + * The Processor Module Information system parameter allows transferring + * of certain processor module information from the platform to the OS. 
+ * Refer PAPR+ document to get parameter token value as '43'. + */ + +#define PROCESSOR_MODULE_INFO 43 + +static u32 phys_sockets; /* Physical sockets */ +static u32 phys_chipspersocket; /* Physical chips per socket*/ +static u32 phys_coresperchip; /* Physical cores per chip */ + +/* + * read_24x7_sys_info() + * Retrieve the number of sockets and chips per socket and cores per + * chip details through the get-system-parameter rtas call. + */ +void read_24x7_sys_info(void) +{ + int call_status, len, ntypes; + + spin_lock(&rtas_data_buf_lock); + + /* + * Making system parameter: chips and sockets and cores per chip + * default to 1. + */ + phys_sockets = 1; + phys_chipspersocket = 1; + phys_coresperchip = 1; + + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + PROCESSOR_MODULE_INFO, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + + if (call_status != 0) { + pr_err("Error calling get-system-parameter %d\n", + call_status); + } else { + len = be16_to_cpup((__be16 *)&rtas_data_buf[0]); + if (len < 8) + goto out; + + ntypes = be16_to_cpup((__be16 *)&rtas_data_buf[2]); + + if (!ntypes) + goto out; + + phys_sockets = be16_to_cpup((__be16 *)&rtas_data_buf[4]); + phys_chipspersocket = be16_to_cpup((__be16 *)&rtas_data_buf[6]); + phys_coresperchip = be16_to_cpup((__be16 *)&rtas_data_buf[8]); + } + +out: + spin_unlock(&rtas_data_buf_lock); +} + /* Domains for which more than one result element are returned for each event. 
*/ static bool domain_needs_aggregation(unsigned int domain) { @@ -386,6 +446,24 @@ static ssize_t device_show_string(struct device *dev, return sprintf(buf, "%s\n", (char *)d->var); } +static ssize_t sockets_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", phys_sockets); +} + +static ssize_t chipspersocket_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", phys_chipspersocket); +} + +static ssize_t coresperchip_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", phys_coresperchip); +} + static struct attribute *device_str_attr_create_(char *name, char *str) { struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -1032,6 +1110,9 @@ PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); static BIN_ATTR_RO(catalog, 0/* real length varies */); static DEVICE_ATTR_RO(domains); +static DEVICE_ATTR_RO(sockets); +static DEVICE_ATTR_RO(chipspersocket); +static DEVICE_ATTR_RO(coresperchip); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, @@ -1042,6 +1123,9 @@ static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, &dev_attr_domains.attr, + &dev_attr_sockets.attr, + &dev_attr_chipspersocket.attr, + &dev_attr_coresperchip.attr, NULL, }; @@ -1400,16 +1484,6 @@ static void h_24x7_event_read(struct perf_event *event) h24x7hw = &get_cpu_var(hv_24x7_hw); h24x7hw->events[i] = event; put_cpu_var(h24x7hw); - /* - * Clear the event count so we can compute the _change_ - * in the 24x7 raw counter value at the end of the txn. - * - * Note that we could alternatively read the 24x7 value - * now and save its value in event->hw.prev_count. But - * that would require issuing a hcall, which would then - * defeat the purpose of using the txn interface. 
- */ - local64_set(&event->count, 0); } put_cpu_var(hv_24x7_reqb); @@ -1615,6 +1689,8 @@ static int hv_24x7_init(void) if (r) return r; + read_24x7_sys_info(); + return 0; } diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 6da813b65b42..e3e5217c9822 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -7,14 +7,6 @@ config ACADIA help This option enables support for the AMCC 405EZ Acadia evaluation board. -config EP405 - bool "EP405/EP405PC" - depends on 40x - select 405GP - select FORCE_PCI - help - This option enables support for the EP405/EP405PC boards. - config HOTFOOT bool "Hotfoot" depends on 40x @@ -45,33 +37,6 @@ config MAKALU help This option enables support for the AMCC PPC405EX board. -config WALNUT - bool "Walnut" - depends on 40x - default y - select 405GP - select FORCE_PCI - select OF_RTC - help - This option enables support for the IBM PPC405GP evaluation board. - -config XILINX_VIRTEX_GENERIC_BOARD - bool "Generic Xilinx Virtex board" - depends on 40x - select XILINX_VIRTEX_II_PRO - select XILINX_VIRTEX_4_FX - select XILINX_INTC - help - This option enables generic support for Xilinx Virtex based boards. - - The generic virtex board support matches any device tree which - specifies 'xilinx,virtex' in its compatible field. This includes - the Xilinx ML3xx and ML4xx reference designs using the powerpc - core. - - Most Virtex designs should use this unless it needs to do some - special configuration at board probe time. - config OBS600 bool "OpenBlockS 600" depends on 40x @@ -86,18 +51,6 @@ config PPC40x_SIMPLE help This option enables the simple PowerPC 40x platform support. 
-# OAK doesn't exist but wanted to keep this around for any future 403GCX boards -config 403GCX - bool - #depends on OAK - select IBM405_ERR51 - -config 405GP - bool - select IBM405_ERR77 - select IBM405_ERR51 - select IBM_EMAC_ZMII if IBM_EMAC - config 405EX bool select IBM_EMAC_EMAC4 if IBM_EMAC @@ -109,25 +62,6 @@ config 405EZ select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC -config XILINX_VIRTEX - bool - select DEFAULT_UIMAGE - -config XILINX_VIRTEX_II_PRO - bool - select XILINX_VIRTEX - select IBM405_ERR77 - select IBM405_ERR51 - -config XILINX_VIRTEX_4_FX - bool - select XILINX_VIRTEX - -config STB03xxx - bool - select IBM405_ERR77 - select IBM405_ERR51 - config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 40x @@ -135,16 +69,6 @@ config PPC4xx_GPIO help Enable gpiolib support for ppc40x based boards -# 40x errata/workaround config symbols, selected by the CPU models above - -# All 405-based cores up until the 405GPR and 405EP have this errata. -config IBM405_ERR77 - bool - -# All 40x-based cores, up until the 405GPR and 405EP have this errata. -config IBM405_ERR51 - bool - config APM8018X bool "APM8018X" depends on 40x diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile index 828d78340dd9..122de98527c4 100644 --- a/arch/powerpc/platforms/40x/Makefile +++ b/arch/powerpc/platforms/40x/Makefile @@ -1,5 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_WALNUT) += walnut.o -obj-$(CONFIG_XILINX_VIRTEX_GENERIC_BOARD) += virtex.o -obj-$(CONFIG_EP405) += ep405.o obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c deleted file mode 100644 index 1c8aec6e9bb7..000000000000 --- a/arch/powerpc/platforms/40x/ep405.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Architecture- / platform-specific boot-time initialization code for - * IBM PowerPC 4xx based boards. 
Adapted from original - * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek - * <dan@net4x.com>. - * - * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> - * - * Rewritten and ported to the merged powerpc tree: - * Copyright 2007 IBM Corporation - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * Adapted to EP405 by Ben. Herrenschmidt <benh@kernel.crashing.org> - * - * TODO: Wire up the PCI IRQ mux and the southbridge interrupts - * - * 2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include <linux/init.h> -#include <linux/of_platform.h> - -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/time.h> -#include <asm/uic.h> -#include <asm/pci-bridge.h> -#include <asm/ppc4xx.h> - -static struct device_node *bcsr_node; -static void __iomem *bcsr_regs; - -/* BCSR registers */ -#define BCSR_ID 0 -#define BCSR_PCI_CTRL 1 -#define BCSR_FLASH_NV_POR_CTRL 2 -#define BCSR_FENET_UART_CTRL 3 -#define BCSR_PCI_IRQ 4 -#define BCSR_XIRQ_SELECT 5 -#define BCSR_XIRQ_ROUTING 6 -#define BCSR_XIRQ_STATUS 7 -#define BCSR_XIRQ_STATUS2 8 -#define BCSR_SW_STAT_LED_CTRL 9 -#define BCSR_GPIO_IRQ_PAR_CTRL 10 -/* there's more, can't be bothered typing them tho */ - - -static const struct of_device_id ep405_of_bus[] __initconst = { - { .compatible = "ibm,plb3", }, - { .compatible = "ibm,opb", }, - { .compatible = "ibm,ebc", }, - {}, -}; - -static int __init ep405_device_probe(void) -{ - of_platform_bus_probe(NULL, ep405_of_bus, NULL); - - return 0; -} -machine_device_initcall(ep405, ep405_device_probe); - -static void __init ep405_init_bcsr(void) -{ - const u8 *irq_routing; - int i; - - /* Find the bloody thing & map it */ - bcsr_node = of_find_compatible_node(NULL, NULL, "ep405-bcsr"); - if (bcsr_node == NULL) { - printk(KERN_ERR "EP405 BCSR not found 
!\n"); - return; - } - bcsr_regs = of_iomap(bcsr_node, 0); - if (bcsr_regs == NULL) { - printk(KERN_ERR "EP405 BCSR failed to map !\n"); - return; - } - - /* Get the irq-routing property and apply the routing to the CPLD */ - irq_routing = of_get_property(bcsr_node, "irq-routing", NULL); - if (irq_routing == NULL) - return; - for (i = 0; i < 16; i++) { - u8 irq = irq_routing[i]; - out_8(bcsr_regs + BCSR_XIRQ_SELECT, i); - out_8(bcsr_regs + BCSR_XIRQ_ROUTING, irq); - } - in_8(bcsr_regs + BCSR_XIRQ_SELECT); - mb(); - out_8(bcsr_regs + BCSR_GPIO_IRQ_PAR_CTRL, 0xfe); -} - -static void __init ep405_setup_arch(void) -{ - /* Find & init the BCSR CPLD */ - ep405_init_bcsr(); - - pci_set_flags(PCI_REASSIGN_ALL_RSRC); -} - -static int __init ep405_probe(void) -{ - if (!of_machine_is_compatible("ep405")) - return 0; - - return 1; -} - -define_machine(ep405) { - .name = "EP405", - .probe = ep405_probe, - .setup_arch = ep405_setup_arch, - .progress = udbg_progress, - .init_IRQ = uic_init_tree, - .get_irq = uic_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c deleted file mode 100644 index e3d5e095846b..000000000000 --- a/arch/powerpc/platforms/40x/virtex.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Xilinx Virtex (IIpro & 4FX) based board support - * - * Copyright 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
- */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/xilinx_intc.h> -#include <asm/xilinx_pci.h> -#include <asm/ppc4xx.h> - -static const struct of_device_id xilinx_of_bus_ids[] __initconst = { - { .compatible = "xlnx,plb-v46-1.00.a", }, - { .compatible = "xlnx,plb-v34-1.01.a", }, - { .compatible = "xlnx,plb-v34-1.02.a", }, - { .compatible = "xlnx,opb-v20-1.10.c", }, - { .compatible = "xlnx,dcr-v29-1.00.a", }, - { .compatible = "xlnx,compound", }, - {} -}; - -static int __init virtex_device_probe(void) -{ - of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL); - - return 0; -} -machine_device_initcall(virtex, virtex_device_probe); - -static int __init virtex_probe(void) -{ - if (!of_machine_is_compatible("xlnx,virtex")) - return 0; - - return 1; -} - -define_machine(virtex) { - .name = "Xilinx Virtex", - .probe = virtex_probe, - .setup_arch = xilinx_pci_init, - .init_IRQ = xilinx_intc_init_tree, - .get_irq = xintc_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c deleted file mode 100644 index e5797815e2f1..000000000000 --- a/arch/powerpc/platforms/40x/walnut.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Architecture- / platform-specific boot-time initialization code for - * IBM PowerPC 4xx based boards. Adapted from original - * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek - * <dan@net4x.com>. - * - * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> - * - * Rewritten and ported to the merged powerpc tree: - * Copyright 2007 IBM Corporation - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * 2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <linux/rtc.h> - -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/time.h> -#include <asm/uic.h> -#include <asm/pci-bridge.h> -#include <asm/ppc4xx.h> - -static const struct of_device_id walnut_of_bus[] __initconst = { - { .compatible = "ibm,plb3", }, - { .compatible = "ibm,opb", }, - { .compatible = "ibm,ebc", }, - {}, -}; - -static int __init walnut_device_probe(void) -{ - of_platform_bus_probe(NULL, walnut_of_bus, NULL); - of_instantiate_rtc(); - - return 0; -} -machine_device_initcall(walnut, walnut_device_probe); - -static int __init walnut_probe(void) -{ - if (!of_machine_is_compatible("ibm,walnut")) - return 0; - - pci_set_flags(PCI_REASSIGN_ALL_RSRC); - - return 1; -} - -define_machine(walnut) { - .name = "Walnut", - .probe = walnut_probe, - .progress = udbg_progress, - .init_IRQ = uic_init_tree, - .get_irq = uic_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 25ebe634a661..78ac6d67a935 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -167,8 +167,7 @@ config YOSEMITE config ISS4xx bool "ISS 4xx Simulator" - depends on (44x || 40x) - select 405GP if 40x + depends on 44x select 440GP if 44x && !PPC_47x select PPC_FPU select OF_RTC @@ -232,33 +231,6 @@ config ICON help This option enables support for the AMCC PPC440SPe evaluation board. -config XILINX_VIRTEX440_GENERIC_BOARD - bool "Generic Xilinx Virtex 5 FXT board support" - depends on 44x - select XILINX_VIRTEX_5_FXT - select XILINX_INTC - help - This option enables generic support for Xilinx Virtex based boards - that use a 440 based processor in the Virtex 5 FXT FPGA architecture. - - The generic virtex board support matches any device tree which - specifies 'xlnx,virtex440' in its compatible field. 
This includes - the Xilinx ML5xx reference designs using the powerpc core. - - Most Virtex 5 designs should use this unless it needs to do some - special configuration at board probe time. - -config XILINX_ML510 - bool "Xilinx ML510 extra support" - depends on XILINX_VIRTEX440_GENERIC_BOARD - select HAVE_PCI - select XILINX_PCI if PCI - select PPC_INDIRECT_PCI if PCI - select PPC_I8259 if PCI - help - This option enables extra support for features on the Xilinx ML510 - board. The ML510 has a PCI bus with ALI south bridge. - config PPC44x_SIMPLE bool "Simple PowerPC 44x board support" depends on 44x @@ -354,13 +326,3 @@ config 476FPE_ERR46 config IBM440EP_ERR42 bool -# Xilinx specific config options. -config XILINX_VIRTEX - bool - select DEFAULT_UIMAGE - -# Xilinx Virtex 5 FXT FPGA architecture, selected by a Xilinx board above -config XILINX_VIRTEX_5_FXT - bool - select XILINX_VIRTEX - diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile index 1b78c6af821a..5ba031f57652 100644 --- a/arch/powerpc/platforms/44x/Makefile +++ b/arch/powerpc/platforms/44x/Makefile @@ -7,8 +7,6 @@ obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o obj-$(CONFIG_EBONY) += ebony.o obj-$(CONFIG_SAM440EP) += sam440ep.o obj-$(CONFIG_WARP) += warp.o -obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o -obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o obj-$(CONFIG_ISS4xx) += iss4xx.o obj-$(CONFIG_CANYONLANDS)+= canyonlands.o obj-$(CONFIG_CURRITUCK) += ppc476.o diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c deleted file mode 100644 index 3eb13ed926ee..000000000000 --- a/arch/powerpc/platforms/44x/virtex.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Xilinx Virtex 5FXT based board support, derived from - * the Xilinx Virtex (IIpro & 4FX) based board support - * - * Copyright 2007 Secret Lab Technologies Ltd. - * Copyright 2008 Xilinx, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. 
This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/xilinx_intc.h> -#include <asm/xilinx_pci.h> -#include <asm/reg.h> -#include <asm/ppc4xx.h> -#include "44x.h" - -static const struct of_device_id xilinx_of_bus_ids[] __initconst = { - { .compatible = "simple-bus", }, - { .compatible = "xlnx,plb-v46-1.00.a", }, - { .compatible = "xlnx,plb-v46-1.02.a", }, - { .compatible = "xlnx,plb-v34-1.01.a", }, - { .compatible = "xlnx,plb-v34-1.02.a", }, - { .compatible = "xlnx,opb-v20-1.10.c", }, - { .compatible = "xlnx,dcr-v29-1.00.a", }, - { .compatible = "xlnx,compound", }, - {} -}; - -static int __init virtex_device_probe(void) -{ - of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL); - - return 0; -} -machine_device_initcall(virtex, virtex_device_probe); - -static int __init virtex_probe(void) -{ - if (!of_machine_is_compatible("xlnx,virtex440")) - return 0; - - return 1; -} - -define_machine(virtex) { - .name = "Xilinx Virtex440", - .probe = virtex_probe, - .setup_arch = xilinx_pci_init, - .init_IRQ = xilinx_intc_init_tree, - .get_irq = xintc_get_irq, - .calibrate_decr = generic_calibrate_decr, - .restart = ppc4xx_reset_system, -}; diff --git a/arch/powerpc/platforms/44x/virtex_ml510.c b/arch/powerpc/platforms/44x/virtex_ml510.c deleted file mode 100644 index 349f218b335c..000000000000 --- a/arch/powerpc/platforms/44x/virtex_ml510.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <asm/i8259.h> -#include <linux/pci.h> -#include "44x.h" - -/** - * ml510_ail_quirk - */ -static void ml510_ali_quirk(struct pci_dev *dev) -{ - /* Enable the IDE controller */ - pci_write_config_byte(dev, 0x58, 0x4c); - /* Assign irq 14 to the primary ide channel */ - pci_write_config_byte(dev, 0x44, 0x0d); - /* Assign irq 15 to the secondary ide channel */ - 
pci_write_config_byte(dev, 0x75, 0x0f); - /* Set the ide controller in native mode */ - pci_write_config_byte(dev, 0x09, 0xff); - - /* INTB = disabled, INTA = disabled */ - pci_write_config_byte(dev, 0x48, 0x00); - /* INTD = disabled, INTC = disabled */ - pci_write_config_byte(dev, 0x4a, 0x00); - /* Audio = INT7, Modem = disabled. */ - pci_write_config_byte(dev, 0x4b, 0x60); - /* USB = INT7 */ - pci_write_config_byte(dev, 0x74, 0x06); -} -DECLARE_PCI_FIXUP_EARLY(0x10b9, 0x1533, ml510_ali_quirk); - diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c index e6e2adcc7b64..c13d64c3b019 100644 --- a/arch/powerpc/platforms/4xx/pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -1242,7 +1242,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) if (mbase == NULL) { printk(KERN_ERR "%pOF: Can't map internal config space !", port->node); - goto done; + return; } while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA) @@ -1252,9 +1252,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) } if (attempt) port->link = 1; -done: iounmap(mbase); - } static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = { diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 3a9969c429b3..70083649c9ea 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -248,6 +248,7 @@ mmu_on: blr +_ASM_NOKPROBE_SYMBOL(lite5200_wakeup) /* ---------------------------------------------------------------------- */ @@ -391,6 +392,7 @@ restore_regs: LOAD_SPRN(TBWU, 0x5b); blr +_ASM_NOKPROBE_SYMBOL(restore_regs) diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c index 1cdd5ed9d896..3b5cb39a564c 100644 --- a/arch/powerpc/platforms/82xx/pq2.c +++ b/arch/powerpc/platforms/82xx/pq2.c @@ -10,6 +10,8 @@ * Copyright (c) 2006 MontaVista Software, Inc. 
*/ +#include <linux/kprobes.h> + #include <asm/cpm2.h> #include <asm/io.h> #include <asm/pci-bridge.h> @@ -29,6 +31,7 @@ void __noreturn pq2_restart(char *cmd) panic("Restart failed\n"); } +NOKPROBE_SYMBOL(pq2_restart) #ifdef CONFIG_PCI static int pq2_pci_exclude_device(struct pci_controller *hose, diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S index 3acd7470dc5e..bc6bd4d0ae96 100644 --- a/arch/powerpc/platforms/83xx/suspend-asm.S +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -548,3 +548,4 @@ mpc83xx_deep_resume: mtdec r0 rfi +_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume) diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 5b91ea5694e3..dba3aa73c062 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -17,6 +17,7 @@ #include <asm/pci-bridge.h> #include <asm/mpic.h> #include <asm/cacheflush.h> +#include <asm/inst.h> #include <sysdev/fsl_soc.h> @@ -72,7 +73,7 @@ smp_86xx_kick_cpu(int nr) /* Setup fake reset vector to call __secondary_start_mpc86xx. */ target = (unsigned long) __secondary_start_mpc86xx; - patch_branch(vector, target, BRANCH_SET_LINK); + patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); /* Kick that CPU */ smp_86xx_release_core(nr); @@ -82,7 +83,7 @@ smp_86xx_kick_cpu(int nr) mdelay(1); /* Restore the exception vector */ - patch_instruction(vector, save_vector); + patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); local_irq_restore(flags); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index e0fe670f06f6..abb2b45b2789 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -98,15 +98,6 @@ menu "MPC8xx CPM Options" # 8xx specific questions. 
comment "Generic MPC8xx Options" -config 8xx_COPYBACK - bool "Copy-Back Data Cache (else Writethrough)" - help - Saying Y here will cause the cache on an MPC8xx processor to be used - in Copy-Back mode. If you say N here, it is used in Writethrough - mode. - - If in doubt, say Y here. - config 8xx_GPIO bool "GPIO API Support" select GPIOLIB @@ -171,4 +162,45 @@ config UCODE_PATCH default y depends on !NO_UCODE_PATCH +menu "8xx advanced setup" + depends on PPC_8xx + +config PIN_TLB + bool "Pinned Kernel TLBs" + depends on ADVANCED_OPTIONS + help + On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each + table 4 TLBs can be pinned. + + It reduces the amount of usable TLBs to 28 (ie by 12%). That's the + reason why we make it selectable. + + This option does nothing, it just activate the selection of what + to pin. + +config PIN_TLB_DATA + bool "Pinned TLB for DATA" + depends on PIN_TLB + default y + help + This pins the first 32 Mbytes of memory with 8M pages. + +config PIN_TLB_IMMR + bool "Pinned TLB for IMMR" + depends on PIN_TLB + default y + help + This pins the IMMR area with a 512kbytes page. In case + CONFIG_PIN_TLB_DATA is also selected, it will reduce + CONFIG_PIN_TLB_DATA to 24 Mbytes. + +config PIN_TLB_TEXT + bool "Pinned TLB for TEXT" + depends on PIN_TLB + default y + help + This pins kernel text with 8M pages. + +endmenu + endmenu diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 1f8025383caa..5e6479d409a0 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -317,8 +317,4 @@ config MCU_MPC8349EMITX also register MCU GPIOs with the generic GPIO API, so you'll able to use MCU pins as GPIOs. 
-config XILINX_PCI - bool "Xilinx PCI host bridge support" - depends on PCI && XILINX_VIRTEX - endmenu diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 27a81c291be8..d349603fb889 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -55,8 +55,8 @@ config PPC_8xx select SYS_SUPPORTS_HUGETLBFS select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select PPC_MM_SLICES if HUGETLB_PAGE select HAVE_ARCH_VMAP_STACK + select HUGETLBFS config 40x bool "AMCC 40x" @@ -377,7 +377,7 @@ config PPC_HAVE_KUEP config PPC_KUEP bool "Kernel Userspace Execution Prevention" depends on PPC_HAVE_KUEP - default y + default y if !PPC_BOOK3S_32 help Enable support for Kernel Userspace Execution Prevention (KUEP) @@ -389,7 +389,7 @@ config PPC_HAVE_KUAP config PPC_KUAP bool "Kernel Userspace Access Protection" depends on PPC_HAVE_KUAP - default y + default y if !PPC_BOOK3S_32 help Enable support for Kernel Userspace Access Protection (KUAP) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index ca9ffc1c8685..2124831cf57c 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -943,7 +943,7 @@ static int __init cell_iommu_fixed_mapping_init(void) fbase = max(fbase, dbase + dsize); } - fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT); + fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT); fsize = memblock_phys_mem_size(); if ((fbase + fsize) <= 0x800000000ul) @@ -963,8 +963,8 @@ static int __init cell_iommu_fixed_mapping_init(void) hend = hbase + htab_size_bytes; /* The window must start and end on a segment boundary */ - if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) || - (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) { + if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) { pr_debug("iommu: hash window not segment aligned\n"); return -1; } diff --git 
a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 67e48b0a164e..a802ef957d63 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -172,19 +172,6 @@ static void wii_shutdown(void) flipper_quiesce(); } -define_machine(wii) { - .name = "wii", - .probe = wii_probe, - .setup_arch = wii_setup_arch, - .restart = wii_restart, - .halt = wii_halt, - .init_IRQ = wii_pic_probe, - .get_irq = flipper_pic_get_irq, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, - .machine_shutdown = wii_shutdown, -}; - static const struct of_device_id wii_of_bus[] = { { .compatible = "nintendo,hollywood", }, { }, @@ -200,3 +187,15 @@ static int __init wii_device_probe(void) } device_initcall(wii_device_probe); +define_machine(wii) { + .name = "wii", + .probe = wii_probe, + .setup_arch = wii_setup_arch, + .restart = wii_restart, + .halt = wii_halt, + .init_IRQ = wii_pic_probe, + .get_irq = flipper_pic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, + .machine_shutdown = wii_shutdown, +}; diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index af309ee99114..9d4ecd292255 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -108,7 +108,7 @@ static void * __init bootx_early_getprop(unsigned long base, #define dt_push_token(token, mem) \ do { \ - *(mem) = _ALIGN_UP(*(mem),4); \ + *(mem) = ALIGN(*(mem),4); \ *((u32 *)*(mem)) = token; \ *(mem) += 4; \ } while(0) @@ -150,7 +150,7 @@ static void __init bootx_dt_add_prop(char *name, void *data, int size, /* push property content */ if (size && data) { memcpy((void *)*mem_end, data, size); - *mem_end = _ALIGN_UP(*mem_end + size, 4); + *mem_end = ALIGN(*mem_end + size, 4); } } @@ -303,7 +303,7 @@ static void __init bootx_scan_dt_build_struct(unsigned long base, *lp++ = *p; } *lp = 0; - *mem_end = 
_ALIGN_UP((unsigned long)lp + 1, 4); + *mem_end = ALIGN((unsigned long)lp + 1, 4); /* get and store all properties */ while (*ppp) { @@ -356,11 +356,11 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) /* Start using memory after the big blob passed by BootX, get * some space for the header */ - mem_start = mem_end = _ALIGN_UP(((unsigned long)bi) + start, 4); + mem_start = mem_end = ALIGN(((unsigned long)bi) + start, 4); DBG("Boot params header at: %x\n", mem_start); hdr = (struct boot_param_header *)mem_start; mem_end += sizeof(struct boot_param_header); - rsvmap = (u64 *)(_ALIGN_UP(mem_end, 8)); + rsvmap = (u64 *)(ALIGN(mem_end, 8)); hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start; mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64); @@ -386,7 +386,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase; /* Build structure */ - mem_end = _ALIGN(mem_end, 16); + mem_end = ALIGN(mem_end, 16); DBG("Building device tree structure at: %x\n", mem_end); hdr->off_dt_struct = mem_end - mem_start; bootx_scan_dt_build_struct(base, 4, &mem_end); @@ -404,7 +404,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) * also bump mem_reserve_cnt to cause further reservations to * fail since it's too late. 
*/ - mem_end = _ALIGN(mem_end, PAGE_SIZE); + mem_end = ALIGN(mem_end, PAGE_SIZE); DBG("End of boot params: %x\n", mem_end); rsvmap[0] = mem_start; rsvmap[1] = mem_end; diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index da69e0fcb4f1..ced225415486 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -184,6 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtlr r10 blr +_ASM_NOKPROBE_SYMBOL(flush_disable_75x) /* This code is for 745x processors */ flush_disable_745x: @@ -351,4 +352,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) mtmsr r11 /* restore DR and EE */ isync blr +_ASM_NOKPROBE_SYMBOL(flush_disable_745x) #endif /* CONFIG_PPC_BOOK3S_32 */ diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index dc7a5bae8f1c..853ccc4480e2 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -55,7 +55,7 @@ struct chrp_header { u8 cksum; u16 len; char name[12]; - u8 data[0]; + u8 data[]; }; struct core99_header { diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index bd6085b470b7..f9a680fdd9c4 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) mtmsr r2 isync b 1b - +_ASM_NOKPROBE_SYMBOL(low_cpu_die) /* * Here is the resume code. */ @@ -282,6 +282,7 @@ _GLOBAL(core99_wake_up) lwz r1,0(r3) /* Pass thru to older resume code ... */ +_ASM_NOKPROBE_SYMBOL(core99_wake_up) /* * Here is the resume code for older machines. * r1 has the physical address of SL_PC(sp). 
@@ -429,6 +430,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lwz r0,4(r1) mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) turn_on_mmu: mflr r4 @@ -438,6 +440,7 @@ turn_on_mmu: sync isync rfi +_ASM_NOKPROBE_SYMBOL(turn_on_mmu) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index be2ab5b11e57..9969c07035b6 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -49,6 +49,7 @@ #include <asm/keylargo.h> #include <asm/pmac_low_i2c.h> #include <asm/pmac_pfunc.h> +#include <asm/inst.h> #include "pmac.h" @@ -813,7 +814,7 @@ static int smp_core99_kick_cpu(int nr) * b __secondary_start_pmac_0 + nr*8 */ target = (unsigned long) __secondary_start_pmac_0 + nr * 8; - patch_branch(vector, target, BRANCH_SET_LINK); + patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); /* Put some life in our friend */ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); @@ -826,7 +827,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction(vector, save_vector); + patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index c0f8120045c3..fe3f0fb5aeca 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_PPC_SECURE_BOOT) += 
opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 78599bca66c2..2dd467383a88 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -1270,7 +1270,7 @@ static int pnv_parse_cpuidle_dt(void) /* Read residencies */ if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", temp_u32, nr_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); rc = -EINVAL; goto out; } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index b95b9e3c4c98..abeaa533b976 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -15,6 +15,7 @@ #include <asm/debugfs.h> #include <asm/powernv.h> +#include <asm/ppc-pci.h> #include <asm/opal.h> #include "pci.h" @@ -425,9 +426,10 @@ static void pnv_comp_attach_table_group(struct npu_comp *npucomp, ++npucomp->pe_num; } -struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) +static struct iommu_table_group * + pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) { - struct iommu_table_group *table_group; + struct iommu_table_group *compound_group; struct npu_comp *npucomp; struct pci_dev *gpdev = NULL; struct pci_controller *hose; @@ -446,39 +448,52 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) hose = pci_bus_to_host(npdev->bus); if (hose->npu) { - table_group = &hose->npu->npucomp.table_group; - - if (!table_group->group) { - table_group->ops = &pnv_npu_peers_ops; - iommu_register_group(table_group, - hose->global_number, - pe->pe_number); - } + /* P9 case: compound group is per-NPU (all gpus, all links) */ + npucomp = &hose->npu->npucomp; } else { - /* Create a group for 1 GPU and attached NPUs for POWER8 */ - pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL); 
- table_group = &pe->npucomp->table_group; - table_group->ops = &pnv_npu_peers_ops; - iommu_register_group(table_group, hose->global_number, - pe->pe_number); + /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */ + npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL); } - /* Steal capabilities from a GPU PE */ - table_group->max_dynamic_windows_supported = - pe->table_group.max_dynamic_windows_supported; - table_group->tce32_start = pe->table_group.tce32_start; - table_group->tce32_size = pe->table_group.tce32_size; - table_group->max_levels = pe->table_group.max_levels; - if (!table_group->pgsizes) - table_group->pgsizes = pe->table_group.pgsizes; + compound_group = &npucomp->table_group; + if (!compound_group->group) { + compound_group->ops = &pnv_npu_peers_ops; + iommu_register_group(compound_group, hose->global_number, + pe->pe_number); - npucomp = container_of(table_group, struct npu_comp, table_group); + /* Steal capabilities from a GPU PE */ + compound_group->max_dynamic_windows_supported = + pe->table_group.max_dynamic_windows_supported; + compound_group->tce32_start = pe->table_group.tce32_start; + compound_group->tce32_size = pe->table_group.tce32_size; + compound_group->max_levels = pe->table_group.max_levels; + if (!compound_group->pgsizes) + compound_group->pgsizes = pe->table_group.pgsizes; + } + + /* + * The gpu would have been added to the iommu group that's created + * for the PE. Pull it out now. + */ + iommu_del_device(&gpdev->dev); + + /* + * I'm not sure this is strictly required, but it's probably a good idea + * since the table_group for the PE is going to be attached to the + * compound table group. If we leave the PE's iommu group active then + * we might have the same table_group being modifiable via two sepeate + * iommu groups. 
+ */ + iommu_group_put(pe->table_group.group); + + /* now put the GPU into the compound group */ pnv_comp_attach_table_group(npucomp, pe); + iommu_add_device(compound_group, &gpdev->dev); - return table_group; + return compound_group; } -struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) +static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) { struct iommu_table_group *table_group; struct npu_comp *npucomp; @@ -521,6 +536,54 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) return table_group; } + +void pnv_pci_npu_setup_iommu_groups(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + + /* + * For non-nvlink devices the IOMMU group is registered when the PE is + * configured and devices are added to the group when the per-device + * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is + * only initialise for "normal" IODA PHBs. + * + * For NVLink devices we need to ensure the NVLinks and the GPU end up + * in the same IOMMU group, so that's handled here. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->type == PNV_PHB_IODA2) + list_for_each_entry(pe, &phb->ioda.pe_list, list) + pnv_try_setup_npu_table_group(pe); + } + + /* + * Now we have all PHBs discovered, time to add NPU devices to + * the corresponding IOMMU groups. + */ + list_for_each_entry(hose, &hose_list, list_node) { + unsigned long pgsizes; + + phb = hose->private_data; + + if (phb->type != PNV_PHB_NPU_NVLINK) + continue; + + pgsizes = pnv_ioda_parse_tce_sizes(phb); + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + /* + * IODA2 bridges get this set up from + * pci_controller_ops::setup_bridge but NPU bridges + * do not have this hook defined so we do it here. 
+ */ + pe->table_group.pgsizes = pgsizes; + pnv_npu_compound_attach(pe); + } + } +} #endif /* CONFIG_IOMMU_API */ int pnv_npu2_init(struct pci_controller *hose) diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index d361d37d975f..9a360ced663b 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -671,7 +671,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) * Firmware supports 32-bit field for size. Align it to PAGE_SIZE * and request firmware to copy multiple kernel boot memory regions. */ - fadump_conf->max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE); + fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE); /* * Check if dump has been initiated on last reboot. diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2b3dfd0b6cdd..d95954ad4c0a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -811,6 +811,10 @@ static int opal_add_one_export(struct kobject *parent, const char *export_name, goto out; attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) { + rc = -ENOMEM; + goto out; + } name = kstrdup(export_name, GFP_KERNEL); if (!name) { rc = -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 5dc6847d5f4c..f923359d8afc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -17,6 +17,34 @@ #include <asm/tce.h> #include "pci.h" +unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + unsigned long mask = 0; + int i, rc, count; + u32 val; + + count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); + if (count <= 0) { + mask = SZ_4K | SZ_64K; + /* Add 16M for POWER8 by default */ + if 
(cpu_has_feature(CPU_FTR_ARCH_207S) && + !cpu_has_feature(CPU_FTR_ARCH_300)) + mask |= SZ_16M | SZ_256M; + return mask; + } + + for (i = 0; i < count; i++) { + rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", + i, &val); + if (rc == 0) + mask |= 1ULL << val; + } + + return mask; +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned int page_shift) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 57d3a6af1d52..73a63efcf855 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -51,6 +51,7 @@ static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", "NPU_OCAPI" }; static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); +static void pnv_pci_configure_bus(struct pci_bus *bus); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) @@ -264,8 +265,8 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, if (!r->parent || !pnv_pci_is_m64(phb, r)) continue; - start = _ALIGN_DOWN(r->start - base, sgsz); - end = _ALIGN_UP(r->end - base, sgsz); + start = ALIGN_DOWN(r->start - base, sgsz); + end = ALIGN(r->end - base, sgsz); for (segno = start / sgsz; segno < end / sgsz; segno++) { if (pe_bitmap) set_bit(segno, pe_bitmap); @@ -361,7 +362,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) return NULL; /* Allocate bitmap */ - size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); + size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); pe_alloc = kzalloc(size, GFP_KERNEL); if (!pe_alloc) { pr_warn("%s: Out of memory !\n", @@ -660,6 +661,16 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) return state; } +struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn) +{ + int pe_number = phb->ioda.pe_rmap[bdfn]; + + if (pe_number == 
IODA_INVALID_PE) + return NULL; + + return &phb->ioda.pe_array[pe_number]; +} + struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -1110,34 +1121,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return pe; } -static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - struct pci_dn *pdn = pci_get_pdn(dev); - - if (pdn == NULL) { - pr_warn("%s: No device node associated with device !\n", - pci_name(dev)); - continue; - } - - /* - * In partial hotplug case, the PCI device might be still - * associated with the PE and needn't attach it to the PE - * again. - */ - if (pdn->pe_number != IODA_INVALID_PE) - continue; - - pe->device_count++; - pdn->pe_number = pe->pe_number; - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_same_PE(dev->subordinate, pe); - } -} - /* * There're 2 types of PCI bus sensitive PEs: One that is compromised of * single PCI bus. Another one that contains the primary PCI bus and its @@ -1156,15 +1139,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) * We should reuse it instead of allocating a new one. 
*/ pe_num = phb->ioda.pe_rmap[bus->number << 8]; - if (pe_num != IODA_INVALID_PE) { + if (WARN_ON(pe_num != IODA_INVALID_PE)) { pe = &phb->ioda.pe_array[pe_num]; - pnv_ioda_setup_same_PE(bus, pe); return NULL; } /* PE number for root bus should have been reserved */ - if (pci_is_root_bus(bus) && - phb->ioda.root_pe_idx != IODA_INVALID_PE) + if (pci_is_root_bus(bus)) pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx]; /* Check if PE is determined by M64 */ @@ -1202,9 +1183,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) return NULL; } - /* Associate it with all child devices */ - pnv_ioda_setup_same_PE(bus, pe); - /* Put PE to the list */ list_add_tail(&pe->list, &phb->ioda.pe_list); @@ -1288,7 +1266,7 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) pnv_ioda_setup_npu_PE(pdev); } -static void pnv_pci_ioda_setup_PEs(void) +static void pnv_pci_ioda_setup_nvlink(void) { struct pci_controller *hose; struct pnv_phb *phb; @@ -1312,6 +1290,11 @@ static void pnv_pci_ioda_setup_PEs(void) list_for_each_entry(pe, &phb->ioda.pe_list, list) pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV); } + +#ifdef CONFIG_IOMMU_API + /* setup iommu groups so we can do nvlink pass-thru */ + pnv_pci_npu_setup_iommu_groups(); +#endif } #ifdef CONFIG_PCI_IOV @@ -1550,11 +1533,6 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); -#ifdef CONFIG_IOMMU_API -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, struct pci_bus *bus); - -#endif static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) { struct pci_bus *bus; @@ -1619,11 +1597,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) } pnv_pci_ioda2_setup_dma_pe(phb, pe); -#ifdef CONFIG_IOMMU_API - iommu_register_group(&pe->table_group, - pe->phb->hose->global_number, pe->pe_number); - pnv_ioda_setup_bus_iommu_group(pe, 
&pe->table_group, NULL); -#endif } } @@ -1767,24 +1740,39 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; - /* - * The function can be called while the PE# - * hasn't been assigned. Do nothing for the - * case. - */ - if (!pdn || pdn->pe_number == IODA_INVALID_PE) - return; + /* Check if the BDFN for this device is associated with a PE yet */ + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + if (!pe) { + /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */ + if (WARN_ON(pdev->is_virtfn)) + return; + + pnv_pci_configure_bus(pdev->bus); + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff); + + + /* + * If we can't setup the IODA PE something has gone horribly + * wrong and we can't enable DMA for the device. + */ + if (WARN_ON(!pe)) + return; + } else { + pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number); + } + + if (pdn) + pdn->pe_number = pe->pe_number; + pe->device_count++; - pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); pdev->dev.archdata.dma_offset = pe->tce_bypass_base; set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); - /* - * Note: iommu_add_device() will fail here as - * for physical PE: the device is already added by now; - * for virtual PE: sysfs entries are not ready yet and - * tce_iommu_bus_notifier will add the device to a group later. - */ + + /* PEs with a DMA weight of zero won't have a group */ + if (pe->table_group.group) + iommu_add_device(&pe->table_group, &pdev->dev); } /* @@ -2297,9 +2285,6 @@ found: pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node, 0, 0); - if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); - return; fail: /* XXX Failure: Try to fallback to 64-bit only ? 
*/ @@ -2537,7 +2522,7 @@ unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, direct_table_size = 1UL << table_shift; for ( ; levels; --levels) { - bytes += _ALIGN_UP(tce_table_size, direct_table_size); + bytes += ALIGN(tce_table_size, direct_table_size); tce_table_size /= direct_table_size; tce_table_size <<= 3; @@ -2596,137 +2581,8 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { .take_ownership = pnv_ioda2_take_ownership, .release_ownership = pnv_ioda2_release_ownership, }; - -static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, - struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - iommu_add_device(table_group, &dev->dev); - - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_iommu_group_add_devices(pe, - table_group, dev->subordinate); - } -} - -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, struct pci_bus *bus) -{ - - if (pe->flags & PNV_IODA_PE_DEV) - iommu_add_device(table_group, &pe->pdev->dev); - - if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus) - pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group, - bus); -} - -static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); - -static void pnv_pci_ioda_setup_iommu_api(void) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - - /* - * There are 4 types of PEs: - * - PNV_IODA_PE_BUS: a downstream port with an adapter, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_VF: a SRIOV virtual function, - * created from pnv_pcibios_sriov_enable(); - * - PNV_IODA_PE_DEV: an NPU or OCAPI device, - * created from pnv_pci_ioda_fixup(). 
- * - * Normally a PE is represented by an IOMMU group, however for - * devices with side channels the groups need to be more strict. - */ - list_for_each_entry(hose, &hose_list, list_node) { - phb = hose->private_data; - - if (phb->type == PNV_PHB_NPU_NVLINK || - phb->type == PNV_PHB_NPU_OCAPI) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - struct iommu_table_group *table_group; - - table_group = pnv_try_setup_npu_table_group(pe); - if (!table_group) { - if (!pnv_pci_ioda_pe_dma_weight(pe)) - continue; - - table_group = &pe->table_group; - iommu_register_group(&pe->table_group, - pe->phb->hose->global_number, - pe->pe_number); - } - pnv_ioda_setup_bus_iommu_group(pe, table_group, - pe->pbus); - } - } - - /* - * Now we have all PHBs discovered, time to add NPU devices to - * the corresponding IOMMU groups. - */ - list_for_each_entry(hose, &hose_list, list_node) { - unsigned long pgsizes; - - phb = hose->private_data; - - if (phb->type != PNV_PHB_NPU_NVLINK) - continue; - - pgsizes = pnv_ioda_parse_tce_sizes(phb); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - /* - * IODA2 bridges get this set up from - * pci_controller_ops::setup_bridge but NPU bridges - * do not have this hook defined so we do it here. 
- */ - pe->table_group.pgsizes = pgsizes; - pnv_npu_compound_attach(pe); - } - } -} -#else /* !CONFIG_IOMMU_API */ -static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif -static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) -{ - struct pci_controller *hose = phb->hose; - struct device_node *dn = hose->dn; - unsigned long mask = 0; - int i, rc, count; - u32 val; - - count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); - if (count <= 0) { - mask = SZ_4K | SZ_64K; - /* Add 16M for POWER8 by default */ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - !cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= SZ_16M | SZ_256M; - return mask; - } - - for (i = 0; i < count; i++) { - rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", - i, &val); - if (rc == 0) - mask |= 1ULL << val; - } - - return mask; -} - static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { @@ -2749,16 +2605,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, IOMMU_TABLE_GROUP_MAX_TABLES; pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb); -#ifdef CONFIG_IOMMU_API - pe->table_group.ops = &pnv_pci_ioda2_ops; -#endif rc = pnv_pci_ioda2_setup_default_config(pe); if (rc) return; - if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); +#endif } int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) @@ -3220,8 +3076,7 @@ static void pnv_pci_enable_bridges(void) static void pnv_pci_ioda_fixup(void) { - pnv_pci_ioda_setup_PEs(); - pnv_pci_ioda_setup_iommu_api(); + pnv_pci_ioda_setup_nvlink(); pnv_pci_ioda_create_dbgfs(); pnv_pci_enable_bridges(); @@ -3333,28 +3188,18 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, } } -static void 
pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) +static void pnv_pci_configure_bus(struct pci_bus *bus) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pci_dev *bridge = bus->self; struct pnv_ioda_pe *pe; - bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); - - /* Extend bridge's windows if necessary */ - pnv_pci_fixup_bridge_resources(bus, type); + bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); - /* The PE for root bus should be realized before any one else */ - if (!phb->ioda.root_pe_populated) { - pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false); - if (pe) { - phb->ioda.root_pe_idx = pe->pe_number; - phb->ioda.root_pe_populated = true; - } - } + dev_info(&bus->dev, "Configuring PE for bus\n"); /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ - if (list_empty(&bus->devices)) + if (WARN_ON(list_empty(&bus->devices))) return; /* Reserve PEs according to used M64 resources */ @@ -3599,6 +3444,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; + pe_info(pe, "Releasing PE\n"); + mutex_lock(&phb->ioda.pe_list_mutex); list_del(&pe->list); mutex_unlock(&phb->ioda.pe_list_mutex); @@ -3633,11 +3480,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) * that it can be populated again in PCI hot add path. The PE * shouldn't be destroyed as it's the global reserved resource. 
*/ - if (phb->ioda.root_pe_populated && - phb->ioda.root_pe_idx == pe->pe_number) - phb->ioda.root_pe_populated = false; - else - pnv_ioda_free_pe(pe); + if (phb->ioda.root_pe_idx == pe->pe_number) + return; + + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) @@ -3715,7 +3561,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .enable_device_hook = pnv_pci_enable_device_hook, .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, - .setup_bridge = pnv_pci_setup_bridge, + .setup_bridge = pnv_pci_fixup_bridge_resources, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .shutdown = pnv_pci_ioda_shutdown, }; @@ -3745,6 +3591,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, struct pnv_phb *phb; unsigned long size, m64map_off, m32map_off, pemap_off; unsigned long iomap_off = 0, dma32map_off = 0; + struct pnv_ioda_pe *root_pe; struct resource r; const __be64 *prop64; const __be32 *prop32; @@ -3863,7 +3710,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, PNV_IODA1_DMA32_SEGSIZE; /* Allocate aux data & arrays. 
We don't have IO ports on PHB3 */ - size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, + size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, sizeof(unsigned long)); m64map_off = size; size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]); @@ -3912,7 +3759,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1; pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); } else { - phb->ioda.root_pe_idx = IODA_INVALID_PE; + /* otherwise just allocate one */ + root_pe = pnv_ioda_alloc_pe(phb); + phb->ioda.root_pe_idx = root_pe->pe_number; } INIT_LIST_HEAD(&phb->ioda.pe_list); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5bf818246339..091fe1cf386b 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -955,28 +955,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev; - struct pci_dn *pdn; - struct pnv_ioda_pe *pe; - struct pci_controller *hose; - struct pnv_phb *phb; switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - pdev = to_pci_dev(dev); - pdn = pci_get_pdn(pdev); - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - - WARN_ON_ONCE(!phb); - if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb) - return 0; - - pe = &phb->ioda.pe_array[pdn->pe_number]; - if (!pe->table_group.group) - return 0; - iommu_add_device(&pe->table_group, dev); - return 0; case BUS_NOTIFY_DEL_DEVICE: iommu_del_device(dev); return 0; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d3bbdeab3a32..51c254f2f3cb 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -33,6 +33,24 @@ enum pnv_phb_model { #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ #define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ 
+/* + * A brief note on PNV_IODA_PE_BUS_ALL + * + * This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses + * the Requester ID field of the PCIe request header to determine the device + * (and PE) that initiated a DMA. In legacy PCI individual memory read/write + * requests aren't tagged with the RID. To work around this the PCIe-to-PCI + * bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side. + * + * PCIe-to-X bridges have a similar issue even though PCI-X requests also have + * a RID in the transaction header. The PCIe-to-X bridge is permitted to "take + * ownership" of a transaction by a PCI-X device when forwarding it to the PCIe + * side of the bridge. + * + * To work around these problems we use the BUS_ALL flag since every subordinate + * bus of the bridge should go into the same PE. + */ + /* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */ #define PNV_IODA_STOPPED_STATE 0x8000000000000000 @@ -118,7 +136,6 @@ struct pnv_phb { unsigned int total_pe_num; unsigned int reserved_pe_idx; unsigned int root_pe_idx; - bool root_pe_populated; /* 32-bit MMIO window */ unsigned int m32_size; @@ -190,6 +207,7 @@ extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); +extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, @@ -209,11 +227,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, /* Nvlink functions */ extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); -extern struct pnv_ioda_pe 
*pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); -extern struct iommu_table_group *pnv_try_setup_npu_table_group( - struct pnv_ioda_pe *pe); -extern struct iommu_table_group *pnv_npu_compound_attach( - struct pnv_ioda_pe *pe); +extern void pnv_pci_npu_setup_iommu_groups(void); /* pci-ioda-tce.c */ #define POWERNV_IOMMU_DEFAULT_LEVELS 2 @@ -244,4 +258,6 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned int page_shift); +extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c new file mode 100644 index 000000000000..98ed5d8c5441 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-api.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <asm/vas.h> +#include <uapi/asm/vas-api.h> +#include "vas.h" + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documententation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. 
+ */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_attr txattr = {}; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc, vasid; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid version\n"); + return -EINVAL; + } + + vasid = uattr.vas_id; + + vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); + if (IS_ERR(txwin)) { + pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct 
coproc_instance *cp_inst = fp->private_data; + + if (cp_inst->txwin) { + vas_win_close(cp_inst->txwin); + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). + */ + + return 0; +} + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("%s(): No send window open?\n", __func__); + return -EINVAL; + } + + vas_win_paste_addr(txwin, &paste_addr, NULL); + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, + paste_addr, vma->vm_start, rc); + + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, 
but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(mod, name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), + MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_coproc_api); + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} +EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c 
b/arch/powerpc/platforms/powernv/vas-debug.c index 44035a3d6414..41fa90d2f4ab 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private) seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), window->tx_win ? "Send" : "Receive"); - seq_printf(s, "Pid : %d\n", window->pid); + seq_printf(s, "Pid : %d\n", vas_window_pid(window)); unlock: mutex_unlock(&vas_mutex); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c new file mode 100644 index 000000000000..25db70be4c9c --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * VAS Fault handling. + * Copyright 2019, IBM Corporation + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/kthread.h> +#include <linux/sched/signal.h> +#include <linux/mmu_context.h> +#include <asm/icswx.h> + +#include "vas.h" + +/* + * The maximum FIFO size for fault window can be 8MB + * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS + * instance will be having fault window. + * 8MB FIFO can be used if expects more faults for each VAS + * instance. 
+ */ +#define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) + +static void dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +static void update_csb(struct vas_window *window, + struct coprocessor_request_block *crb) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!window->mm || !window->user_win)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_TRANSLATION; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. 
+ * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + pid = window->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Process closes send window after all pending NX requests are + * completed. In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + if (!tsk) { + pid = window->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return; + } + + use_mm(window->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. 
+ */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + unuse_mm(window->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +static void dump_fifo(struct vas_instance *vinst, void *entry) +{ + unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; + unsigned long *fifo = entry; + int i; + + pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size, + vinst->fault_fifo_size / CRB_SIZE); + + /* Dump 10 CRB entries or until end of FIFO */ + pr_err("Fault FIFO Dump:\n"); + for (i = 0; i < 10*(CRB_SIZE/8) && fifo < end; i += 4, fifo += 4) { + pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n", + i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3)); + } +} + +/* + * Process valid CRBs in fault FIFO. + * NX process user space requests, return credit and update the status + * in CRB. If it encounters transalation error when accessing CRB or + * request buffers, raises interrupt on the CPU to handle the fault. + * It takes credit on fault window, updates nx_fault_stamp in CRB with + * the following information and pastes CRB in fault FIFO. + * + * pswid - window ID of the window on which the request is sent. 
+ * fault_storage_addr - fault address + * + * It can raise a single interrupt for multiple faults. Expects OS to + * process all valid faults and return credit for each fault on user + * space and fault windows. This fault FIFO control will be done with + * credit mechanism. NX can continuously paste CRBs until credits are not + * available on fault window. Otherwise, returns with RMA_reject. + * + * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128) + * + */ +irqreturn_t vas_fault_thread_fn(int irq, void *data) +{ + struct vas_instance *vinst = data; + struct coprocessor_request_block *crb, *entry; + struct coprocessor_request_block buf; + struct vas_window *window; + unsigned long flags; + void *fifo; + + crb = &buf; + + /* + * VAS can interrupt with multiple page faults. So process all + * valid CRBs within fault FIFO until reaches invalid CRB. + * We use CCW[0] and pswid to validate validate CRBs: + * + * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 + * OS sets this bit to 1 after reading CRB. + * pswid NX assigns window ID. Set pswid to -1 after + * reading CRB from fault FIFO. + * + * We exit this function if no valid CRBs are available to process. + * So acquire fault_lock and reset fifo_in_progress to 0 before + * exit. + * In case kernel receives another interrupt with different page + * fault, interrupt handler returns with IRQ_HANDLED if + * fifo_in_progress is set. Means these new faults will be + * handled by the current thread. Otherwise set fifo_in_progress + * and return IRQ_WAKE_THREAD to wake up thread. + */ + while (true) { + spin_lock_irqsave(&vinst->fault_lock, flags); + /* + * Advance the fault fifo pointer to next CRB. + * Use CRB_SIZE rather than sizeof(*crb) since the latter is + * aligned to CRB_ALIGN (256) but the CRB written to by VAS is + * only CRB_SIZE in len. 
+ */ + fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE); + entry = fifo; + + if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY)) + || (entry->ccw & cpu_to_be32(CCW0_INVALID))) { + vinst->fifo_in_progress = 0; + spin_unlock_irqrestore(&vinst->fault_lock, flags); + return IRQ_HANDLED; + } + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + vinst->fault_crbs++; + if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE)) + vinst->fault_crbs = 0; + + memcpy(crb, fifo, CRB_SIZE); + entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY); + entry->ccw |= cpu_to_be32(CCW0_INVALID); + /* + * Return credit for the fault window. + */ + vas_return_credit(vinst->fault_win, false); + + pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n", + vinst->vas_id, vinst->fault_fifo, fifo, + vinst->fault_crbs); + + dump_crb(crb); + window = vas_pswid_to_window(vinst, + be32_to_cpu(crb->stamp.nx.pswid)); + + if (IS_ERR(window)) { + /* + * We got an interrupt about a specific send + * window but we can't find that window and we can't + * even clean it up (return credit on user space + * window). + * But we should not get here. + * TODO: Disable IRQ. + */ + dump_fifo(vinst, (void *)entry); + pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n", + vinst->vas_id, vinst->fault_fifo, fifo, + be32_to_cpu(crb->stamp.nx.pswid), + vinst->fault_crbs); + + WARN_ON_ONCE(1); + } else { + update_csb(window, crb); + /* + * Return credit for send window after processing + * fault CRB. + */ + vas_return_credit(window, true); + } + } +} + +irqreturn_t vas_fault_handler(int irq, void *dev_id) +{ + struct vas_instance *vinst = dev_id; + irqreturn_t ret = IRQ_WAKE_THREAD; + unsigned long flags; + + /* + * NX can generate an interrupt for multiple faults. So the + * fault handler thread process all CRBs until finds invalid + * entry. 
In case if NX sees continuous faults, it is possible + * that the thread function entered with the first interrupt + * can execute and process all valid CRBs. + * So wake up thread only if the fault thread is not in progress. + */ + spin_lock_irqsave(&vinst->fault_lock, flags); + + if (vinst->fifo_in_progress) + ret = IRQ_HANDLED; + else + vinst->fifo_in_progress = 1; + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + + return ret; +} + +/* + * Fault window is opened per VAS instance. NX pastes fault CRB in fault + * FIFO upon page faults. + */ +int vas_setup_fault_window(struct vas_instance *vinst) +{ + struct vas_rx_win_attr attr; + + vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; + vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); + if (!vinst->fault_fifo) { + pr_err("Unable to alloc %d bytes for fault_fifo\n", + vinst->fault_fifo_size); + return -ENOMEM; + } + + /* + * Invalidate all CRB entries. NX pastes valid entry for each fault. + */ + memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size); + vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); + + attr.rx_fifo_size = vinst->fault_fifo_size; + attr.rx_fifo = vinst->fault_fifo; + + /* + * Max creds is based on number of CRBs can fit in the FIFO. + * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds + * will be 0xffff since the receive creds field is 16bits wide. 
+ */ + attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE; + attr.lnotify_lpid = 0; + attr.lnotify_pid = mfspr(SPRN_PID); + attr.lnotify_tid = mfspr(SPRN_PID); + + vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, + &attr); + + if (IS_ERR(vinst->fault_win)) { + pr_err("VAS: Error %ld opening FaultWin\n", + PTR_ERR(vinst->fault_win)); + kfree(vinst->fault_fifo); + return PTR_ERR(vinst->fault_win); + } + + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", + vinst->fault_win->winid, attr.lnotify_lpid, + attr.lnotify_pid, attr.lnotify_tid); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 0c0d27d17976..6434f9cb5aed 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -12,6 +12,8 @@ #include <linux/log2.h> #include <linux/rcupdate.h> #include <linux/cred.h> +#include <linux/sched/mm.h> +#include <linux/mmu_context.h> #include <asm/switch_to.h> #include <asm/ppc-opcode.h> #include "vas.h" @@ -24,7 +26,7 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. 
*/ -static void compute_paste_address(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; @@ -78,7 +80,7 @@ static void *map_paste_region(struct vas_window *txwin) goto free_name; txwin->paste_addr_name = name; - compute_paste_address(txwin, &start, &len); + vas_win_paste_addr(txwin, &start, &len); if (!request_mem_region(start, len, name)) { pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", @@ -136,7 +138,7 @@ static void unmap_paste_region(struct vas_window *window) u64 busaddr_start; if (window->paste_kaddr) { - compute_paste_address(window, &busaddr_start, &len); + vas_win_paste_addr(window, &busaddr_start, &len); unmap_region(window->paste_kaddr, busaddr_start, len); window->paste_kaddr = NULL; kfree(window->paste_addr_name); @@ -373,7 +375,7 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) init_xlate_regs(window, winctx->user_win); val = 0ULL; - val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0); + val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id); write_hvwc_reg(window, VREG(FAULT_TX_WIN), val); /* In PowerNV, interrupts go to HV. 
*/ @@ -748,6 +750,8 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (rxwin->vinst->virq) + winctx->irq_port = rxwin->vinst->irq_port; } static bool rx_win_args_valid(enum vas_cop_type cop, @@ -768,7 +772,7 @@ static bool rx_win_args_valid(enum vas_cop_type cop, if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) return false; - if (attr->wcreds_max > VAS_RX_WCREDS_MAX) + if (!attr->wcreds_max) return false; if (attr->nx_win) { @@ -813,7 +817,8 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) { memset(rxattr, 0, sizeof(*rxattr)); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { rxattr->pin_win = true; rxattr->nx_win = true; rxattr->fault_win = false; @@ -827,9 +832,9 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) rxattr->fault_win = true; rxattr->notify_disable = true; rxattr->rx_wcred_mode = true; - rxattr->tx_wcred_mode = true; rxattr->rx_win_ord_mode = true; - rxattr->tx_win_ord_mode = true; + rxattr->rej_no_credit = true; + rxattr->tc_mode = VAS_THRESH_DISABLED; } else if (cop == VAS_COP_TYPE_FTW) { rxattr->user_win = true; rxattr->intr_disable = true; @@ -873,9 +878,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; - if (rxattr->user_win) - rxwin->pid = task_pid_vnr(current); + rxwin->wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); @@ -890,7 +893,8 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) { memset(txattr, 0, sizeof(*txattr)); - if (cop == VAS_COP_TYPE_842 || cop == 
VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { txattr->rej_no_credit = false; txattr->rx_wcred_mode = true; txattr->tx_wcred_mode = true; @@ -944,13 +948,22 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; winctx->rx_win_id = txwin->rxwin->winid; + /* + * IRQ and fault window setup is successful. Set fault window + * for the send window so that ready to handle faults. + */ + if (txwin->vinst->virq) + winctx->fault_win_id = txwin->vinst->fault_win->winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (txwin->vinst->virq) + winctx->irq_port = txwin->vinst->irq_port; - winctx->pswid = 0; + winctx->pswid = txattr->pswid ? txattr->pswid : + encode_pswid(txwin->vinst->vas_id, txwin->winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -965,9 +978,14 @@ static bool tx_win_args_valid(enum vas_cop_type cop, if (attr->wcreds_max > VAS_TX_WCREDS_MAX) return false; - if (attr->user_win && - (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count)) - return false; + if (attr->user_win) { + if (attr->rsvd_txbuf_count) + return false; + + if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP && + cop != VAS_COP_TYPE_GZIP_HIPRI) + return false; + } return true; } @@ -1016,7 +1034,6 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; - txwin->pid = attr->pid; txwin->user_win = attr->user_win; txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; @@ -1040,12 +1057,59 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, } } else { /* - * A user mapping must ensure that context switch issues - * CP_ABORT for this thread. 
+ * Interrupt handler or fault window setup failed. Means + * NX can not generate fault for page fault. So not + * opening for user space tx window. */ - rc = set_thread_uses_vas(); - if (rc) + if (!vinst->virq) { + rc = -ENODEV; goto free_window; + } + + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is freed by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + txwin->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + txwin->mm = get_task_mm(current); + + if (!txwin->mm) { + put_pid(txwin->pid); + pr_err("VAS: pid(%d): mm_struct is not found\n", + current->pid); + rc = -EPERM; + goto free_window; + } + + mmgrab(txwin->mm); + mmput(txwin->mm); + mm_context_add_vas_window(txwin->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it. Expects parent + * thread to use and close the window. So do not need + * to take pid reference for parent thread. + */ + txwin->tgid = find_get_pid(task_tgid_vnr(current)); + /* + * Even a process that has no foreign real address mapping can + * use an unpaired COPY instruction (to no real effect). Issue + * CP_ABORT to clear any pending COPY and prevent a covert + * channel. + * + * __switch_to() will issue CP_ABORT on future context switches + * if process / thread has any open VAS window (Use + * current->mm->context.vas_windows). 
+ */ + asm volatile(PPC_CP_ABORT); } set_vinst_win(vinst, txwin); @@ -1128,6 +1192,7 @@ static void poll_window_credits(struct vas_window *window) { u64 val; int creds, mode; + int count = 0; val = read_hvwc_reg(window, VREG(WINCTL)); if (window->tx_win) @@ -1146,10 +1211,27 @@ retry: creds = GET_FIELD(VAS_LRX_WCRED, val); } + /* + * Takes around few milliseconds to complete all pending requests + * and return credits. + * TODO: Scan fault FIFO and invalidate CRBs points to this window + * and issue CRB Kill to stop all pending requests. Need only + * if there is a bug in NX or fault handling in kernel. + */ if (creds < window->wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Process can not close send window until all credits are + * returned. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", + vas_window_pid(window), window->winid, + creds, count); + goto retry; } } @@ -1163,6 +1245,7 @@ static void poll_window_busy_state(struct vas_window *window) { int busy; u64 val; + int count = 0; retry: val = read_hvwc_reg(window, VREG(WIN_STATUS)); @@ -1170,7 +1253,16 @@ retry: if (busy) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(5)); + schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Takes around few milliseconds to process all pending + * requests. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. 
Retries %d\n", + vas_window_pid(window), window->winid, count); + goto retry; } } @@ -1235,22 +1327,118 @@ int vas_win_close(struct vas_window *window) unmap_paste_region(window); - clear_vinst_win(window); - poll_window_busy_state(window); unpin_close_window(window); poll_window_credits(window); + clear_vinst_win(window); + poll_window_castout(window); /* if send window, drop reference to matching receive window */ - if (window->tx_win) + if (window->tx_win) { + if (window->user_win) { + /* Drop references to pid and mm */ + put_pid(window->pid); + if (window->mm) { + mm_context_remove_vas_window(window->mm); + mmdrop(window->mm); + } + } put_rx_win(window->rxwin); + } vas_window_free(window); return 0; } EXPORT_SYMBOL_GPL(vas_win_close); + +/* + * Return credit for the given window. + * Send windows and fault window uses credit mechanism as follows: + * + * Send windows: + * - The default number of credits available for each send window is + * 1024. It means 1024 requests can be issued asynchronously at the + * same time. If the credit is not available, that request will be + * returned with RMA_Busy. + * - One credit is taken when NX request is issued. + * - This credit is returned after NX processed that request. + * - If NX encounters translation error, kernel will return the + * credit on the specific send window after processing the fault CRB. + * + * Fault window: + * - The total number of credits available is FIFO_SIZE/CRB_SIZE. + * Means 4MB/128 in the current implementation. If credit is not + * available, RMA_Reject is returned. + * - A credit is taken when NX pastes CRB in fault FIFO. + * - The kernel will return credit on fault window after reading entry + * from fault FIFO. 
+ */ +void vas_return_credit(struct vas_window *window, bool tx) +{ + uint64_t val; + + val = 0ULL; + if (tx) { /* send window */ + val = SET_FIELD(VAS_TX_WCRED, val, 1); + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val); + } else { + val = SET_FIELD(VAS_LRX_WCRED, val, 1); + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val); + } +} + +struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid) +{ + struct vas_window *window; + int winid; + + if (!pswid) { + pr_devel("%s: called for pswid 0!\n", __func__); + return ERR_PTR(-ESRCH); + } + + decode_pswid(pswid, NULL, &winid); + + if (winid >= VAS_WINDOWS_PER_CHIP) + return ERR_PTR(-ESRCH); + + /* + * If application closes the window before the hardware + * returns the fault CRB, we should wait in vas_win_close() + * for the pending requests. so the window must be active + * and the process alive. + * + * If its a kernel process, we should not get any faults and + * should not get here. + */ + window = vinst->windows[winid]; + + if (!window) { + pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n", + winid, pswid, vinst); + return NULL; + } + + /* + * Do some sanity checks on the decoded window. Window should be + * NX GZIP user send window. FTW windows should not incur faults + * since their CRBs are ignored (not queued on FIFO or processed + * by NX). 
+ */ + if (!window->tx_win || !window->user_win || !window->nx_win || + window->cop == VAS_COP_TYPE_FAULT || + window->cop == VAS_COP_TYPE_FTW) { + pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", + winid, window->tx_win, window->user_win, + window->nx_win, window->cop); + WARN_ON(1); + } + + return window; +} diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index ed9cc6df329a..598e4cd563fb 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -14,7 +14,10 @@ #include <linux/of_platform.h> #include <linux/of_address.h> #include <linux/of.h> +#include <linux/irqdomain.h> +#include <linux/interrupt.h> #include <asm/prom.h> +#include <asm/xive.h> #include "vas.h" @@ -23,12 +26,37 @@ static LIST_HEAD(vas_instances); static DEFINE_PER_CPU(int, cpu_vas_id); +static int vas_irq_fault_window_setup(struct vas_instance *vinst) +{ + char devname[64]; + int rc = 0; + + snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id); + rc = request_threaded_irq(vinst->virq, vas_fault_handler, + vas_fault_thread_fn, 0, devname, vinst); + + if (rc) { + pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n", + vinst->vas_id, vinst->virq, rc); + goto out; + } + + rc = vas_setup_fault_window(vinst); + if (rc) + free_irq(vinst->virq, vinst); + +out: + return rc; +} + static int init_vas_instance(struct platform_device *pdev) { - int rc, cpu, vasid; - struct resource *res; - struct vas_instance *vinst; struct device_node *dn = pdev->dev.of_node; + struct vas_instance *vinst; + struct xive_irq_data *xd; + uint32_t chipid, hwirq; + struct resource *res; + int rc, cpu, vasid; rc = of_property_read_u32(dn, "ibm,vas-id", &vasid); if (rc) { @@ -36,6 +64,12 @@ static int init_vas_instance(struct platform_device *pdev) return -ENODEV; } + rc = of_property_read_u32(dn, "ibm,chip-id", &chipid); + if (rc) { + pr_err("No ibm,chip-id property for %s?\n", pdev->name); + return -ENODEV; + } + if 
(pdev->num_resources != 4) { pr_err("Unexpected DT configuration for [%s, %d]\n", pdev->name, vasid); @@ -69,9 +103,32 @@ static int init_vas_instance(struct platform_device *pdev) vinst->paste_win_id_shift = 63 - res->end; - pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, " - "paste_win_id_shift 0x%llx\n", pdev->name, vasid, - vinst->paste_base_addr, vinst->paste_win_id_shift); + hwirq = xive_native_alloc_irq_on_chip(chipid); + if (!hwirq) { + pr_err("Inst%d: Unable to allocate global irq for chip %d\n", + vinst->vas_id, chipid); + return -ENOENT; + } + + vinst->virq = irq_create_mapping(NULL, hwirq); + if (!vinst->virq) { + pr_err("Inst%d: Unable to map global irq %d\n", + vinst->vas_id, hwirq); + return -EINVAL; + } + + xd = irq_get_handler_data(vinst->virq); + if (!xd) { + pr_err("Inst%d: Invalid virq %d\n", + vinst->vas_id, vinst->virq); + return -EINVAL; + } + + vinst->irq_port = xd->trig_page; + pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n", + pdev->name, vasid, vinst->paste_base_addr, + vinst->paste_win_id_shift, vinst->virq, + vinst->irq_port); for_each_possible_cpu(cpu) { if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn)) @@ -82,6 +139,22 @@ static int init_vas_instance(struct platform_device *pdev) list_add(&vinst->node, &vas_instances); mutex_unlock(&vas_mutex); + spin_lock_init(&vinst->fault_lock); + /* + * IRQ and fault handling setup is needed only for user space + * send windows. + */ + if (vinst->virq) { + rc = vas_irq_fault_window_setup(vinst); + /* + * Fault window is used only for user space send windows. + * So if vinst->virq is NULL, tx_win_open returns -ENODEV + * for user space. 
+ */ + if (rc) + vinst->virq = 0; + } + vas_instance_init_dbgdir(vinst); dev_set_drvdata(&pdev->dev, vinst); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 5574aec9ee88..70f793e8f6cc 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -101,11 +101,9 @@ /* * Initial per-process credits. * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED) - * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED) * * TODO: Needs tuning for per-process credits */ -#define VAS_RX_WCREDS_MAX ((64 << 10) - 1) #define VAS_TX_WCREDS_MAX ((4 << 10) - 1) #define VAS_WCREDS_DEFAULT (1 << 10) @@ -296,6 +294,22 @@ enum vas_notify_after_count { }; /* + * NX can generate an interrupt for multiple faults and expects kernel + * to process all of them. So read all valid CRB entries until find the + * invalid one. So use pswid which is pasted by NX and ccw[0] (reserved + * bit in BE) to check valid CRB. CCW[0] will not be touched by user + * space. Application gets CRB format error if it updates this bit. + * + * Invalidate FIFO during allocation and process all entries from last + * successful read until finds invalid pswid and ccw[0] values. + * After reading each CRB entry from fault FIFO, the kernel invalidates + * it by updating pswid with FIFO_INVALID_ENTRY and CCW[0] with + * CCW0_INVALID. + */ +#define FIFO_INVALID_ENTRY 0xffffffff +#define CCW0_INVALID 1 + +/* * One per instance of VAS. Each instance will have a separate set of * receive windows, one per coprocessor type. 
* @@ -313,6 +327,15 @@ struct vas_instance { u64 paste_base_addr; u64 paste_win_id_shift; + u64 irq_port; + int virq; + int fault_crbs; + int fault_fifo_size; + int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ + spinlock_t fault_lock; /* Protects fifo_in_progress update */ + void *fault_fifo; + struct vas_window *fault_win; /* Fault window */ + struct mutex mutex; struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; @@ -333,7 +356,9 @@ struct vas_window { bool user_win; /* True if user space window */ void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ - pid_t pid; /* Linux process id of owner */ + struct pid *pid; /* Linux process id of owner */ + struct pid *tgid; /* Thread group ID of owner */ + struct mm_struct *mm; /* Linux process mm_struct */ int wcreds_max; /* Window credits */ char *dbgname; @@ -406,6 +431,19 @@ extern void vas_init_dbgdir(void); extern void vas_instance_init_dbgdir(struct vas_instance *vinst); extern void vas_window_init_dbgdir(struct vas_window *win); extern void vas_window_free_dbgdir(struct vas_window *win); +extern int vas_setup_fault_window(struct vas_instance *vinst); +extern irqreturn_t vas_fault_thread_fn(int irq, void *data); +extern irqreturn_t vas_fault_handler(int irq, void *dev_id); +extern void vas_return_credit(struct vas_window *window, bool tx); +extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid); +extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, + int *len); + +static inline int vas_window_pid(struct vas_window *window) +{ + return pid_vnr(window->pid); +} static inline void vas_log_write(struct vas_window *win, char *name, void *regptr, u64 val) @@ -444,6 +482,21 @@ static inline u64 read_hvwc_reg(struct vas_window *win, return in_be64(win->hvwc_map+reg); } +/* + * Encode/decode the Partition Send Window ID (PSWID) for a window in + * a way that we can uniquely identify 
any window in the system. i.e. + * we should be able to locate the 'struct vas_window' given the PSWID. + * + * Bits Usage + * 0:7 VAS id (8 bits) + * 8:15 Unused, 0 (8 bits) + * 16:31 Window id (16 bits) + */ +static inline u32 encode_pswid(int vasid, int winid) +{ + return ((u32)winid | (vasid << (31 - 7))); +} + static inline void decode_pswid(u32 pswid, int *vasid, int *winid) { if (vasid) diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 423be34f0f5f..d094321964fb 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -200,13 +200,14 @@ void ps3_mm_vas_destroy(void) { int result; - DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id); - if (map.vas_id) { result = lv1_select_virtual_address_space(0); - BUG_ON(result); - result = lv1_destruct_virtual_address_space(map.vas_id); - BUG_ON(result); + result += lv1_destruct_virtual_address_space(map.vas_id); + + if (result) { + lv1_panic(0); + } + map.vas_id = 0; } } @@ -263,7 +264,7 @@ static int ps3_mm_region_create(struct mem_region *r, unsigned long size) int result; u64 muid; - r->size = _ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); + r->size = ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); DBG("%s:%d requested %lxh\n", __func__, __LINE__, size); DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size); @@ -304,19 +305,20 @@ static void ps3_mm_region_destroy(struct mem_region *r) int result; if (!r->destroy) { - pr_info("%s:%d: Not destroying high region: %llxh %llxh\n", - __func__, __LINE__, r->base, r->size); return; } - DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base); - if (r->base) { result = lv1_release_memory(r->base); - BUG_ON(result); + + if (result) { + lv1_panic(0); + } + r->size = r->base = r->offset = 0; map.total = map.rm.size; } + ps3_mm_set_repository_highmem(NULL); } @@ -394,8 +396,8 @@ static struct dma_chunk * dma_find_chunk(struct ps3_dma_region *r, unsigned long bus_addr, unsigned long len) { struct dma_chunk *c; - 
unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len+bus_addr-aligned_bus, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len+bus_addr-aligned_bus, 1 << r->page_size); list_for_each_entry(c, &r->chunk_list.head, link) { @@ -423,8 +425,8 @@ static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r, unsigned long lpar_addr, unsigned long len) { struct dma_chunk *c; - unsigned long aligned_lpar = _ALIGN_DOWN(lpar_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + lpar_addr - aligned_lpar, + unsigned long aligned_lpar = ALIGN_DOWN(lpar_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + lpar_addr - aligned_lpar, 1 << r->page_size); list_for_each_entry(c, &r->chunk_list.head, link) { @@ -775,8 +777,8 @@ static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr, struct dma_chunk *c; unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) : virt_addr; - unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys, + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, 1 << r->page_size); *bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr)); @@ -830,8 +832,8 @@ static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr, struct dma_chunk *c; unsigned long phys_addr = is_kernel_addr(virt_addr) ? 
__pa(virt_addr) : virt_addr; - unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys, + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, 1 << r->page_size); DBG(KERN_ERR "%s: vaddr=%#lx, len=%#lx\n", __func__, @@ -889,9 +891,9 @@ static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr, c = dma_find_chunk(r, bus_addr, len); if (!c) { - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + bus_addr + unsigned long aligned_len = ALIGN(len + bus_addr - aligned_bus, 1 << r->page_size); DBG("%s:%d: not found: bus_addr %llxh\n", __func__, __LINE__, bus_addr); @@ -926,9 +928,9 @@ static int dma_ioc0_unmap_area(struct ps3_dma_region *r, c = dma_find_chunk(r, bus_addr, len); if (!c) { - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + bus_addr + unsigned long aligned_len = ALIGN(len + bus_addr - aligned_bus, 1 << r->page_size); DBG("%s:%d: not found: bus_addr %llxh\n", @@ -974,7 +976,7 @@ static int dma_sb_region_create_linear(struct ps3_dma_region *r) pr_info("%s:%d: forcing 16M pages for linear map\n", __func__, __LINE__); r->page_size = PS3_DMA_16M; - r->len = _ALIGN_UP(r->len, 1 << r->page_size); + r->len = ALIGN(r->len, 1 << r->page_size); } } @@ -1125,7 +1127,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset = lpar_addr; if (r->offset >= map.rm.size) r->offset -= map.r1.offset; - r->len = len ? len : _ALIGN_UP(map.total, 1 << r->page_size); + r->len = len ? 
len : ALIGN(map.total, 1 << r->page_size); switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index b29368931c56..e9ae5dd03593 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -138,7 +138,7 @@ static int __init early_parse_ps3fb(char *p) if (!p) return 1; - ps3fb_videomemory.size = _ALIGN_UP(memparse(p, &p), + ps3fb_videomemory.size = ALIGN(memparse(p, &p), ps3fb_videomemory.align); return 0; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 845342814edc..ace117f99d94 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -664,6 +664,8 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) if (!ret) return ret; + if (ret < 0) + break; /* * If RTAS returns a delay value that's above 100ms, cut it @@ -684,7 +686,11 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, ret); - return ret; + /* PAPR defines -3 as "Parameter Error" for this function: */ + if (ret == -3) + return -EINVAL; + else + return -EIO; } /** diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index b91eb0929ed1..a6f101c958e8 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -47,6 +47,7 @@ #include <linux/stat.h> #include <linux/of_platform.h> #include <asm/ibmebus.h> +#include <asm/machdep.h> static struct device ibmebus_bus_device = { /* fake "parent" device */ .init_name = "ibmebus", @@ -464,4 +465,4 @@ static int __init ibmebus_bus_init(void) return 0; } -postcore_initcall(ibmebus_bus_init); +machine_postcore_initcall(pseries, ibmebus_bus_init); diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c index b571285f6c14..10d982997736 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -371,6 +371,9 @@ void post_mobility_fixup(void) /* Possibly switch to a new RFI flush type */ pseries_setup_rfi_flush(); + /* Reinitialise system information for hv-24x7 */ + read_24x7_sys_info(); + return; } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 1d1da639b8b7..f3736fcd98fc 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -395,16 +395,31 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) /* * Some versions of FWNMI place the buffer inside the 4kB page starting at * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. */ #define VALID_FWNMI_BUFFER(A) \ - ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) static inline struct rtas_error_log *fwnmi_get_errlog(void) { return (struct rtas_error_log *)local_paca->mce_data_buf; } +static __be64 *fwnmi_get_savep(struct pt_regs *regs) +{ + unsigned long savep_ra; + + /* Mask top two bits */ + savep_ra = regs->gpr[3] & ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(savep_ra)) { + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + return NULL; + } + + return __va(savep_ra); +} + /* * Get the error information for errors coming through the * FWNMI vectors. 
The pt_regs' r3 will be updated to reflect @@ -422,19 +437,14 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void) */ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { - unsigned long *savep; struct rtas_error_log *h; + __be64 *savep; - /* Mask top two bits */ - regs->gpr[3] &= ~(0x3UL << 62); - - if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + savep = fwnmi_get_savep(regs); + if (!savep) return NULL; - } - savep = __va(regs->gpr[3]); - regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ h = (struct rtas_error_log *)&savep[1]; /* Use the per cpu buffer from paca to store rtas error log */ @@ -458,7 +468,15 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) */ static void fwnmi_release_errinfo(void) { - int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + struct rtas_args rtas_args; + int ret; + + /* + * On pseries, the machine check stack is limited to under 4GB, so + * args can be on-stack. + */ + rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); + ret = be32_to_cpu(rtas_args.rets[0]); if (ret != 0) printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); } @@ -481,11 +499,21 @@ int pSeries_system_reset_exception(struct pt_regs *regs) #endif if (fwnmi_active) { - struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); - if (errhdr) { - /* XXX Should look at FWNMI information */ - } - fwnmi_release_errinfo(); + __be64 *savep; + + /* + * Firmware (PowerVM and KVM) saves r3 to a save area like + * machine check, which is not exactly what PAPR (2.9) + * suggests but there is no way to detect otherwise, so this + * is the interface now. + * + * System resets do not save any error log or require an + * "ibm,nmi-interlock" rtas call to release. 
+ */ + + savep = fwnmi_get_savep(regs); + if (savep) + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ } if (smp_handle_nmi_ipi(regs)) diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 70c3013fdd07..81343908ed33 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -506,7 +506,7 @@ void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) fadump_conf->fadump_supported = 1; /* Firmware supports 64-bit value for size, align it to pagesize. */ - fadump_conf->max_copy_size = _ALIGN_DOWN(U64_MAX, PAGE_SIZE); + fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE); /* * The 'ibm,kernel-dump' rtas node is present only if there is diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0c8421dd01ab..64d18f4bf093 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -68,6 +68,7 @@ #include <asm/isa-bridge.h> #include <asm/security_features.h> #include <asm/asm-const.h> +#include <asm/idle.h> #include <asm/swiotlb.h> #include <asm/svm.h> @@ -83,6 +84,7 @@ unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ +int ibm_nmi_interlock_token; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -113,9 +115,14 @@ static void __init fwnmi_init(void) struct slb_entry *slb_ptr; size_t size; #endif + int ibm_nmi_register_token; - int ibm_nmi_register = rtas_token("ibm,nmi-register"); - if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + ibm_nmi_register_token = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE) + return; + + ibm_nmi_interlock_token = rtas_token("ibm,nmi-interlock"); + if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE)) return; /* If the kernel's not linked at zero we point the 
firmware at low @@ -123,8 +130,8 @@ static void __init fwnmi_init(void) system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START; machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START; - if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr, - machine_check_addr)) + if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL, + system_reset_addr, machine_check_addr)) fwnmi_active = 1; /* @@ -317,6 +324,9 @@ static int alloc_dispatch_log_kmem_cache(void) } machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache); +DEFINE_PER_CPU(u64, idle_spurr_cycles); +DEFINE_PER_CPU(u64, idle_entry_purr_snap); +DEFINE_PER_CPU(u64, idle_entry_spurr_snap); static void pseries_lpar_idle(void) { /* @@ -328,7 +338,7 @@ static void pseries_lpar_idle(void) return; /* Indicate to hypervisor that we are idle. */ - get_lppaca()->idle = 1; + pseries_idle_prolog(); /* * Yield the processor to the hypervisor. We return if @@ -339,7 +349,7 @@ static void pseries_lpar_idle(void) */ cede_processor(); - get_lppaca()->idle = 0; + pseries_idle_epilog(); } /* diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 37f1f25ba804..0487b26f6f1a 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -31,6 +31,7 @@ #include <asm/tce.h> #include <asm/page.h> #include <asm/hvcall.h> +#include <asm/machdep.h> static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = "vio", @@ -1513,7 +1514,7 @@ static int __init vio_bus_init(void) return 0; } -postcore_initcall(vio_bus_init); +machine_postcore_initcall(pseries, vio_bus_init); static int __init vio_device_init(void) { @@ -1522,7 +1523,7 @@ static int __init vio_device_init(void) return 0; } -device_initcall(vio_device_init); +machine_device_initcall(pseries, vio_device_init); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1703,4 +1704,4 @@ static int __init vio_init(void) 
dma_debug_add_bus(&vio_bus_type); return 0; } -fs_initcall(vio_init); +machine_fs_initcall(pseries, vio_init); diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index cb5a5bd2cef5..026b3f01a991 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -31,8 +31,6 @@ obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o obj-$(CONFIG_PPC_I8259) += i8259.o obj-$(CONFIG_IPIC) += ipic.o -obj-$(CONFIG_XILINX_VIRTEX) += xilinx_intc.o -obj-$(CONFIG_XILINX_PCI) += xilinx_pci.o obj-$(CONFIG_OF_RTC) += of_rtc.o obj-$(CONFIG_CPM) += cpm_common.o diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 71660bacb264..7dc1960f8bdb 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -68,6 +68,8 @@ static void udbg_putc_cpm(char c) void __init udbg_init_cpm(void) { #ifdef CONFIG_PPC_8xx + mmu_mapin_immr(); + cpm_udbg_txdesc = (u32 __iomem __force *) (CONFIG_PPC_EARLY_DEBUG_CPM_ADDR - PHYS_IMMR_BASE + VIRT_IMMR_BASE); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 6aabc74688a6..4cf18000f07c 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -50,8 +50,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d) server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, - DEFAULT_PRIORITY); + call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, + server, DEFAULT_PRIORITY); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive irq %u server %x returned %d\n", @@ -60,7 +60,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d) } /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq); + call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: 
ibm_int_on irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -91,7 +91,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) if (hw_irq == XICS_IPI) return; - call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq); + call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -99,8 +99,8 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) } /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, - xics_default_server, 0xff); + call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, + xics_default_server, 0xff); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -131,7 +131,7 @@ static int ics_rtas_set_affinity(struct irq_data *d, if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return -1; - status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq); + status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", @@ -146,8 +146,8 @@ static int ics_rtas_set_affinity(struct irq_data *d, return -1; } - status = rtas_call(ibm_set_xive, 3, 1, NULL, - hw_irq, irq_server, xics_status[1]); + status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, + hw_irq, irq_server, xics_status[1]); if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", @@ -179,7 +179,7 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq) return -EINVAL; /* Check if RTAS knows about this interrupt */ - rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq); + rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq); if (rc) return -ENXIO; @@ -198,7 +198,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec) { int rc, status[2]; - rc = rtas_call(ibm_get_xive, 1, 3, status, vec); + rc = 
rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec); if (rc) return -1; return status[0]; diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c deleted file mode 100644 index 4a86dcff3fcd..000000000000 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Interrupt controller driver for Xilinx Virtex FPGAs - * - * Copyright (C) 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - * - */ - -/* - * This is a driver for the interrupt controller typically found in - * Xilinx Virtex FPGA designs. - * - * The interrupt sense levels are hard coded into the FPGA design with - * typically a 1:1 relationship between irq lines and devices (no shared - * irq lines). Therefore, this driver does not attempt to handle edge - * and level interrupts differently. - */ -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/i8259.h> -#include <asm/irq.h> -#include <linux/irqchip.h> - -#if defined(CONFIG_PPC_I8259) -/* - * Support code for cascading to 8259 interrupt controllers - */ -static void xilinx_i8259_cascade(struct irq_desc *desc) -{ - struct irq_chip *chip = irq_desc_get_chip(desc); - unsigned int cascade_irq = i8259_irq(); - - if (cascade_irq) - generic_handle_irq(cascade_irq); - - /* Let xilinx_intc end the interrupt */ - chip->irq_unmask(&desc->irq_data); -} - -static void __init xilinx_i8259_setup_cascade(void) -{ - struct device_node *cascade_node; - int cascade_irq; - - /* Initialize i8259 controller */ - cascade_node = of_find_compatible_node(NULL, NULL, "chrp,iic"); - if (!cascade_node) - return; - - cascade_irq = irq_of_parse_and_map(cascade_node, 0); - if (!cascade_irq) { - 
pr_err("virtex_ml510: Failed to map cascade interrupt\n"); - goto out; - } - - i8259_init(cascade_node, 0); - irq_set_chained_handler(cascade_irq, xilinx_i8259_cascade); - - /* Program irq 7 (usb/audio), 14/15 (ide) to level sensitive */ - /* This looks like a dirty hack to me --gcl */ - outb(0xc0, 0x4d0); - outb(0xc0, 0x4d1); - - out: - of_node_put(cascade_node); -} -#else -static inline void xilinx_i8259_setup_cascade(void) { return; } -#endif /* defined(CONFIG_PPC_I8259) */ - -/* - * Initialize master Xilinx interrupt controller - */ -void __init xilinx_intc_init_tree(void) -{ - irqchip_init(); - xilinx_i8259_setup_cascade(); -} diff --git a/arch/powerpc/sysdev/xilinx_pci.c b/arch/powerpc/sysdev/xilinx_pci.c deleted file mode 100644 index fea5667699ed..000000000000 --- a/arch/powerpc/sysdev/xilinx_pci.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * PCI support for Xilinx plbv46_pci soft-core which can be used on - * Xilinx Virtex ML410 / ML510 boards. - * - * Copyright 2009 Roderick Colenbrander - * Copyright 2009 Secret Lab Technologies Ltd. - * - * The pci bridge fixup code was copied from ppc4xx_pci.c and was written - * by Benjamin Herrenschmidt. - * Copyright 2007 Ben. Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <linux/ioport.h> -#include <linux/of.h> -#include <linux/pci.h> -#include <mm/mmu_decl.h> -#include <asm/io.h> -#include <asm/xilinx_pci.h> - -#define XPLB_PCI_ADDR 0x10c -#define XPLB_PCI_DATA 0x110 -#define XPLB_PCI_BUS 0x114 - -#define PCI_HOST_ENABLE_CMD PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY - -static const struct of_device_id xilinx_pci_match[] = { - { .compatible = "xlnx,plbv46-pci-1.03.a", }, - {} -}; - -/** - * xilinx_pci_fixup_bridge - Block Xilinx PHB configuration. 
- */ -static void xilinx_pci_fixup_bridge(struct pci_dev *dev) -{ - struct pci_controller *hose; - int i; - - if (dev->devfn || dev->bus->self) - return; - - hose = pci_bus_to_host(dev->bus); - if (!hose) - return; - - if (!of_match_node(xilinx_pci_match, hose->dn)) - return; - - /* Hide the PCI host BARs from the kernel as their content doesn't - * fit well in the resource management - */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; - } - - dev_info(&dev->dev, "Hiding Xilinx plb-pci host bridge resources %s\n", - pci_name(dev)); -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, xilinx_pci_fixup_bridge); - -/** - * xilinx_pci_exclude_device - Don't do config access for non-root bus - * - * This is a hack. Config access to any bus other than bus 0 does not - * currently work on the ML510 so we prevent it here. - */ -static int -xilinx_pci_exclude_device(struct pci_controller *hose, u_char bus, u8 devfn) -{ - return (bus != 0); -} - -/** - * xilinx_pci_init - Find and register a Xilinx PCI host bridge - */ -void __init xilinx_pci_init(void) -{ - struct pci_controller *hose; - struct resource r; - void __iomem *pci_reg; - struct device_node *pci_node; - - pci_node = of_find_matching_node(NULL, xilinx_pci_match); - if(!pci_node) - return; - - if (of_address_to_resource(pci_node, 0, &r)) { - pr_err("xilinx-pci: cannot resolve base address\n"); - return; - } - - hose = pcibios_alloc_controller(pci_node); - if (!hose) { - pr_err("xilinx-pci: pcibios_alloc_controller() failed\n"); - return; - } - - /* Setup config space */ - setup_indirect_pci(hose, r.start + XPLB_PCI_ADDR, - r.start + XPLB_PCI_DATA, - PPC_INDIRECT_TYPE_SET_CFG_TYPE); - - /* According to the xilinx plbv46_pci documentation the soft-core starts - * a self-init when the bus master enable bit is set. Without this bit - * set the pci bus can't be scanned. 
- */ - early_write_config_word(hose, 0, 0, PCI_COMMAND, PCI_HOST_ENABLE_CMD); - - /* Set the max latency timer to 255 */ - early_write_config_byte(hose, 0, 0, PCI_LATENCY_TIMER, 0xff); - - /* Set the max bus number to 255 */ - pci_reg = of_iomap(pci_node, 0); - out_8(pci_reg + XPLB_PCI_BUS, 0xff); - iounmap(pci_reg); - - /* Nothing past the root bridge is working right now. By default - * exclude config access to anything except bus 0 */ - if (!ppc_md.pci_exclude_device) - ppc_md.pci_exclude_device = xilinx_pci_exclude_device; - - /* Register the host bridge with the linux kernel! */ - pci_process_bridge_OF_ranges(hose, pci_node, 1); - - pr_info("xilinx-pci: Registered PCI host bridge\n"); -} diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index b294f70f1a67..f591be9f01f4 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/msi.h> +#include <linux/vmalloc.h> #include <asm/debugfs.h> #include <asm/prom.h> @@ -196,6 +197,9 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) { u64 val; + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + offset |= XIVE_ESB_LD_ST_MO; + /* Handle HW errata */ if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; @@ -1017,12 +1021,16 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { if (xd->eoi_mmio) { + unmap_kernel_range((unsigned long)xd->eoi_mmio, + 1u << xd->esb_shift); iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) xd->trig_mmio = NULL; xd->eoi_mmio = NULL; } if (xd->trig_mmio) { + unmap_kernel_range((unsigned long)xd->trig_mmio, + 1u << xd->esb_shift); iounmap(xd->trig_mmio); xd->trig_mmio = NULL; } @@ -1656,7 +1664,8 @@ DEFINE_SHOW_ATTRIBUTE(xive_core_debug); int xive_core_debug_init(void) { - debugfs_create_file("xive", 0400, powerpc_debugfs_root, - NULL, 
&xive_core_debug_fops); + if (xive_enabled()) + debugfs_create_file("xive", 0400, powerpc_debugfs_root, + NULL, &xive_core_debug_fops); return 0; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 5218fdc4b29a..71b881e554fc 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -280,12 +280,12 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) } #endif /* CONFIG_SMP */ -u32 xive_native_alloc_irq(void) +u32 xive_native_alloc_irq_on_chip(u32 chip_id) { s64 rc; for (;;) { - rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP); + rc = opal_xive_allocate_irq(chip_id); if (rc != OPAL_BUSY) break; msleep(OPAL_BUSY_DELAY_MS); @@ -294,7 +294,7 @@ u32 xive_native_alloc_irq(void) return 0; return rc; } -EXPORT_SYMBOL_GPL(xive_native_alloc_irq); +EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip); void xive_native_free_irq(u32 irq) { diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 7ab5c6780997..f0551a2be9df 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -27,6 +27,8 @@ #include <asm/xive.h> #include <asm/xive-regs.h> #include <asm/hvcall.h> +#include <asm/svm.h> +#include <asm/ultravisor.h> #include "xive-internal.h" @@ -502,6 +504,9 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = -EIO; } else { q->qpage = qpage; + if (is_secure_guest()) + uv_share_page(PHYS_PFN(qpage_phys), + 1 << xive_alloc_order(order)); } fail: return rc; @@ -535,6 +540,8 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); + if (is_secure_guest()) + uv_unshare_page(PHYS_PFN(__pa(q->qpage)), 1 << alloc_order); free_pages((unsigned long)q->qpage, alloc_order); q->qpage = NULL; } diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh index ad9e57209aa4..e32d3162e5ed 100644 --- 
a/arch/powerpc/tools/head_check.sh +++ b/arch/powerpc/tools/head_check.sh @@ -31,8 +31,10 @@ # level entry code (boot, interrupt vectors, etc) until r2 is set up. This # could cause the kernel to die in early boot. -# Turn this on if you want more debug output: -# set -x +# Allow for verbose output +if [ "$V" = "1" ]; then + set -x +fi if [ $# -lt 2 ]; then echo "$0 [path to nm] [path to vmlinux]" 1>&2 @@ -44,7 +46,7 @@ nm="$1" vmlinux="$2" # gcc-4.6-era toolchain make _stext an A (absolute) symbol rather than T -$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" -m4 > .tmp_symbols.txt +$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" > .tmp_symbols.txt vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 6f9cccea54f3..89c76ca35640 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -18,7 +18,7 @@ endif ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y += xmon.o nonstdio.o spr_access.o +obj-y += xmon.o nonstdio.o spr_access.o xmon_bpts.o ifdef CONFIG_XMON_DISASSEMBLY obj-y += ppc-dis.o ppc-opc.o diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 89415b84c597..a001711863e5 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -54,6 +54,7 @@ #include <asm/firmware.h> #include <asm/code-patching.h> #include <asm/sections.h> +#include <asm/inst.h> #ifdef CONFIG_PPC64 #include <asm/hvcall.h> @@ -62,6 +63,7 @@ #include "nonstdio.h" #include "dis-asm.h" +#include "xmon_bpts.h" #ifdef CONFIG_SMP static cpumask_t cpus_in_xmon = CPU_MASK_NONE; @@ -98,7 +100,7 @@ static long *xmon_fault_jmp[NR_CPUS]; /* Breakpoint stuff */ struct bpt { unsigned long address; - unsigned int instr[2]; + struct ppc_inst *instr; atomic_t ref_count; int enabled; unsigned long pad; @@ -109,9 +111,8 @@ struct bpt { #define BP_TRAP 2 
#define BP_DABR 4 -#define NBPTS 256 static struct bpt bpts[NBPTS]; -static struct bpt dabr; +static struct bpt dabr[HBP_NUM_MAX]; static struct bpt *iabr; static unsigned bpinstr = 0x7fe00008; /* trap */ @@ -121,6 +122,7 @@ static unsigned bpinstr = 0x7fe00008; /* trap */ static int cmds(struct pt_regs *); static int mread(unsigned long, void *, int); static int mwrite(unsigned long, void *, int); +static int mread_instr(unsigned long, struct ppc_inst *); static int handle_fault(struct pt_regs *); static void byterev(unsigned char *, int); static void memex(void); @@ -326,11 +328,6 @@ static inline void sync(void) asm volatile("sync; isync"); } -static inline void store_inst(void *p) -{ - asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p)); -} - static inline void cflush(void *p) { asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p)); @@ -706,13 +703,13 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = at_breakpoint(regs->nip); if (bp != NULL) { - int stepped = emulate_step(regs, bp->instr[0]); + int stepped = emulate_step(regs, ppc_inst_read(bp->instr)); if (stepped == 0) { regs->nip = (unsigned long) &bp->instr[0]; atomic_inc(&bp->ref_count); } else if (stepped < 0) { printf("Couldn't single-step %s instruction\n", - (IS_RFID(bp->instr[0])? "rfid": "mtmsrd")); + IS_RFID(ppc_inst_read(bp->instr))? "rfid": "mtmsrd"); } } } @@ -761,8 +758,8 @@ static int xmon_bpt(struct pt_regs *regs) /* Are we at the trap at bp->instr[1] for some bp? 
*/ bp = in_breakpoint_table(regs->nip, &offset); - if (bp != NULL && offset == 4) { - regs->nip = bp->address + 4; + if (bp != NULL && (offset == 4 || offset == 8)) { + regs->nip = bp->address + offset; atomic_dec(&bp->ref_count); return 1; } @@ -787,10 +784,17 @@ static int xmon_sstep(struct pt_regs *regs) static int xmon_break_match(struct pt_regs *regs) { + int i; + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; - if (dabr.enabled == 0) - return 0; + for (i = 0; i < nr_wp_slots(); i++) { + if (dabr[i].enabled) + goto found; + } + return 0; + +found: xmon_core(regs, 0); return 1; } @@ -859,15 +863,13 @@ static struct bpt *in_breakpoint_table(unsigned long nip, unsigned long *offp) { unsigned long off; - off = nip - (unsigned long) bpts; - if (off >= sizeof(bpts)) + off = nip - (unsigned long)bpt_table; + if (off >= sizeof(bpt_table)) return NULL; - off %= sizeof(struct bpt); - if (off != offsetof(struct bpt, instr[0]) - && off != offsetof(struct bpt, instr[1])) + *offp = off & (BPT_SIZE - 1); + if (off & 3) return NULL; - *offp = off - offsetof(struct bpt, instr[0]); - return (struct bpt *) (nip - off); + return bpts + (off / BPT_SIZE); } static struct bpt *new_breakpoint(unsigned long a) @@ -882,8 +884,7 @@ static struct bpt *new_breakpoint(unsigned long a) for (bp = bpts; bp < &bpts[NBPTS]; ++bp) { if (!bp->enabled && atomic_read(&bp->ref_count) == 0) { bp->address = a; - bp->instr[1] = bpinstr; - store_inst(&bp->instr[1]); + bp->instr = (void *)(bpt_table + ((bp - bpts) * BPT_WORDS)); return bp; } } @@ -895,47 +896,75 @@ static struct bpt *new_breakpoint(unsigned long a) static void insert_bpts(void) { int i; - struct bpt *bp; + struct ppc_inst instr, instr2; + struct bpt *bp, *bp2; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0) continue; - if (mread(bp->address, &bp->instr[0], 4) != 4) { + if (!mread_instr(bp->address, &instr)) { printf("Couldn't read instruction at %lx, " "disabling 
breakpoint there\n", bp->address); bp->enabled = 0; continue; } - if (IS_MTMSRD(bp->instr[0]) || IS_RFID(bp->instr[0])) { + if (IS_MTMSRD(instr) || IS_RFID(instr)) { printf("Breakpoint at %lx is on an mtmsrd or rfid " "instruction, disabling it\n", bp->address); bp->enabled = 0; continue; } - store_inst(&bp->instr[0]); + /* + * Check the address is not a suffix by looking for a prefix in + * front of it. + */ + if (mread_instr(bp->address - 4, &instr2) == 8) { + printf("Breakpoint at %lx is on the second word of a prefixed instruction, disabling it\n", + bp->address); + bp->enabled = 0; + continue; + } + /* + * We might still be a suffix - if the prefix has already been + * replaced by a breakpoint we won't catch it with the above + * test. + */ + bp2 = at_breakpoint(bp->address - 4); + if (bp2 && ppc_inst_prefixed(ppc_inst_read(bp2->instr))) { + printf("Breakpoint at %lx is on the second word of a prefixed instruction, disabling it\n", + bp->address); + bp->enabled = 0; + continue; + } + + patch_instruction(bp->instr, instr); + patch_instruction(ppc_inst_next(bp->instr, &instr), + ppc_inst(bpinstr)); if (bp->enabled & BP_CIABR) continue; - if (patch_instruction((unsigned int *)bp->address, - bpinstr) != 0) { + if (patch_instruction((struct ppc_inst *)bp->address, + ppc_inst(bpinstr)) != 0) { printf("Couldn't write instruction at %lx, " "disabling breakpoint there\n", bp->address); bp->enabled &= ~BP_TRAP; continue; } - store_inst((void *)bp->address); } } static void insert_cpu_bpts(void) { + int i; struct arch_hw_breakpoint brk; - if (dabr.enabled) { - brk.address = dabr.address; - brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - brk.len = DABR_MAX_LEN; - __set_breakpoint(&brk); + for (i = 0; i < nr_wp_slots(); i++) { + if (dabr[i].enabled) { + brk.address = dabr[i].address; + brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; + brk.len = 8; + __set_breakpoint(i, &brk); + } } if (iabr) @@ -946,20 +975,18 @@ static void 
remove_bpts(void) { int i; struct bpt *bp; - unsigned instr; + struct ppc_inst instr; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP) continue; - if (mread(bp->address, &instr, 4) == 4 - && instr == bpinstr + if (mread_instr(bp->address, &instr) + && ppc_inst_equal(instr, ppc_inst(bpinstr)) && patch_instruction( - (unsigned int *)bp->address, bp->instr[0]) != 0) + (struct ppc_inst *)bp->address, ppc_inst_read(bp->instr)) != 0) printf("Couldn't remove breakpoint at %lx\n", bp->address); - else - store_inst((void *)bp->address); } } @@ -1164,13 +1191,13 @@ static int do_step(struct pt_regs *regs) */ static int do_step(struct pt_regs *regs) { - unsigned int instr; + struct ppc_inst instr; int stepped; force_enable_xmon(); /* check we are in 64-bit kernel mode, translation enabled */ if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) { - if (mread(regs->nip, &instr, 4) == 4) { + if (mread_instr(regs->nip, &instr)) { stepped = emulate_step(regs, instr); if (stepped < 0) { printf("Couldn't single-step %s instruction\n", @@ -1178,7 +1205,7 @@ static int do_step(struct pt_regs *regs) return 0; } if (stepped > 0) { - regs->trap = 0xd00 | (regs->trap & 1); + set_trap(regs, 0xd00); printf("stepped to "); xmon_print_symbol(regs->nip, " ", "\n"); ppc_inst_dump(regs->nip, 1, 0); @@ -1330,14 +1357,14 @@ csum(void) */ static long check_bp_loc(unsigned long addr) { - unsigned int instr; + struct ppc_inst instr; addr &= ~3; if (!is_kernel_addr(addr)) { printf("Breakpoints may only be placed at kernel addresses\n"); return 0; } - if (!mread(addr, &instr, sizeof(instr))) { + if (!mread_instr(addr, &instr)) { printf("Can't read instruction at address %lx\n", addr); return 0; } @@ -1349,6 +1376,35 @@ static long check_bp_loc(unsigned long addr) return 1; } +static int find_free_data_bpt(void) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!dabr[i].enabled) + return i; + } + printf("Couldn't find free 
breakpoint register\n"); + return -1; +} + +static void print_data_bpts(void) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!dabr[i].enabled) + continue; + + printf(" data "REG" [", dabr[i].address); + if (dabr[i].enabled & 1) + printf("r"); + if (dabr[i].enabled & 2) + printf("w"); + printf("]\n"); + } +} + static char *breakpoint_help_string = "Breakpoint command usage:\n" "b show breakpoints\n" @@ -1382,6 +1438,9 @@ bpt_cmds(void) printf("Hardware data breakpoint not supported on this cpu\n"); break; } + i = find_free_data_bpt(); + if (i < 0) + break; mode = 7; cmd = inchar(); if (cmd == 'r') @@ -1390,15 +1449,15 @@ bpt_cmds(void) mode = 6; else termch = cmd; - dabr.address = 0; - dabr.enabled = 0; - if (scanhex(&dabr.address)) { - if (!is_kernel_addr(dabr.address)) { + dabr[i].address = 0; + dabr[i].enabled = 0; + if (scanhex(&dabr[i].address)) { + if (!is_kernel_addr(dabr[i].address)) { printf(badaddr); break; } - dabr.address &= ~HW_BRK_TYPE_DABR; - dabr.enabled = mode | BP_DABR; + dabr[i].address &= ~HW_BRK_TYPE_DABR; + dabr[i].enabled = mode | BP_DABR; } force_enable_xmon(); @@ -1437,7 +1496,9 @@ bpt_cmds(void) for (i = 0; i < NBPTS; ++i) bpts[i].enabled = 0; iabr = NULL; - dabr.enabled = 0; + for (i = 0; i < nr_wp_slots(); i++) + dabr[i].enabled = 0; + printf("All breakpoints cleared\n"); break; } @@ -1471,14 +1532,7 @@ bpt_cmds(void) if (xmon_is_ro || !scanhex(&a)) { /* print all breakpoints */ printf(" type address\n"); - if (dabr.enabled) { - printf(" data "REG" [", dabr.address); - if (dabr.enabled & 1) - printf("r"); - if (dabr.enabled & 2) - printf("w"); - printf("]\n"); - } + print_data_bpts(); for (bp = bpts; bp < &bpts[NBPTS]; ++bp) { if (!bp->enabled) continue; @@ -1776,7 +1830,7 @@ static void prregs(struct pt_regs *fp) #endif printf("pc = "); xmon_print_symbol(fp->nip, " ", "\n"); - if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) { + if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) { printf("cfar= "); 
xmon_print_symbol(fp->orig_gpr3, " ", "\n"); } @@ -1938,8 +1992,13 @@ static void dump_207_sprs(void) printf("hfscr = %.16lx dhdes = %.16lx rpr = %.16lx\n", mfspr(SPRN_HFSCR), mfspr(SPRN_DHDES), mfspr(SPRN_RPR)); - printf("dawr = %.16lx dawrx = %.16lx ciabr = %.16lx\n", - mfspr(SPRN_DAWR), mfspr(SPRN_DAWRX), mfspr(SPRN_CIABR)); + printf("dawr0 = %.16lx dawrx0 = %.16lx\n", + mfspr(SPRN_DAWR0), mfspr(SPRN_DAWRX0)); + if (nr_wp_slots() > 1) { + printf("dawr1 = %.16lx dawrx1 = %.16lx\n", + mfspr(SPRN_DAWR1), mfspr(SPRN_DAWRX1)); + } + printf("ciabr = %.16lx\n", mfspr(SPRN_CIABR)); #endif } @@ -2130,6 +2189,25 @@ mwrite(unsigned long adrs, void *buf, int size) return n; } +static int +mread_instr(unsigned long adrs, struct ppc_inst *instr) +{ + volatile int n; + + n = 0; + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + *instr = ppc_inst_read((struct ppc_inst *)adrs); + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = ppc_inst_len(*instr); + } + catch_memory_errors = 0; + return n; +} + static int fault_type; static int fault_except; static char *fault_chars[] = { "--", "**", "##" }; @@ -2856,7 +2934,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, { int nr, dotted; unsigned long first_adr; - unsigned int inst, last_inst = 0; + struct ppc_inst inst, last_inst = ppc_inst(0); unsigned char val[4]; dotted = 0; @@ -2869,8 +2947,8 @@ generic_inst_dump(unsigned long adr, long count, int praddr, } break; } - inst = GETWORD(val); - if (adr > first_adr && inst == last_inst) { + inst = ppc_inst(GETWORD(val)); + if (adr > first_adr && ppc_inst_equal(inst, last_inst)) { if (!dotted) { printf(" ...\n"); dotted = 1; @@ -2880,9 +2958,9 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %.8x", adr, inst); + printf(REG" %.8x", adr, ppc_inst_val(inst)); printf("\t"); - dump_func(inst, adr); + dump_func(ppc_inst_val(inst), adr); 
printf("\n"); } return adr - first_adr; @@ -3107,8 +3185,8 @@ static void show_task(struct task_struct *tsk) (tsk->exit_state & EXIT_DEAD) ? 'E' : (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; - printf("%px %016lx %6d %6d %c %2d %s\n", tsk, - tsk->thread.ksp, + printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk, + tsk->thread.ksp, tsk->thread.regs, tsk->pid, rcu_dereference(tsk->parent)->pid, state, task_cpu(tsk), tsk->comm); @@ -3230,7 +3308,7 @@ static void show_tasks(void) unsigned long tskv; struct task_struct *tsk = NULL; - printf(" task_struct ->thread.ksp PID PPID S P CMD\n"); + printf(" task_struct ->thread.ksp ->thread.regs PID PPID S P CMD\n"); if (scanhex(&tskv)) tsk = (struct task_struct *)tskv; @@ -3868,10 +3946,9 @@ static void clear_all_bpt(void) bpts[i].enabled = 0; /* Clear any data or iabr breakpoints */ - if (iabr || dabr.enabled) { - iabr = NULL; - dabr.enabled = 0; - } + iabr = NULL; + for (i = 0; i < nr_wp_slots(); i++) + dabr[i].enabled = 0; } #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S new file mode 100644 index 000000000000..69726814cd27 --- /dev/null +++ b/arch/powerpc/xmon/xmon_bpts.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/ppc_asm.h> +#include <asm/asm-compat.h> +#include <asm/asm-offsets.h> +#include "xmon_bpts.h" + +/* Prefixed instructions can not cross 64 byte boundaries */ +.align 6 +.global bpt_table +bpt_table: + .space NBPTS * BPT_SIZE diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h new file mode 100644 index 000000000000..57e6fb03de48 --- /dev/null +++ b/arch/powerpc/xmon/xmon_bpts.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef XMON_BPTS_H +#define XMON_BPTS_H + +#define NBPTS 256 +#ifndef __ASSEMBLY__ +#include <asm/inst.h> +#define BPT_SIZE (sizeof(struct ppc_inst) * 2) +#define BPT_WORDS (BPT_SIZE / sizeof(struct ppc_inst)) + +extern unsigned int bpt_table[NBPTS * BPT_WORDS]; +#endif /* 
__ASSEMBLY__ */ + +#endif /* XMON_BPTS_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6076c8c912d2..e2528e057980 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1560,7 +1560,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL -static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, +static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, int full) { @@ -1569,7 +1569,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); return pmd; } - return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH |