Diffstat (limited to 'arch/powerpc')
591 files changed, 17770 insertions, 13168 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b29d7cb38368..787e829b6f25 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -130,6 +130,7 @@ config PPC select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST @@ -144,12 +145,16 @@ config PPC select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS + select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select DMA_OPS if PPC64 + select DMA_OPS_BYPASS if PPC64 select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT @@ -170,8 +175,8 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_KASAN if PPC32 - select HAVE_ARCH_KASAN_VMALLOC if PPC32 + select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 + select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT @@ -185,7 +190,6 @@ config PPC select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if PPC64 select HAVE_TIF_NOHZ if PPC64 - select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE @@ -211,7 +215,6 @@ config PPC select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) @@ -477,7 +480,7 @@ config LD_HEAD_STUB_CATCH If unsure, say "N". config MPROFILE_KERNEL - depends on PPC64 && CPU_LITTLE_ENDIAN + depends on PPC64 && CPU_LITTLE_ENDIAN && FUNCTION_TRACER def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__) config HOTPLUG_CPU @@ -490,6 +493,19 @@ config HOTPLUG_CPU Say N if you are unsure. +config PPC_QUEUED_SPINLOCKS + bool "Queued spinlocks" + depends on SMP + help + Say Y here to use queued spinlocks which give better scalability and + fairness on large SMP and NUMA systems without harming single threaded + performance. + + This option is currently experimental, the code is more complex and + less tested so it defaults to "N" for the moment. + + If unsure, say "N". + config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU @@ -687,15 +703,6 @@ config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG -# Some NUMA nodes have memory ranges that span -# other nodes. Even though a pfn is valid and -# between a node's start and end pfns, it may not -# reside on that node. See memmap_init_zone() -# for details. 
-config NODES_SPAN_OTHER_NODES - def_bool y - depends on NEED_MULTIPLE_NODES - config STDBINUTILS bool "Using standard binutils settings" depends on 44x @@ -773,41 +780,18 @@ config THREAD_SHIFT range 13 15 default "15" if PPC_256K_PAGES default "14" if PPC64 + default "14" if KASAN default "13" help Used to define the stack size. The default is almost always what you want. Only change this if you know what you are doing. -config ETEXT_SHIFT_BOOL - bool "Set custom etext alignment" if STRICT_KERNEL_RWX && \ - (PPC_BOOK3S_32 || PPC_8xx) - depends on ADVANCED_OPTIONS - help - This option allows you to set the kernel end of text alignment. When - RAM is mapped by blocks, the alignment needs to fit the size and - number of possible blocks. The default should be OK for most configs. - - Say N here unless you know what you are doing. - -config ETEXT_SHIFT - int "_etext shift" if ETEXT_SHIFT_BOOL - range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - range 19 23 if STRICT_KERNEL_RWX && PPC_8xx - default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - default 19 if STRICT_KERNEL_RWX && PPC_8xx - default PPC_PAGE_SHIFT - help - On Book3S 32 (603+), IBATs are used to map kernel text. - Smaller is the alignment, greater is the number of necessary IBATs. - - On 8xx, large pages (512kb or 8M) are used to map kernel linear - memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. - config DATA_SHIFT_BOOL - bool "Set custom data alignment" if STRICT_KERNEL_RWX && \ - (PPC_BOOK3S_32 || PPC_8xx) + bool "Set custom data alignment" depends on ADVANCED_OPTIONS + depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \ + (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX)) help This option allows you to set the kernel data alignment. When RAM is mapped by blocks, the alignment needs to fit the size and @@ -818,10 +802,13 @@ config DATA_SHIFT_BOOL config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 - range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - range 19 23 if STRICT_KERNEL_RWX && PPC_8xx + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 + default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx + default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA + default 19 if DEBUG_PAGEALLOC && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. @@ -829,7 +816,8 @@ config DATA_SHIFT On 8xx, large pages (512kb or 8M) are used to map kernel linear memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. + in that case. If PIN_TLB is selected, it must be aligned to 8M as + 8M pages will be pinned. config FORCE_MAX_ZONEORDER int "Maximum zone order" @@ -861,13 +849,28 @@ config FORCE_MAX_ZONEORDER this in mind when choosing a value for this option. config PPC_SUBPAGE_PROT - bool "Support setting protections for 4k subpages" + bool "Support setting protections for 4k subpages (subpage_prot syscall)" + default n depends on PPC_BOOK3S_64 && PPC_64K_PAGES help - This option adds support for a system call to allow user programs + This option adds support for system call to allow user programs to set access permissions (read/write, readonly, or no access) on the 4k subpages of each 64k page. + If unsure, say N here. 
+ +config PPC_PROT_SAO_LPAR + bool "Support PROT_SAO mappings in LPARs" + depends on PPC_BOOK3S_64 + help + This option adds support for PROT_SAO mappings from userspace + inside LPARs on supported CPUs. + + This may cause issues when performing guest migration from + a CPU that supports SAO to one that does not. + + If unsure, say N here. + config PPC_COPRO_BASE bool @@ -887,12 +890,8 @@ config PPC_DENORMALISATION Add support for handling denormalisation of single precision values. Useful for bare metal only. If unsure say Y here. -config CMDLINE_BOOL - bool "Default bootloader kernel arguments" - config CMDLINE - string "Initial kernel command string" if CMDLINE_BOOL - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL + string "Initial kernel command string" default "" help On some platforms, there is currently no way for the boot loader to @@ -1226,27 +1225,8 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx + default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX default "0xc0000000" - -config PIN_TLB - bool "Pinned Kernel TLBs (860 ONLY)" - depends on ADVANCED_OPTIONS && PPC_8xx && \ - !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX - -config PIN_TLB_DATA - bool "Pinned TLB for DATA" - depends on PIN_TLB - default y - -config PIN_TLB_IMMR - bool "Pinned TLB for IMMR" - depends on PIN_TLB || PPC_EARLY_DEBUG_CPM - default y - -config PIN_TLB_TEXT - bool "Pinned TLB for TEXT" - depends on PIN_TLB - default y endmenu if PPC64 diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 0b063830eea8..b88900f4832f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -230,7 +230,7 @@ config PPC_EARLY_DEBUG_40x help Select this to enable early debugging for IBM 40x chips via the inbuilt serial port. This works on chips with a 16550 compatible - UART. Xilinx chips with uartlite cannot use this option. + UART. 
config PPC_EARLY_DEBUG_CPM bool "Early serial debugging for Freescale CPM-based serial ports" diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f310c32e88a4..3e8da9cf2eb9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -246,7 +246,8 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -cpu-as-$(CONFIG_4xx) += -Wa,-m405 +cpu-as-$(CONFIG_40x) += -Wa,-m405 +cpu-as-$(CONFIG_44x) += -Wa,-m440 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index c53a1b8bba8b..b88fd27a45f0 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,16 +70,14 @@ BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) DTC_FLAGS ?= -p 1024 $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405 -$(obj)/ebony.o: BOOTCFLAGS += -mcpu=405 +$(obj)/ebony.o: BOOTCFLAGS += -mcpu=440 $(obj)/cuboot-hotfoot.o: BOOTCFLAGS += -mcpu=405 -$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 -$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 +$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=440 +$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=440 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405 -$(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-iss4xx.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-akebono.o: BOOTCFLAGS += -mcpu=405 -$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405 # The pre-boot decompressors pull in a lot of kernel headers and other source # files. This creates a bit of a dependency headache since we need to copy @@ -119,7 +117,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c -src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c +src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S @@ -129,14 +127,12 @@ src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += ugecon.c fsl-soc.c -src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c src-wlib-$(CONFIG_CPM) += cpm-serial.c src-plat-y := of.c epapr.c -src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ - treeboot-walnut.c cuboot-acadia.c \ - cuboot-kilauea.c simpleboot.c \ - virtex405-head.S virtex.c +src-plat-$(CONFIG_40x) += fixed-head.S cuboot-hotfoot.c \ + cuboot-acadia.c \ + cuboot-kilauea.c simpleboot.c src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \ cuboot-bamboo.c cuboot-sam440ep.c \ cuboot-sequoia.c cuboot-rainier.c \ @@ -144,7 +140,7 @@ src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \ cuboot-warp.c cuboot-yosemite.c \ treeboot-iss4xx.c treeboot-currituck.c \ treeboot-akebono.c \ - simpleboot.c fixed-head.S virtex.c + simpleboot.c fixed-head.S src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c @@ -279,9 +275,7 @@ image-$(CONFIG_EPAPR_BOOT) += zImage.epapr # # Board ports in arch/powerpc/platform/40x/Kconfig -image-$(CONFIG_EP405) += dtbImage.ep405 image-$(CONFIG_HOTFOOT) 
+= cuImage.hotfoot -image-$(CONFIG_WALNUT) += treeImage.walnut image-$(CONFIG_ACADIA) += cuImage.acadia image-$(CONFIG_OBS600) += uImage.obs600 @@ -372,6 +366,8 @@ initrd-y := $(patsubst zImage%, zImage.initrd%, \ $(patsubst treeImage%, treeImage.initrd%, $(image-y))))) initrd-y := $(filter-out $(image-y), $(initrd-y)) targets += $(image-y) $(initrd-y) +targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \ + $(patsubst $(x).%, dts/%.dtb, $(filter $(x).%, $(image-y)))) $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile index 1cbc0e4ce857..fb335d05aae8 100644 --- a/arch/powerpc/boot/dts/Makefile +++ b/arch/powerpc/boot/dts/Makefile @@ -4,4 +4,3 @@ subdir-y += fsl dtstree := $(srctree)/$(src) dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) -dtb-$(CONFIG_XILINX_VIRTEX440_GENERIC_BOARD) += virtex440-ml507.dtb virtex440-ml510.dtb diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts index cd9d66041a3f..df18f8dc4642 100644 --- a/arch/powerpc/boot/dts/akebono.dts +++ b/arch/powerpc/boot/dts/akebono.dts @@ -248,7 +248,7 @@ }; }; - PCIE0: pciex@10100000000 { + PCIE0: pcie@10100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -288,7 +288,7 @@ 0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>; }; - PCIE1: pciex@20100000000 { + PCIE1: pcie@20100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -328,7 +328,7 @@ 0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>; }; - PCIE2: pciex@18100000000 { + PCIE2: pcie@18100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -368,7 +368,7 @@ 0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>; }; - PCIE3: pciex@28100000000 { + PCIE3: pcie@28100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts index cc965a1816b6..aa1ae94cd776 100644 --- a/arch/powerpc/boot/dts/bluestone.dts +++ b/arch/powerpc/boot/dts/bluestone.dts @@ -325,7 +325,7 @@ }; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 0d6ac92d0f5e..c5fbb08e0a6e 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -461,7 +461,7 @@ interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -503,7 +503,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index b6d87b9c2cef..aea8af810106 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -122,7 +122,7 @@ }; }; - PCIE0: pciex@10100000000 { // 4xGBIF1 + PCIE0: pcie@10100000000 { // 4xGBIF1 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -160,7 +160,7 @@ 0x0 0x0 0x0 0x4 &MPIC 49 0x2 /* int D */>; }; - PCIE1: pciex@30100000000 { // 4xGBIF0 + PCIE1: pcie@30100000000 { // 4xGBIF0 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -197,7 +197,7 @@ 0x0 0x0 0x0 0x4 &MPIC 41 0x2 /* int D */>; }; - PCIE2: pciex@38100000000 { // 2xGBIF0 + 
PCIE2: pcie@38100000000 { // 2xGBIF0 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts deleted file mode 100644 index 4ac9c5ab6e6b..000000000000 --- a/arch/powerpc/boot/dts/ep405.dts +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Device Tree Source for EP405 - * - * Copyright 2007 IBM Corp. - * Benjamin Herrenschmidt <benh@kernel.crashing.org> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without - * any warranty of any kind, whether express or implied. - */ - -/dts-v1/; - -/ { - #address-cells = <1>; - #size-cells = <1>; - model = "ep405"; - compatible = "ep405"; - dcr-parent = <&{/cpus/cpu@0}>; - - aliases { - ethernet0 = &EMAC; - serial0 = &UART0; - serial1 = &UART1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - device_type = "cpu"; - model = "PowerPC,405GP"; - reg = <0x00000000>; - clock-frequency = <200000000>; /* Filled in by zImage */ - timebase-frequency = <0>; /* Filled in by zImage */ - i-cache-line-size = <32>; - d-cache-line-size = <32>; - i-cache-size = <16384>; - d-cache-size = <16384>; - dcr-controller; - dcr-access-method = "native"; - }; - }; - - memory { - device_type = "memory"; - reg = <0x00000000 0x00000000>; /* Filled in by zImage */ - }; - - UIC0: interrupt-controller { - compatible = "ibm,uic"; - interrupt-controller; - cell-index = <0>; - dcr-reg = <0x0c0 0x009>; - #address-cells = <0>; - #size-cells = <0>; - #interrupt-cells = <2>; - }; - - plb { - compatible = "ibm,plb3"; - #address-cells = <1>; - #size-cells = <1>; - ranges; - clock-frequency = <0>; /* Filled in by zImage */ - - SDRAM0: memory-controller { - compatible = "ibm,sdram-405gp"; - dcr-reg = <0x010 0x002>; - }; - - MAL: mcmal { - compatible = "ibm,mcmal-405gp", "ibm,mcmal"; - dcr-reg = <0x180 0x062>; - num-tx-chans = <1>; - num-rx-chans = <1>; - interrupt-parent = <&UIC0>; - interrupts = < - 0xb 0x4 /* TXEOB */ - 0xc 0x4 /* RXEOB */ - 0xa 0x4 /* SERR */ - 0xd 0x4 /* TXDE */ - 0xe 0x4 /* RXDE */>; - }; - - POB0: opb { - compatible = "ibm,opb-405gp", "ibm,opb"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0xef600000 0xef600000 0x00a00000>; - dcr-reg = <0x0a0 0x005>; - clock-frequency = <0>; /* Filled in by zImage */ - - UART0: serial@ef600300 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600300 0x00000008>; - virtual-reg = <0xef600300>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x0 0x4>; - }; - - UART1: serial@ef600400 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600400 0x00000008>; - virtual-reg = <0xef600400>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x1 0x4>; - }; - - IIC: i2c@ef600500 { - compatible = "ibm,iic-405gp", "ibm,iic"; - reg = <0xef600500 0x00000011>; - interrupt-parent = <&UIC0>; - interrupts = <0x2 0x4>; - }; - - GPIO: gpio@ef600700 { - compatible = "ibm,gpio-405gp"; - reg = <0xef600700 0x00000020>; - }; - - EMAC: ethernet@ef600800 { - linux,network-index = <0x0>; - device_type = "network"; - compatible = "ibm,emac-405gp", "ibm,emac"; - interrupt-parent = <&UIC0>; - interrupts = < - 0xf 0x4 /* Ethernet */ - 0x9 0x4 /* Ethernet Wake Up */>; - local-mac-address = [000000000000]; /* Filled in by zImage */ - reg = <0xef600800 0x00000070>; - mal-device = <&MAL>; - mal-tx-channel = <0>; 
- mal-rx-channel = <0>; - cell-index = <0>; - max-frame-size = <1500>; - rx-fifo-size = <4096>; - tx-fifo-size = <2048>; - phy-mode = "rmii"; - phy-map = <0x00000000>; - }; - - }; - - EBC0: ebc { - compatible = "ibm,ebc-405gp", "ibm,ebc"; - dcr-reg = <0x012 0x002>; - #address-cells = <2>; - #size-cells = <1>; - - - /* The ranges property is supplied by the bootwrapper - * and is based on the firmware's configuration of the - * EBC bridge - */ - clock-frequency = <0>; /* Filled in by zImage */ - - /* NVRAM and RTC */ - nvrtc@4,200000 { - compatible = "ds1742"; - reg = <0x00000004 0x00200000 0x00000000>; /* size fixed up by zImage */ - }; - - /* "BCSR" CPLD contains a PCI irq controller */ - bcsr@4,0 { - compatible = "ep405-bcsr"; - reg = <0x00000004 0x00000000 0x00000010>; - interrupt-controller; - /* Routing table */ - irq-routing = [ 00 /* SYSERR */ - 01 /* STTM */ - 01 /* RTC */ - 01 /* FENET */ - 02 /* NB PCIIRQ mux ? */ - 03 /* SB Winbond 8259 ? */ - 04 /* Serial Ring */ - 05 /* USB (ep405pc) */ - 06 /* XIRQ 0 */ - 06 /* XIRQ 1 */ - 06 /* XIRQ 2 */ - 06 /* XIRQ 3 */ - 06 /* XIRQ 4 */ - 06 /* XIRQ 5 */ - 06 /* XIRQ 6 */ - 07]; /* Reserved */ - }; - }; - - PCI0: pci@ec000000 { - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - compatible = "ibm,plb405gp-pci", "ibm,plb-pci"; - primary; - reg = <0xeec00000 0x00000008 /* Config space access */ - 0xeed80000 0x00000004 /* IACK */ - 0xeed80000 0x00000004 /* Special cycle */ - 0xef480000 0x00000040>; /* Internal registers */ - - /* Outbound ranges, one memory and one IO, - * later cannot be changed. Chip supports a second - * IO range but we don't use it for now - */ - ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000 - 0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>; - - /* Inbound 2GB range starting at 0 */ - dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>; - - /* That's all I know about IRQs on that thing ... 
*/ - interrupt-map-mask = <0xf800 0x0 0x0 0x0>; - interrupt-map = < - /* USB */ - 0x7000 0x0 0x0 0x0 &UIC0 0x1e 0x8 /* IRQ5 */ - >; - }; - }; - - chosen { - stdout-path = "/plb/opb/serial@ef600300"; - }; -}; diff --git a/arch/powerpc/boot/dts/fsl/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts index 65e20152e22f..969b32c4f2d5 100644 --- a/arch/powerpc/boot/dts/fsl/p4080ds.dts +++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts @@ -125,11 +125,11 @@ i2c@118100 { eeprom@51 { - compatible = "atmel,24c256"; + compatible = "atmel,spd"; reg = <0x51>; }; eeprom@52 { - compatible = "atmel,24c256"; + compatible = "atmel,spd"; reg = <0x52>; }; rtc@68 { @@ -143,6 +143,45 @@ }; }; + i2c@118000 { + zl2006@21 { + compatible = "zl2006"; + reg = <0x21>; + }; + zl2006@22 { + compatible = "zl2006"; + reg = <0x22>; + }; + zl2006@23 { + compatible = "zl2006"; + reg = <0x23>; + }; + zl2006@24 { + compatible = "zl2006"; + reg = <0x24>; + }; + eeprom@50 { + compatible = "atmel,24c64"; + reg = <0x50>; + }; + eeprom@55 { + compatible = "atmel,24c64"; + reg = <0x55>; + }; + eeprom@56 { + compatible = "atmel,24c64"; + reg = <0x56>; + }; + eeprom@57 { + compatible = "atmel,24c02"; + reg = <0x57>; + }; + }; + + i2c@119100 { + /* 0x6E: ICS9FG108 */ + }; + usb0: usb@210000 { phy_type = "ulpi"; }; diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts index a7a802f4ffdd..e84ff1afb58c 100644 --- a/arch/powerpc/boot/dts/glacier.dts +++ b/arch/powerpc/boot/dts/glacier.dts @@ -489,7 +489,7 @@ interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -531,7 +531,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts index cb16dad43c92..f81ce8786d59 100644 --- a/arch/powerpc/boot/dts/haleakala.dts +++ b/arch/powerpc/boot/dts/haleakala.dts @@ -237,7 +237,7 @@ }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts index 2e6e3a7b2604..fbaa60b8f87a 100644 --- a/arch/powerpc/boot/dts/icon.dts +++ b/arch/powerpc/boot/dts/icon.dts @@ -315,7 +315,7 @@ interrupt-map = <0x0 0x0 0x0 0x0 &UIC1 19 0x8>; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -356,7 +356,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts index 02629e119b87..a8f353229fb7 100644 --- a/arch/powerpc/boot/dts/katmai.dts +++ b/arch/powerpc/boot/dts/katmai.dts @@ -319,7 +319,7 @@ >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -360,7 +360,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -401,7 +401,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>; }; - PCIE2: pciex@d40000000 { + PCIE2: pcie@d40000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git 
a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 2a3413221cc1..a709fb47a180 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -322,7 +322,7 @@ }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -363,7 +363,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@c0000000 { + PCIE1: pcie@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts index bf8fe1629392..c473cd911bca 100644 --- a/arch/powerpc/boot/dts/makalu.dts +++ b/arch/powerpc/boot/dts/makalu.dts @@ -268,7 +268,7 @@ }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -309,7 +309,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@c0000000 { + PCIE1: pcie@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index c259c6b3ac5a..780e13d99e7b 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -3,9 +3,7 @@ * phyCORE-MPC5200B-IO (pcm032) board Device Tree Source * * Copyright (C) 2006-2009 Pengutronix - * Sascha Hauer <s.hauer@pengutronix.de> - * Juergen Beisert <j.beisert@pengutronix.de> - * Wolfram Sang <w.sang@pengutronix.de> + * Sascha Hauer, Juergen Beisert, Wolfram Sang <kernel@pengutronix.de> */ /include/ "mpc5200b.dtsi" diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts index f3e046fb49e2..f38035a1f4a1 100644 --- a/arch/powerpc/boot/dts/redwood.dts +++ b/arch/powerpc/boot/dts/redwood.dts @@ -235,7 +235,7 @@ has-new-stacr-staopc; }; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -276,7 +276,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -317,7 +317,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>; }; - PCIE2: pciex@d40000000 { + PCIE2: pcie@d40000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts deleted file mode 100644 index 66f1c6312de6..000000000000 --- a/arch/powerpc/boot/dts/virtex440-ml507.dts +++ /dev/null @@ -1,406 +0,0 @@ -/* - * This file supports the Xilinx ML507 board with the 440 processor. - * A reference design for the FPGA is provided at http://git.xilinx.com. - * - * (C) Copyright 2008 Xilinx, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - * - * --- - * - * Device Tree Generator version: 1.1 - * - * CAUTION: This file is automatically generated by libgen. 
- * Version: Xilinx EDK 10.1.03 EDK_K_SP3.6 - * - * XPS project directory: ml507_ppc440_emb_ref - */ - -/dts-v1/; - -/ { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,virtex440"; - dcr-parent = <&ppc440_0>; - model = "testing"; - DDR2_SDRAM: memory@0 { - device_type = "memory"; - reg = < 0 0x10000000 >; - } ; - chosen { - bootargs = "console=ttyS0 root=/dev/ram"; - stdout-path = &RS232_Uart_1; - } ; - cpus { - #address-cells = <1>; - #cpus = <1>; - #size-cells = <0>; - ppc440_0: cpu@0 { - clock-frequency = <400000000>; - compatible = "PowerPC,440", "ibm,ppc440"; - d-cache-line-size = <0x20>; - d-cache-size = <0x8000>; - dcr-access-method = "native"; - dcr-controller ; - device_type = "cpu"; - i-cache-line-size = <0x20>; - i-cache-size = <0x8000>; - model = "PowerPC,440"; - reg = <0>; - timebase-frequency = <400000000>; - xlnx,apu-control = <1>; - xlnx,apu-udi-0 = <0>; - xlnx,apu-udi-1 = <0>; - xlnx,apu-udi-10 = <0>; - xlnx,apu-udi-11 = <0>; - xlnx,apu-udi-12 = <0>; - xlnx,apu-udi-13 = <0>; - xlnx,apu-udi-14 = <0>; - xlnx,apu-udi-15 = <0>; - xlnx,apu-udi-2 = <0>; - xlnx,apu-udi-3 = <0>; - xlnx,apu-udi-4 = <0>; - xlnx,apu-udi-5 = <0>; - xlnx,apu-udi-6 = <0>; - xlnx,apu-udi-7 = <0>; - xlnx,apu-udi-8 = <0>; - xlnx,apu-udi-9 = <0>; - xlnx,dcr-autolock-enable = <1>; - xlnx,dcu-rd-ld-cache-plb-prio = <0>; - xlnx,dcu-rd-noncache-plb-prio = <0>; - xlnx,dcu-rd-touch-plb-prio = <0>; - xlnx,dcu-rd-urgent-plb-prio = <0>; - xlnx,dcu-wr-flush-plb-prio = <0>; - xlnx,dcu-wr-store-plb-prio = <0>; - xlnx,dcu-wr-urgent-plb-prio = <0>; - xlnx,dma0-control = <0>; - xlnx,dma0-plb-prio = <0>; - xlnx,dma0-rxchannelctrl = <0x1010000>; - xlnx,dma0-rxirqtimer = <0x3ff>; - xlnx,dma0-txchannelctrl = <0x1010000>; - xlnx,dma0-txirqtimer = <0x3ff>; - xlnx,dma1-control = <0>; - xlnx,dma1-plb-prio = <0>; - xlnx,dma1-rxchannelctrl = <0x1010000>; - xlnx,dma1-rxirqtimer = <0x3ff>; - xlnx,dma1-txchannelctrl = <0x1010000>; - xlnx,dma1-txirqtimer = <0x3ff>; - xlnx,dma2-control = <0>; - xlnx,dma2-plb-prio = <0>; - xlnx,dma2-rxchannelctrl = <0x1010000>; - xlnx,dma2-rxirqtimer = <0x3ff>; - xlnx,dma2-txchannelctrl = <0x1010000>; - xlnx,dma2-txirqtimer = <0x3ff>; - xlnx,dma3-control = <0>; - xlnx,dma3-plb-prio = <0>; - xlnx,dma3-rxchannelctrl = <0x1010000>; - xlnx,dma3-rxirqtimer = <0x3ff>; - xlnx,dma3-txchannelctrl = <0x1010000>; - xlnx,dma3-txirqtimer = <0x3ff>; - xlnx,endian-reset = <0>; - xlnx,generate-plb-timespecs = <1>; - xlnx,icu-rd-fetch-plb-prio = <0>; - xlnx,icu-rd-spec-plb-prio = <0>; - xlnx,icu-rd-touch-plb-prio = <0>; - xlnx,interconnect-imask = <0xffffffff>; - xlnx,mplb-allow-lock-xfer = <1>; - xlnx,mplb-arb-mode = <0>; - xlnx,mplb-awidth = <0x20>; - xlnx,mplb-counter = <0x500>; - xlnx,mplb-dwidth = <0x80>; - xlnx,mplb-max-burst = <8>; - xlnx,mplb-native-dwidth = <0x80>; - xlnx,mplb-p2p = <0>; - xlnx,mplb-prio-dcur = <2>; - xlnx,mplb-prio-dcuw = <3>; - xlnx,mplb-prio-icu = <4>; - xlnx,mplb-prio-splb0 = <1>; - xlnx,mplb-prio-splb1 = <0>; - xlnx,mplb-read-pipe-enable = <1>; - xlnx,mplb-sync-tattribute = <0>; - xlnx,mplb-wdog-enable = <1>; - xlnx,mplb-write-pipe-enable = <1>; - xlnx,mplb-write-post-enable = <1>; - xlnx,num-dma = <1>; - xlnx,pir = <0xf>; - xlnx,ppc440mc-addr-base = <0>; - xlnx,ppc440mc-addr-high = <0xfffffff>; - xlnx,ppc440mc-arb-mode = <0>; - xlnx,ppc440mc-bank-conflict-mask = <0xc00000>; - xlnx,ppc440mc-control = <0xf810008f>; - xlnx,ppc440mc-max-burst = <8>; - xlnx,ppc440mc-prio-dcur = <2>; - xlnx,ppc440mc-prio-dcuw = <3>; - xlnx,ppc440mc-prio-icu = <4>; - xlnx,ppc440mc-prio-splb0 = 
<1>; - xlnx,ppc440mc-prio-splb1 = <0>; - xlnx,ppc440mc-row-conflict-mask = <0x3ffe00>; - xlnx,ppcdm-asyncmode = <0>; - xlnx,ppcds-asyncmode = <0>; - xlnx,user-reset = <0>; - DMA0: sdma@80 { - compatible = "xlnx,ll-dma-1.00.a"; - dcr-reg = < 0x80 0x11 >; - interrupt-parent = <&xps_intc_0>; - interrupts = < 10 2 11 2 >; - } ; - } ; - } ; - plb_v46_0: plb@0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,plb-v46-1.03.a", "simple-bus"; - ranges ; - DIP_Switches_8Bit: gpio@81460000 { - compatible = "xlnx,xps-gpio-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 7 2 >; - reg = < 0x81460000 0x10000 >; - xlnx,all-inputs = <1>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <8>; - xlnx,interrupt-present = <1>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - FLASH: flash@fc000000 { - bank-width = <2>; - compatible = "xlnx,xps-mch-emc-2.00.a", "cfi-flash"; - reg = < 0xfc000000 0x2000000 >; - xlnx,family = "virtex5"; - xlnx,include-datawidth-matching-0 = <0x1>; - xlnx,include-datawidth-matching-1 = <0x0>; - xlnx,include-datawidth-matching-2 = <0x0>; - xlnx,include-datawidth-matching-3 = <0x0>; - xlnx,include-negedge-ioregs = <0x0>; - xlnx,include-plb-ipif = <0x1>; - xlnx,include-wrbuf = <0x1>; - xlnx,max-mem-width = <0x10>; - xlnx,mch-native-dwidth = <0x20>; - xlnx,mch-plb-clk-period-ps = <0x2710>; - xlnx,mch-splb-awidth = <0x20>; - xlnx,mch0-accessbuf-depth = <0x10>; - xlnx,mch0-protocol = <0x0>; - xlnx,mch0-rddatabuf-depth = <0x10>; - xlnx,mch1-accessbuf-depth = <0x10>; - xlnx,mch1-protocol = <0x0>; - xlnx,mch1-rddatabuf-depth = <0x10>; - xlnx,mch2-accessbuf-depth = <0x10>; - xlnx,mch2-protocol = <0x0>; - xlnx,mch2-rddatabuf-depth = <0x10>; - xlnx,mch3-accessbuf-depth = <0x10>; - xlnx,mch3-protocol = <0x0>; - xlnx,mch3-rddatabuf-depth = <0x10>; - xlnx,mem0-width = <0x10>; - xlnx,mem1-width = <0x20>; - xlnx,mem2-width = <0x20>; - xlnx,mem3-width = <0x20>; - xlnx,num-banks-mem = <0x1>; - xlnx,num-channels = <0x2>; - xlnx,priority-mode = <0x0>; - xlnx,synch-mem-0 = <0x0>; - xlnx,synch-mem-1 = <0x0>; - xlnx,synch-mem-2 = <0x0>; - xlnx,synch-mem-3 = <0x0>; - xlnx,synch-pipedelay-0 = <0x2>; - xlnx,synch-pipedelay-1 = <0x2>; - xlnx,synch-pipedelay-2 = <0x2>; - xlnx,synch-pipedelay-3 = <0x2>; - xlnx,tavdv-ps-mem-0 = <0x1adb0>; - xlnx,tavdv-ps-mem-1 = <0x3a98>; - xlnx,tavdv-ps-mem-2 = <0x3a98>; - xlnx,tavdv-ps-mem-3 = <0x3a98>; - xlnx,tcedv-ps-mem-0 = <0x1adb0>; - xlnx,tcedv-ps-mem-1 = <0x3a98>; - xlnx,tcedv-ps-mem-2 = <0x3a98>; - xlnx,tcedv-ps-mem-3 = <0x3a98>; - xlnx,thzce-ps-mem-0 = <0x88b8>; - xlnx,thzce-ps-mem-1 = <0x1b58>; - xlnx,thzce-ps-mem-2 = <0x1b58>; - xlnx,thzce-ps-mem-3 = <0x1b58>; - xlnx,thzoe-ps-mem-0 = <0x1b58>; - xlnx,thzoe-ps-mem-1 = <0x1b58>; - xlnx,thzoe-ps-mem-2 = <0x1b58>; - xlnx,thzoe-ps-mem-3 = <0x1b58>; - xlnx,tlzwe-ps-mem-0 = <0x88b8>; - xlnx,tlzwe-ps-mem-1 = <0x0>; - xlnx,tlzwe-ps-mem-2 = <0x0>; - xlnx,tlzwe-ps-mem-3 = <0x0>; - xlnx,twc-ps-mem-0 = <0x2af8>; - xlnx,twc-ps-mem-1 = <0x3a98>; - xlnx,twc-ps-mem-2 = <0x3a98>; - xlnx,twc-ps-mem-3 = <0x3a98>; - xlnx,twp-ps-mem-0 = <0x11170>; - xlnx,twp-ps-mem-1 = <0x2ee0>; - xlnx,twp-ps-mem-2 = <0x2ee0>; - xlnx,twp-ps-mem-3 = <0x2ee0>; - xlnx,xcl0-linesize = <0x4>; - xlnx,xcl0-writexfer = <0x1>; - xlnx,xcl1-linesize = <0x4>; - xlnx,xcl1-writexfer = <0x1>; - xlnx,xcl2-linesize = <0x4>; - xlnx,xcl2-writexfer = <0x1>; - 
xlnx,xcl3-linesize = <0x4>; - xlnx,xcl3-writexfer = <0x1>; - } ; - Hard_Ethernet_MAC: xps-ll-temac@81c00000 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,compound"; - ethernet@81c00000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "xlnx,xps-ll-temac-1.01.b"; - device_type = "network"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 5 2 >; - llink-connected = <&DMA0>; - local-mac-address = [ 02 00 00 00 00 00 ]; - reg = < 0x81c00000 0x40 >; - xlnx,bus2core-clk-ratio = <1>; - xlnx,phy-type = <1>; - xlnx,phyaddr = <1>; - xlnx,rxcsum = <1>; - xlnx,rxfifo = <0x1000>; - xlnx,temac-type = <0>; - xlnx,txcsum = <1>; - xlnx,txfifo = <0x1000>; - phy-handle = <&phy7>; - clock-frequency = <100000000>; - phy7: phy@7 { - compatible = "marvell,88e1111"; - reg = <7>; - } ; - } ; - } ; - IIC_EEPROM: i2c@81600000 { - compatible = "xlnx,xps-iic-2.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 6 2 >; - reg = < 0x81600000 0x10000 >; - xlnx,clk-freq = <0x5f5e100>; - xlnx,family = "virtex5"; - xlnx,gpo-width = <0x1>; - xlnx,iic-freq = <0x186a0>; - xlnx,scl-inertial-delay = <0x0>; - xlnx,sda-inertial-delay = <0x0>; - xlnx,ten-bit-adr = <0x0>; - } ; - LEDs_8Bit: gpio@81400000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81400000 0x10000 >; - xlnx,all-inputs = <0>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <8>; - xlnx,interrupt-present = <0>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - LEDs_Positions: gpio@81420000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81420000 0x10000 >; - xlnx,all-inputs = <0>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <5>; - xlnx,interrupt-present = <0>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - Push_Buttons_5Bit: gpio@81440000 { - compatible = "xlnx,xps-gpio-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 8 2 >; - reg = < 0x81440000 0x10000 >; - xlnx,all-inputs = <1>; - xlnx,all-inputs-2 = <0>; - xlnx,dout-default = <0>; - xlnx,dout-default-2 = <0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <5>; - xlnx,interrupt-present = <1>; - xlnx,is-bidir = <1>; - xlnx,is-bidir-2 = <1>; - xlnx,is-dual = <0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - RS232_Uart_1: serial@83e00000 { - clock-frequency = <100000000>; - compatible = "xlnx,xps-uart16550-2.00.b", "ns16550"; - current-speed = <9600>; - device_type = "serial"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 9 2 >; - reg = < 0x83e00000 0x10000 >; - reg-offset = <0x1003>; - reg-shift = <2>; - xlnx,family = "virtex5"; - xlnx,has-external-rclk = <0>; - xlnx,has-external-xin = <0>; - xlnx,is-a-16550 = <1>; - } ; - SysACE_CompactFlash: sysace@83600000 { - compatible = "xlnx,xps-sysace-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 4 2 >; - reg = < 0x83600000 0x10000 >; - xlnx,family = "virtex5"; - xlnx,mem-width = <0x10>; - } ; - xps_bram_if_cntlr_1: xps-bram-if-cntlr@ffff0000 { - compatible = "xlnx,xps-bram-if-cntlr-1.00.a"; - reg = < 0xffff0000 0x10000 >; - xlnx,family = "virtex5"; - } ; - xps_intc_0: interrupt-controller@81800000 { - #interrupt-cells = <2>; - compatible = "xlnx,xps-intc-1.00.a"; - interrupt-controller 
; - reg = < 0x81800000 0x10000 >; - xlnx,num-intr-inputs = <0xc>; - } ; - xps_timebase_wdt_1: xps-timebase-wdt@83a00000 { - compatible = "xlnx,xps-timebase-wdt-1.00.b"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 2 0 1 2 >; - reg = < 0x83a00000 0x10000 >; - xlnx,family = "virtex5"; - xlnx,wdt-enable-once = <0>; - xlnx,wdt-interval = <0x1e>; - } ; - xps_timer_1: timer@83c00000 { - compatible = "xlnx,xps-timer-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 3 2 >; - reg = < 0x83c00000 0x10000 >; - xlnx,count-width = <0x20>; - xlnx,family = "virtex5"; - xlnx,gen0-assert = <1>; - xlnx,gen1-assert = <1>; - xlnx,one-timer-only = <1>; - xlnx,trig0-assert = <1>; - xlnx,trig1-assert = <1>; - } ; - } ; -} ; diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts deleted file mode 100644 index 3b736ca26ddc..000000000000 --- a/arch/powerpc/boot/dts/virtex440-ml510.dts +++ /dev/null @@ -1,466 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Xilinx ML510 Reference Design support - * - * This DTS file was created for the ml510_bsb1_pcores_ppc440 reference design. - * The reference design contains a bug which prevent PCI DMA from working - * properly. A description of the bug is given in the plbv46_pci section. It - * needs to be fixed by the user until Xilinx updates their reference design. - * - * Copyright 2009, Roderick Colenbrander - */ - -/dts-v1/; -/ { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,ml510-ref-design", "xlnx,virtex440"; - dcr-parent = <&ppc440_0>; - DDR2_SDRAM_DIMM0: memory@0 { - device_type = "memory"; - reg = < 0x0 0x20000000 >; - } ; - alias { - ethernet0 = &Hard_Ethernet_MAC; - serial0 = &RS232_Uart_1; - } ; - chosen { - bootargs = "console=ttyS0 root=/dev/ram"; - stdout-path = "/plb@0/serial@83e00000"; - } ; - cpus { - #address-cells = <1>; - #cpus = <0x1>; - #size-cells = <0>; - ppc440_0: cpu@0 { - #address-cells = <1>; - #size-cells = <1>; - clock-frequency = <300000000>; - compatible = "PowerPC,440", "ibm,ppc440"; - d-cache-line-size = <0x20>; - d-cache-size = <0x8000>; - dcr-access-method = "native"; - dcr-controller ; - device_type = "cpu"; - i-cache-line-size = <0x20>; - i-cache-size = <0x8000>; - model = "PowerPC,440"; - reg = <0>; - timebase-frequency = <300000000>; - xlnx,apu-control = <0x2000>; - xlnx,apu-udi-0 = <0x0>; - xlnx,apu-udi-1 = <0x0>; - xlnx,apu-udi-10 = <0x0>; - xlnx,apu-udi-11 = <0x0>; - xlnx,apu-udi-12 = <0x0>; - xlnx,apu-udi-13 = <0x0>; - xlnx,apu-udi-14 = <0x0>; - xlnx,apu-udi-15 = <0x0>; - xlnx,apu-udi-2 = <0x0>; - xlnx,apu-udi-3 = <0x0>; - xlnx,apu-udi-4 = <0x0>; - xlnx,apu-udi-5 = <0x0>; - xlnx,apu-udi-6 = <0x0>; - xlnx,apu-udi-7 = <0x0>; - xlnx,apu-udi-8 = <0x0>; - xlnx,apu-udi-9 = <0x0>; - xlnx,dcr-autolock-enable = <0x1>; - xlnx,dcu-rd-ld-cache-plb-prio = <0x0>; - xlnx,dcu-rd-noncache-plb-prio = <0x0>; - xlnx,dcu-rd-touch-plb-prio = <0x0>; - xlnx,dcu-rd-urgent-plb-prio = <0x0>; - xlnx,dcu-wr-flush-plb-prio = <0x0>; - xlnx,dcu-wr-store-plb-prio = <0x0>; - xlnx,dcu-wr-urgent-plb-prio = <0x0>; - xlnx,dma0-control = <0x0>; - xlnx,dma0-plb-prio = <0x0>; - xlnx,dma0-rxchannelctrl = <0x1010000>; - xlnx,dma0-rxirqtimer = <0x3ff>; - xlnx,dma0-txchannelctrl = <0x1010000>; - xlnx,dma0-txirqtimer = <0x3ff>; - xlnx,dma1-control = <0x0>; - xlnx,dma1-plb-prio = <0x0>; - xlnx,dma1-rxchannelctrl = <0x1010000>; - xlnx,dma1-rxirqtimer = <0x3ff>; - xlnx,dma1-txchannelctrl = <0x1010000>; - xlnx,dma1-txirqtimer = <0x3ff>; - xlnx,dma2-control = <0x0>; - xlnx,dma2-plb-prio = <0x0>; - 
xlnx,dma2-rxchannelctrl = <0x1010000>; - xlnx,dma2-rxirqtimer = <0x3ff>; - xlnx,dma2-txchannelctrl = <0x1010000>; - xlnx,dma2-txirqtimer = <0x3ff>; - xlnx,dma3-control = <0x0>; - xlnx,dma3-plb-prio = <0x0>; - xlnx,dma3-rxchannelctrl = <0x1010000>; - xlnx,dma3-rxirqtimer = <0x3ff>; - xlnx,dma3-txchannelctrl = <0x1010000>; - xlnx,dma3-txirqtimer = <0x3ff>; - xlnx,endian-reset = <0x0>; - xlnx,generate-plb-timespecs = <0x1>; - xlnx,icu-rd-fetch-plb-prio = <0x0>; - xlnx,icu-rd-spec-plb-prio = <0x0>; - xlnx,icu-rd-touch-plb-prio = <0x0>; - xlnx,interconnect-imask = <0xffffffff>; - xlnx,mplb-allow-lock-xfer = <0x1>; - xlnx,mplb-arb-mode = <0x0>; - xlnx,mplb-awidth = <0x20>; - xlnx,mplb-counter = <0x500>; - xlnx,mplb-dwidth = <0x80>; - xlnx,mplb-max-burst = <0x8>; - xlnx,mplb-native-dwidth = <0x80>; - xlnx,mplb-p2p = <0x0>; - xlnx,mplb-prio-dcur = <0x2>; - xlnx,mplb-prio-dcuw = <0x3>; - xlnx,mplb-prio-icu = <0x4>; - xlnx,mplb-prio-splb0 = <0x1>; - xlnx,mplb-prio-splb1 = <0x0>; - xlnx,mplb-read-pipe-enable = <0x1>; - xlnx,mplb-sync-tattribute = <0x0>; - xlnx,mplb-wdog-enable = <0x1>; - xlnx,mplb-write-pipe-enable = <0x1>; - xlnx,mplb-write-post-enable = <0x1>; - xlnx,num-dma = <0x0>; - xlnx,pir = <0xf>; - xlnx,ppc440mc-addr-base = <0x0>; - xlnx,ppc440mc-addr-high = <0x1fffffff>; - xlnx,ppc440mc-arb-mode = <0x0>; - xlnx,ppc440mc-bank-conflict-mask = <0x1800000>; - xlnx,ppc440mc-control = <0xf810008f>; - xlnx,ppc440mc-max-burst = <0x8>; - xlnx,ppc440mc-prio-dcur = <0x2>; - xlnx,ppc440mc-prio-dcuw = <0x3>; - xlnx,ppc440mc-prio-icu = <0x4>; - xlnx,ppc440mc-prio-splb0 = <0x1>; - xlnx,ppc440mc-prio-splb1 = <0x0>; - xlnx,ppc440mc-row-conflict-mask = <0x7ffe00>; - xlnx,ppcdm-asyncmode = <0x0>; - xlnx,ppcds-asyncmode = <0x0>; - xlnx,user-reset = <0x0>; - } ; - } ; - plb_v46_0: plb@0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,plb-v46-1.03.a", "simple-bus"; - ranges ; - FLASH: flash@fc000000 { - bank-width = <2>; - compatible = "xlnx,xps-mch-emc-2.00.a", "cfi-flash"; - reg = < 0xfc000000 0x2000000 >; - xlnx,family = "virtex5"; - xlnx,include-datawidth-matching-0 = <0x1>; - xlnx,include-datawidth-matching-1 = <0x0>; - xlnx,include-datawidth-matching-2 = <0x0>; - xlnx,include-datawidth-matching-3 = <0x0>; - xlnx,include-negedge-ioregs = <0x0>; - xlnx,include-plb-ipif = <0x1>; - xlnx,include-wrbuf = <0x1>; - xlnx,max-mem-width = <0x10>; - xlnx,mch-native-dwidth = <0x20>; - xlnx,mch-plb-clk-period-ps = <0x2710>; - xlnx,mch-splb-awidth = <0x20>; - xlnx,mch0-accessbuf-depth = <0x10>; - xlnx,mch0-protocol = <0x0>; - xlnx,mch0-rddatabuf-depth = <0x10>; - xlnx,mch1-accessbuf-depth = <0x10>; - xlnx,mch1-protocol = <0x0>; - xlnx,mch1-rddatabuf-depth = <0x10>; - xlnx,mch2-accessbuf-depth = <0x10>; - xlnx,mch2-protocol = <0x0>; - xlnx,mch2-rddatabuf-depth = <0x10>; - xlnx,mch3-accessbuf-depth = <0x10>; - xlnx,mch3-protocol = <0x0>; - xlnx,mch3-rddatabuf-depth = <0x10>; - xlnx,mem0-width = <0x10>; - xlnx,mem1-width = <0x20>; - xlnx,mem2-width = <0x20>; - xlnx,mem3-width = <0x20>; - xlnx,num-banks-mem = <0x1>; - xlnx,num-channels = <0x2>; - xlnx,priority-mode = <0x0>; - xlnx,synch-mem-0 = <0x0>; - xlnx,synch-mem-1 = <0x0>; - xlnx,synch-mem-2 = <0x0>; - xlnx,synch-mem-3 = <0x0>; - xlnx,synch-pipedelay-0 = <0x2>; - xlnx,synch-pipedelay-1 = <0x2>; - xlnx,synch-pipedelay-2 = <0x2>; - xlnx,synch-pipedelay-3 = <0x2>; - xlnx,tavdv-ps-mem-0 = <0x1adb0>; - xlnx,tavdv-ps-mem-1 = <0x3a98>; - xlnx,tavdv-ps-mem-2 = <0x3a98>; - xlnx,tavdv-ps-mem-3 = <0x3a98>; - xlnx,tcedv-ps-mem-0 = <0x1adb0>; - 
xlnx,tcedv-ps-mem-1 = <0x3a98>; - xlnx,tcedv-ps-mem-2 = <0x3a98>; - xlnx,tcedv-ps-mem-3 = <0x3a98>; - xlnx,thzce-ps-mem-0 = <0x88b8>; - xlnx,thzce-ps-mem-1 = <0x1b58>; - xlnx,thzce-ps-mem-2 = <0x1b58>; - xlnx,thzce-ps-mem-3 = <0x1b58>; - xlnx,thzoe-ps-mem-0 = <0x1b58>; - xlnx,thzoe-ps-mem-1 = <0x1b58>; - xlnx,thzoe-ps-mem-2 = <0x1b58>; - xlnx,thzoe-ps-mem-3 = <0x1b58>; - xlnx,tlzwe-ps-mem-0 = <0x88b8>; - xlnx,tlzwe-ps-mem-1 = <0x0>; - xlnx,tlzwe-ps-mem-2 = <0x0>; - xlnx,tlzwe-ps-mem-3 = <0x0>; - xlnx,twc-ps-mem-0 = <0x1adb0>; - xlnx,twc-ps-mem-1 = <0x3a98>; - xlnx,twc-ps-mem-2 = <0x3a98>; - xlnx,twc-ps-mem-3 = <0x3a98>; - xlnx,twp-ps-mem-0 = <0x11170>; - xlnx,twp-ps-mem-1 = <0x2ee0>; - xlnx,twp-ps-mem-2 = <0x2ee0>; - xlnx,twp-ps-mem-3 = <0x2ee0>; - xlnx,xcl0-linesize = <0x4>; - xlnx,xcl0-writexfer = <0x1>; - xlnx,xcl1-linesize = <0x4>; - xlnx,xcl1-writexfer = <0x1>; - xlnx,xcl2-linesize = <0x4>; - xlnx,xcl2-writexfer = <0x1>; - xlnx,xcl3-linesize = <0x4>; - xlnx,xcl3-writexfer = <0x1>; - } ; - Hard_Ethernet_MAC: xps-ll-temac@81c00000 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "xlnx,compound"; - ethernet@81c00000 { - compatible = "xlnx,xps-ll-temac-1.01.b"; - device_type = "network"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 8 2 >; - llink-connected = <&Hard_Ethernet_MAC_fifo>; - local-mac-address = [ 02 00 00 00 00 00 ]; - reg = < 0x81c00000 0x40 >; - xlnx,bus2core-clk-ratio = <0x1>; - xlnx,phy-type = <0x3>; - xlnx,phyaddr = <0x1>; - xlnx,rxcsum = <0x0>; - xlnx,rxfifo = <0x8000>; - xlnx,temac-type = <0x0>; - xlnx,txcsum = <0x0>; - xlnx,txfifo = <0x8000>; - } ; - } ; - Hard_Ethernet_MAC_fifo: xps-ll-fifo@81a00000 { - compatible = "xlnx,xps-ll-fifo-1.01.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 6 2 >; - reg = < 0x81a00000 0x10000 >; - xlnx,family = "virtex5"; - } ; - IIC_EEPROM: i2c@81600000 { - compatible = "xlnx,xps-iic-2.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 9 2 >; - reg = < 0x81600000 0x10000 >; - xlnx,clk-freq = <0x5f5e100>; - xlnx,family = "virtex5"; - xlnx,gpo-width = <0x1>; - xlnx,iic-freq = <0x186a0>; - xlnx,scl-inertial-delay = <0x5>; - xlnx,sda-inertial-delay = <0x5>; - xlnx,ten-bit-adr = <0x0>; - } ; - LCD_OPTIONAL: gpio@81420000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81420000 0x10000 >; - xlnx,all-inputs = <0x0>; - xlnx,all-inputs-2 = <0x0>; - xlnx,dout-default = <0x0>; - xlnx,dout-default-2 = <0x0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <0xb>; - xlnx,interrupt-present = <0x0>; - xlnx,is-bidir = <0x1>; - xlnx,is-bidir-2 = <0x1>; - xlnx,is-dual = <0x0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - LEDs_4Bit: gpio@81400000 { - compatible = "xlnx,xps-gpio-1.00.a"; - reg = < 0x81400000 0x10000 >; - xlnx,all-inputs = <0x0>; - xlnx,all-inputs-2 = <0x0>; - xlnx,dout-default = <0x0>; - xlnx,dout-default-2 = <0x0>; - xlnx,family = "virtex5"; - xlnx,gpio-width = <0x4>; - xlnx,interrupt-present = <0x0>; - xlnx,is-bidir = <0x1>; - xlnx,is-bidir-2 = <0x1>; - xlnx,is-dual = <0x0>; - xlnx,tri-default = <0xffffffff>; - xlnx,tri-default-2 = <0xffffffff>; - } ; - RS232_Uart_1: serial@83e00000 { - clock-frequency = <100000000>; - compatible = "xlnx,xps-uart16550-2.00.b", "ns16550"; - current-speed = <9600>; - device_type = "serial"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 11 2 >; - reg = < 0x83e00000 0x10000 >; - reg-offset = <0x1003>; - reg-shift = <2>; - xlnx,family = "virtex5"; - xlnx,has-external-rclk = <0x0>; - xlnx,has-external-xin = <0x0>; - 
xlnx,is-a-16550 = <0x1>; - } ; - SPI_EEPROM: xps-spi@feff8000 { - compatible = "xlnx,xps-spi-2.00.b"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 10 2 >; - reg = < 0xfeff8000 0x80 >; - xlnx,family = "virtex5"; - xlnx,fifo-exist = <0x1>; - xlnx,num-ss-bits = <0x1>; - xlnx,num-transfer-bits = <0x8>; - xlnx,sck-ratio = <0x80>; - } ; - SysACE_CompactFlash: sysace@83600000 { - compatible = "xlnx,xps-sysace-1.00.a"; - interrupt-parent = <&xps_intc_0>; - interrupts = < 7 2 >; - reg = < 0x83600000 0x10000 >; - xlnx,family = "virtex5"; - xlnx,mem-width = <0x10>; - } ; - plbv46_pci_0: plbv46-pci@85e00000 { - #size-cells = <2>; - #address-cells = <3>; - compatible = "xlnx,plbv46-pci-1.03.a"; - device_type = "pci"; - reg = < 0x85e00000 0x10000 >; - - /* - * The default ML510 BSB has C_IPIFBAR2PCIBAR_0 set to - * 0 which means that a read/write to the memory mapped - * i/o region (which starts at 0xa0000000) for pci - * bar 0 on the plb side translates to 0. - * It is important to set this value to 0xa0000000, so - * that inbound and outbound pci transactions work - * properly including DMA. - */ - ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x20000000 - 0x01000000 0 0x00000000 0xf0000000 0 0x00010000>; - - #interrupt-cells = <1>; - interrupt-parent = <&xps_intc_0>; - interrupt-map-mask = <0xff00 0x0 0x0 0x7>; - interrupt-map = < - /* IRQ mapping for pci slots and ALI M1533 - * periperhals. In total there are 5 interrupt - * lines connected to a xps_intc controller. - * Four of them are PCI IRQ A, B, C, D and - * which correspond to respectively xpx_intc - * 5, 4, 3 and 2. The fifth interrupt line is - * connected to the south bridge and this one - * uses irq 1 and is active high instead of - * active low. - * - * The M1533 contains various peripherals - * including AC97 audio, a modem, USB, IDE and - * some power management stuff. The modem - * isn't connected on the ML510 and the power - * management core also isn't used. - */ - - /* IDSEL 0x16 / dev=6, bus=0 / PCI slot 3 */ - 0x3000 0 0 1 &xps_intc_0 3 2 - 0x3000 0 0 2 &xps_intc_0 2 2 - 0x3000 0 0 3 &xps_intc_0 5 2 - 0x3000 0 0 4 &xps_intc_0 4 2 - - /* IDSEL 0x13 / dev=3, bus=1 / PCI slot 4 */ - /* - 0x11800 0 0 1 &xps_intc_0 5 0 2 - 0x11800 0 0 2 &xps_intc_0 4 0 2 - 0x11800 0 0 3 &xps_intc_0 3 0 2 - 0x11800 0 0 4 &xps_intc_0 2 0 2 - */ - - /* According to the datasheet + schematic - * ABCD [FPGA] of slot 5 is mapped to DABC. - * Testing showed that at least A maps to B, - * the mapping of the other pins is a guess - * and for that reason the lines have been - * commented out. 
- */ - /* IDSEL 0x15 / dev=5, bus=0 / PCI slot 5 */ - 0x2800 0 0 1 &xps_intc_0 4 2 - /* - 0x2800 0 0 2 &xps_intc_0 3 2 - 0x2800 0 0 3 &xps_intc_0 2 2 - 0x2800 0 0 4 &xps_intc_0 5 2 - */ - - /* IDSEL 0x12 / dev=2, bus=1 / PCI slot 6 */ - /* - 0x11000 0 0 1 &xps_intc_0 4 0 2 - 0x11000 0 0 2 &xps_intc_0 3 0 2 - 0x11000 0 0 3 &xps_intc_0 2 0 2 - 0x11000 0 0 4 &xps_intc_0 5 0 2 - */ - - /* IDSEL 0x11 / dev=1, bus=0 / AC97 audio */ - 0x0800 0 0 1 &i8259 7 2 - - /* IDSEL 0x1b / dev=11, bus=0 / IDE */ - 0x5800 0 0 1 &i8259 14 2 - - /* IDSEL 0x1f / dev 15, bus=0 / 2x USB 1.1 */ - 0x7800 0 0 1 &i8259 7 2 - >; - ali_m1533 { - #size-cells = <1>; - #address-cells = <2>; - i8259: interrupt-controller@20 { - reg = <1 0x20 2 - 1 0xa0 2 - 1 0x4d0 2>; - interrupt-controller; - device_type = "interrupt-controller"; - #address-cells = <0>; - #interrupt-cells = <2>; - compatible = "chrp,iic"; - - /* south bridge irq is active high */ - interrupts = <1 3>; - interrupt-parent = <&xps_intc_0>; - }; - }; - } ; - xps_bram_if_cntlr_1: xps-bram-if-cntlr@ffff0000 { - compatible = "xlnx,xps-bram-if-cntlr-1.00.a"; - reg = < 0xffff0000 0x10000 >; - xlnx,family = "virtex5"; - } ; - xps_intc_0: interrupt-controller@81800000 { - #interrupt-cells = <0x2>; - compatible = "xlnx,xps-intc-1.00.a"; - interrupt-controller ; - reg = < 0x81800000 0x10000 >; - xlnx,num-intr-inputs = <0xc>; - } ; - xps_tft_0: tft@86e00000 { - compatible = "xlnx,xps-tft-1.00.a"; - reg = < 0x86e00000 0x10000 >; - xlnx,dcr-splb-slave-if = <0x1>; - xlnx,default-tft-base-addr = <0x0>; - xlnx,family = "virtex5"; - xlnx,i2c-slave-addr = <0x76>; - xlnx,mplb-awidth = <0x20>; - xlnx,mplb-dwidth = <0x80>; - xlnx,mplb-native-dwidth = <0x40>; - xlnx,mplb-smallest-slave = <0x20>; - xlnx,tft-interface = <0x1>; - } ; - } ; -} ; diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts deleted file mode 100644 index 0872862c9363..000000000000 --- a/arch/powerpc/boot/dts/walnut.dts +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Device Tree Source for IBM Walnut - * - * Copyright 2007 IBM Corp. - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without - * any warranty of any kind, whether express or implied. 
- */ - -/dts-v1/; - -/ { - #address-cells = <1>; - #size-cells = <1>; - model = "ibm,walnut"; - compatible = "ibm,walnut"; - dcr-parent = <&{/cpus/cpu@0}>; - - aliases { - ethernet0 = &EMAC; - serial0 = &UART0; - serial1 = &UART1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - device_type = "cpu"; - model = "PowerPC,405GP"; - reg = <0x00000000>; - clock-frequency = <200000000>; /* Filled in by zImage */ - timebase-frequency = <0>; /* Filled in by zImage */ - i-cache-line-size = <32>; - d-cache-line-size = <32>; - i-cache-size = <16384>; - d-cache-size = <16384>; - dcr-controller; - dcr-access-method = "native"; - }; - }; - - memory { - device_type = "memory"; - reg = <0x00000000 0x00000000>; /* Filled in by zImage */ - }; - - UIC0: interrupt-controller { - compatible = "ibm,uic"; - interrupt-controller; - cell-index = <0>; - dcr-reg = <0x0c0 0x009>; - #address-cells = <0>; - #size-cells = <0>; - #interrupt-cells = <2>; - }; - - plb { - compatible = "ibm,plb3"; - #address-cells = <1>; - #size-cells = <1>; - ranges; - clock-frequency = <0>; /* Filled in by zImage */ - - SDRAM0: memory-controller { - compatible = "ibm,sdram-405gp"; - dcr-reg = <0x010 0x002>; - }; - - MAL: mcmal { - compatible = "ibm,mcmal-405gp", "ibm,mcmal"; - dcr-reg = <0x180 0x062>; - num-tx-chans = <1>; - num-rx-chans = <1>; - interrupt-parent = <&UIC0>; - interrupts = < - 0xb 0x4 /* TXEOB */ - 0xc 0x4 /* RXEOB */ - 0xa 0x4 /* SERR */ - 0xd 0x4 /* TXDE */ - 0xe 0x4 /* RXDE */>; - }; - - POB0: opb { - compatible = "ibm,opb-405gp", "ibm,opb"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0xef600000 0xef600000 0x00a00000>; - dcr-reg = <0x0a0 0x005>; - clock-frequency = <0>; /* Filled in by zImage */ - - UART0: serial@ef600300 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600300 0x00000008>; - virtual-reg = <0xef600300>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x0 0x4>; - }; - - UART1: serial@ef600400 { - device_type = "serial"; - compatible = "ns16550"; - reg = <0xef600400 0x00000008>; - virtual-reg = <0xef600400>; - clock-frequency = <0>; /* Filled in by zImage */ - current-speed = <9600>; - interrupt-parent = <&UIC0>; - interrupts = <0x1 0x4>; - }; - - IIC: i2c@ef600500 { - compatible = "ibm,iic-405gp", "ibm,iic"; - reg = <0xef600500 0x00000011>; - interrupt-parent = <&UIC0>; - interrupts = <0x2 0x4>; - }; - - GPIO: gpio@ef600700 { - compatible = "ibm,gpio-405gp"; - reg = <0xef600700 0x00000020>; - }; - - EMAC: ethernet@ef600800 { - device_type = "network"; - compatible = "ibm,emac-405gp", "ibm,emac"; - interrupt-parent = <&UIC0>; - interrupts = < - 0xf 0x4 /* Ethernet */ - 0x9 0x4 /* Ethernet Wake Up */>; - local-mac-address = [000000000000]; /* Filled in by zImage */ - reg = <0xef600800 0x00000070>; - mal-device = <&MAL>; - mal-tx-channel = <0>; - mal-rx-channel = <0>; - cell-index = <0>; - max-frame-size = <1500>; - rx-fifo-size = <4096>; - tx-fifo-size = <2048>; - phy-mode = "rmii"; - phy-map = <0x00000001>; - }; - - }; - - EBC0: ebc { - compatible = "ibm,ebc-405gp", "ibm,ebc"; - dcr-reg = <0x012 0x002>; - #address-cells = <2>; - #size-cells = <1>; - /* The ranges property is supplied by the bootwrapper - * and is based on the firmware's configuration of the - * EBC bridge - */ - clock-frequency = <0>; /* Filled in by zImage */ - - sram@0,0 { - reg = <0x00000000 0x00000000 0x00080000>; - }; - - flash@0,80000 { - compatible = "jedec-flash"; - bank-width = <1>; - reg = <0x00000000 
0x00080000 0x00080000>; - #address-cells = <1>; - #size-cells = <1>; - partition@0 { - label = "OpenBIOS"; - reg = <0x00000000 0x00080000>; - read-only; - }; - }; - - nvram@1,0 { - /* NVRAM and RTC */ - compatible = "ds1743-nvram"; - #bytes = <0x2000>; - reg = <0x00000001 0x00000000 0x00002000>; - }; - - keyboard@2,0 { - compatible = "intel,82C42PC"; - reg = <0x00000002 0x00000000 0x00000002>; - }; - - ir@3,0 { - compatible = "ti,TIR2000PAG"; - reg = <0x00000003 0x00000000 0x00000010>; - }; - - fpga@7,0 { - compatible = "Walnut-FPGA"; - reg = <0x00000007 0x00000000 0x00000010>; - virtual-reg = <0xf0300005>; - }; - }; - - PCI0: pci@ec000000 { - device_type = "pci"; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - compatible = "ibm,plb405gp-pci", "ibm,plb-pci"; - primary; - reg = <0xeec00000 0x00000008 /* Config space access */ - 0xeed80000 0x00000004 /* IACK */ - 0xeed80000 0x00000004 /* Special cycle */ - 0xef480000 0x00000040>; /* Internal registers */ - - /* Outbound ranges, one memory and one IO, - * later cannot be changed. Chip supports a second - * IO range but we don't use it for now - */ - ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000 - 0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>; - - /* Inbound 2GB range starting at 0 */ - dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>; - - /* Walnut has all 4 IRQ pins tied together per slot */ - interrupt-map-mask = <0xf800 0x0 0x0 0x0>; - interrupt-map = < - /* IDSEL 1 */ - 0x800 0x0 0x0 0x0 &UIC0 0x1c 0x8 - - /* IDSEL 2 */ - 0x1000 0x0 0x0 0x0 &UIC0 0x1d 0x8 - - /* IDSEL 3 */ - 0x1800 0x0 0x0 0x0 &UIC0 0x1e 0x8 - - /* IDSEL 4 */ - 0x2000 0x0 0x0 0x0 &UIC0 0x1f 0x8 - >; - }; - }; - - chosen { - stdout-path = "/plb/opb/serial@ef600300"; - }; -}; diff --git a/arch/powerpc/boot/ep405.c b/arch/powerpc/boot/ep405.c deleted file mode 100644 index f9ad1e6a844e..000000000000 --- a/arch/powerpc/boot/ep405.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Embedded Planet EP405 with PlanetCore firmware - * - * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,\ - * - * Based on ep88xc.c by - * - * Scott Wood <scottwood@freescale.com> - * - * Copyright (c) 2007 Freescale Semiconductor, Inc. 
- */ - -#include "ops.h" -#include "stdio.h" -#include "planetcore.h" -#include "dcr.h" -#include "4xx.h" -#include "io.h" - -static char *table; -static u64 mem_size; - -static void platform_fixups(void) -{ - u64 val; - void *nvrtc; - - dt_fixup_memory(0, mem_size); - planetcore_set_mac_addrs(table); - - if (!planetcore_get_decimal(table, PLANETCORE_KEY_CRYSTAL_HZ, &val)) { - printf("No PlanetCore crystal frequency key.\r\n"); - return; - } - ibm405gp_fixup_clocks(val, 0xa8c000); - ibm4xx_quiesce_eth((u32 *)0xef600800, NULL); - ibm4xx_fixup_ebc_ranges("/plb/ebc"); - - if (!planetcore_get_decimal(table, PLANETCORE_KEY_KB_NVRAM, &val)) { - printf("No PlanetCore NVRAM size key.\r\n"); - return; - } - nvrtc = finddevice("/plb/ebc/nvrtc@4,200000"); - if (nvrtc != NULL) { - u32 reg[3] = { 4, 0x200000, 0}; - getprop(nvrtc, "reg", reg, 3); - reg[2] = (val << 10) & 0xffffffff; - setprop(nvrtc, "reg", reg, 3); - } -} - -void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - table = (char *)r3; - planetcore_prepare_table(table); - - if (!planetcore_get_decimal(table, PLANETCORE_KEY_MB_RAM, &mem_size)) - return; - - mem_size *= 1024 * 1024; - simple_alloc_init(_end, mem_size - (unsigned long)_end, 32, 64); - - fdt_init(_dtb_start); - - planetcore_set_stdout_path(table); - - serial_console_init(); - platform_ops.fixups = platform_fixups; -} diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index a9d209135975..cae31a6e8f02 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -104,7 +104,7 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen, { /* If we have an image attached to us, it overrides anything * supplied by the loader. */ - if (_initrd_end > _initrd_start) { + if (&_initrd_end > &_initrd_start) { printf("Attached initrd image at 0x%p-0x%p\n\r", _initrd_start, _initrd_end); initrd_addr = (unsigned long)_initrd_start; @@ -152,7 +152,7 @@ static void prep_esm_blob(struct addr_range vmlinux, void *chosen) unsigned long esm_blob_addr, esm_blob_size; /* Do we have an ESM (Enter Secure Mode) blob? 
*/ - if (_esm_blob_end <= _esm_blob_start) + if (&_esm_blob_end <= &_esm_blob_start) return; printf("Attached ESM blob at 0x%p-0x%p\n\r", diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index e0606766480f..6455fc9a244f 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -88,7 +88,6 @@ int serial_console_init(void); int ns16550_console_init(void *devp, struct serial_console_data *scdp); int cpm_console_init(void *devp, struct serial_console_data *scdp); int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp); -int uartlite_console_init(void *devp, struct serial_console_data *scdp); int opal_console_init(void *devp, struct serial_console_data *scdp); void *simple_alloc_init(char *base, unsigned long heap_size, unsigned long granularity, unsigned long max_allocs); diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index c52552a681c5..6e4efbdb6b7c 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -127,7 +127,7 @@ void platform_init(void) ps3_repository_read_rm_size(&rm_size); dt_fixup_memory(0, rm_size); - if (_initrd_end > _initrd_start) { + if (&_initrd_end > &_initrd_start) { setprop_val(chosen, "linux,initrd-start", (u32)(_initrd_start)); setprop_val(chosen, "linux,initrd-end", (u32)(_initrd_end)); } diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 9457863147f9..9a19e5905485 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -128,15 +128,10 @@ int serial_console_init(void) dt_is_compatible(devp, "fsl,cpm2-smc-uart")) rc = cpm_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_PPC_MPC52XX +#ifdef CONFIG_PPC_MPC52xx else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_XILINX_VIRTEX - else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") || - dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a")) - rc = uartlite_console_init(devp, &serial_cd); -#endif #ifdef CONFIG_PPC64_BOOT_WRAPPER else if (dt_is_compatible(devp, "ibm,opal-console-raw")) rc = opal_console_init(devp, &serial_cd); diff --git a/arch/powerpc/boot/treeboot-walnut.c b/arch/powerpc/boot/treeboot-walnut.c deleted file mode 100644 index 623f58e7f7c9..000000000000 --- a/arch/powerpc/boot/treeboot-walnut.c +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Old U-boot compatibility for Walnut - * - * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * Copyright 2007 IBM Corporation - * Based on cuboot-83xx.c, which is: - * Copyright (c) 2007 Freescale Semiconductor, Inc. 
- */ - -#include "ops.h" -#include "stdio.h" -#include "dcr.h" -#include "4xx.h" -#include "io.h" - -BSS_STACK(4096); - -static void walnut_flashsel_fixup(void) -{ - void *devp, *sram; - u32 reg_flash[3] = {0x0, 0x0, 0x80000}; - u32 reg_sram[3] = {0x0, 0x0, 0x80000}; - u8 *fpga; - u8 fpga_brds1 = 0x0; - - devp = finddevice("/plb/ebc/fpga"); - if (!devp) - fatal("Couldn't locate FPGA node\n\r"); - - if (getprop(devp, "virtual-reg", &fpga, sizeof(fpga)) != sizeof(fpga)) - fatal("no virtual-reg property\n\r"); - - fpga_brds1 = in_8(fpga); - - devp = finddevice("/plb/ebc/flash"); - if (!devp) - fatal("Couldn't locate flash node\n\r"); - - if (getprop(devp, "reg", reg_flash, sizeof(reg_flash)) != sizeof(reg_flash)) - fatal("flash reg property has unexpected size\n\r"); - - sram = finddevice("/plb/ebc/sram"); - if (!sram) - fatal("Couldn't locate sram node\n\r"); - - if (getprop(sram, "reg", reg_sram, sizeof(reg_sram)) != sizeof(reg_sram)) - fatal("sram reg property has unexpected size\n\r"); - - if (fpga_brds1 & 0x1) { - reg_flash[1] ^= 0x80000; - reg_sram[1] ^= 0x80000; - } - - setprop(devp, "reg", reg_flash, sizeof(reg_flash)); - setprop(sram, "reg", reg_sram, sizeof(reg_sram)); -} - -#define WALNUT_OPENBIOS_MAC_OFF 0xfffffe0b -static void walnut_fixups(void) -{ - ibm4xx_sdram_fixup_memsize(); - ibm405gp_fixup_clocks(33330000, 0xa8c000); - ibm4xx_quiesce_eth((u32 *)0xef600800, NULL); - ibm4xx_fixup_ebc_ranges("/plb/ebc"); - walnut_flashsel_fixup(); - dt_fixup_mac_address_by_alias("ethernet0", (u8 *) WALNUT_OPENBIOS_MAC_OFF); -} - -void platform_init(void) -{ - unsigned long end_of_ram = 0x2000000; - unsigned long avail_ram = end_of_ram - (unsigned long) _end; - - simple_alloc_init(_end, avail_ram, 32, 32); - platform_ops.fixups = walnut_fixups; - platform_ops.exit = ibm40x_dbcr_reset; - fdt_init(_dtb_start); - serial_console_init(); -} diff --git a/arch/powerpc/boot/uartlite.c b/arch/powerpc/boot/uartlite.c deleted file mode 100644 index 46bed69b4169..000000000000 --- a/arch/powerpc/boot/uartlite.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Xilinx UARTLITE bootloader driver - * - * Copyright (C) 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
- */ - -#include <stdarg.h> -#include <stddef.h> -#include "types.h" -#include "string.h" -#include "stdio.h" -#include "io.h" -#include "ops.h" - -#define ULITE_RX 0x00 -#define ULITE_TX 0x04 -#define ULITE_STATUS 0x08 -#define ULITE_CONTROL 0x0c - -#define ULITE_STATUS_RXVALID 0x01 -#define ULITE_STATUS_TXFULL 0x08 - -#define ULITE_CONTROL_RST_RX 0x02 - -static void * reg_base; - -static int uartlite_open(void) -{ - /* Clear the RX FIFO */ - out_be32(reg_base + ULITE_CONTROL, ULITE_CONTROL_RST_RX); - return 0; -} - -static void uartlite_putc(unsigned char c) -{ - u32 reg = ULITE_STATUS_TXFULL; - while (reg & ULITE_STATUS_TXFULL) /* spin on TXFULL bit */ - reg = in_be32(reg_base + ULITE_STATUS); - out_be32(reg_base + ULITE_TX, c); -} - -static unsigned char uartlite_getc(void) -{ - u32 reg = 0; - while (!(reg & ULITE_STATUS_RXVALID)) /* spin waiting for RXVALID bit */ - reg = in_be32(reg_base + ULITE_STATUS); - return in_be32(reg_base + ULITE_RX); -} - -static u8 uartlite_tstc(void) -{ - u32 reg = in_be32(reg_base + ULITE_STATUS); - return reg & ULITE_STATUS_RXVALID; -} - -int uartlite_console_init(void *devp, struct serial_console_data *scdp) -{ - int n; - unsigned long reg_phys; - - n = getprop(devp, "virtual-reg", ®_base, sizeof(reg_base)); - if (n != sizeof(reg_base)) { - if (!dt_xlate_reg(devp, 0, ®_phys, NULL)) - return -1; - - reg_base = (void *)reg_phys; - } - - scdp->open = uartlite_open; - scdp->putc = uartlite_putc; - scdp->getc = uartlite_getc; - scdp->tstc = uartlite_tstc; - scdp->close = NULL; - return 0; -} diff --git a/arch/powerpc/boot/virtex.c b/arch/powerpc/boot/virtex.c deleted file mode 100644 index f731cbb4bff0..000000000000 --- a/arch/powerpc/boot/virtex.c +++ /dev/null @@ -1,97 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * The platform specific code for virtex devices since a boot loader is not - * always used. - * - * (C) Copyright 2008 Xilinx, Inc. 
- */ - -#include "ops.h" -#include "io.h" -#include "stdio.h" - -#define UART_DLL 0 /* Out: Divisor Latch Low */ -#define UART_DLM 1 /* Out: Divisor Latch High */ -#define UART_FCR 2 /* Out: FIFO Control Register */ -#define UART_FCR_CLEAR_RCVR 0x02 /* Clear the RCVR FIFO */ -#define UART_FCR_CLEAR_XMIT 0x04 /* Clear the XMIT FIFO */ -#define UART_LCR 3 /* Out: Line Control Register */ -#define UART_MCR 4 /* Out: Modem Control Register */ -#define UART_MCR_RTS 0x02 /* RTS complement */ -#define UART_MCR_DTR 0x01 /* DTR complement */ -#define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ -#define UART_LCR_WLEN8 0x03 /* Wordlength: 8 bits */ - -static int virtex_ns16550_console_init(void *devp) -{ - unsigned char *reg_base; - u32 reg_shift, reg_offset, clk, spd; - u16 divisor; - int n; - - if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) - return -1; - - n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); - if (n == sizeof(reg_offset)) - reg_base += reg_offset; - - n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); - if (n != sizeof(reg_shift)) - reg_shift = 0; - - n = getprop(devp, "current-speed", (void *)&spd, sizeof(spd)); - if (n != sizeof(spd)) - spd = 9600; - - /* should there be a default clock rate?*/ - n = getprop(devp, "clock-frequency", (void *)&clk, sizeof(clk)); - if (n != sizeof(clk)) - return -1; - - divisor = clk / (16 * spd); - - /* Access baud rate */ - out_8(reg_base + (UART_LCR << reg_shift), UART_LCR_DLAB); - - /* Baud rate based on input clock */ - out_8(reg_base + (UART_DLL << reg_shift), divisor & 0xFF); - out_8(reg_base + (UART_DLM << reg_shift), divisor >> 8); - - /* 8 data, 1 stop, no parity */ - out_8(reg_base + (UART_LCR << reg_shift), UART_LCR_WLEN8); - - /* RTS/DTR */ - out_8(reg_base + (UART_MCR << reg_shift), UART_MCR_RTS | UART_MCR_DTR); - - /* Clear transmitter and receiver */ - out_8(reg_base + (UART_FCR << reg_shift), - UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); - return 0; -} - -/* For virtex, the kernel may be loaded without using a bootloader and if so - some UARTs need more setup than is provided in the normal console init -*/ -int platform_specific_init(void) -{ - void *devp; - char devtype[MAX_PROP_LEN]; - char path[MAX_PATH_LEN]; - - devp = finddevice("/chosen"); - if (devp == NULL) - return -1; - - if (getprop(devp, "linux,stdout-path", path, MAX_PATH_LEN) > 0) { - devp = finddevice(path); - if (devp == NULL) - return -1; - - if ((getprop(devp, "device_type", devtype, sizeof(devtype)) > 0) - && !strcmp(devtype, "serial") - && (dt_is_compatible(devp, "ns16550"))) - virtex_ns16550_console_init(devp); - } - return 0; -} diff --git a/arch/powerpc/boot/virtex405-head.S b/arch/powerpc/boot/virtex405-head.S deleted file mode 100644 index 00bab7d7c48c..000000000000 --- a/arch/powerpc/boot/virtex405-head.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "ppc_asm.h" - - .text - .global _zimage_start -_zimage_start: - - /* PPC errata 213: needed by Virtex-4 FX */ - mfccr0 0 - oris 0,0,0x50000000@h - mtccr0 0 - - /* - * Invalidate the data cache if the data cache is turned off. - * - The 405 core does not invalidate the data cache on power-up - * or reset but does turn off the data cache. We cannot assume - * that the cache contents are valid. - * - If the data cache is turned on this must have been done by - * a bootloader and we assume that the cache contents are - * valid. 
- */ - mfdccr r9 - cmplwi r9,0 - bne 2f - lis r9,0 - li r8,256 - mtctr r8 -1: dccci r0,r9 - addi r9,r9,0x20 - bdnz 1b -2: b _zimage_start_lib diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index ed6266367bc0..cd58a62e810d 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -29,6 +29,7 @@ set -e # Allow for verbose output if [ "$V" = 1 ]; then set -x + map="-Map wrapper.map" fi # defaults @@ -323,14 +324,6 @@ adder875-redboot) platformo="$object/fixed-head.o $object/redboot-8xx.o" binary=y ;; -simpleboot-virtex405-*) - platformo="$object/virtex405-head.o $object/simpleboot.o $object/virtex.o" - binary=y - ;; -simpleboot-virtex440-*) - platformo="$object/fixed-head.o $object/simpleboot.o $object/virtex.o" - binary=y - ;; simpleboot-*) platformo="$object/fixed-head.o $object/simpleboot.o" binary=y @@ -500,7 +493,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \ $platformo $tmp $object/wrapper.a rm $tmp fi @@ -570,7 +563,18 @@ ps3) count=$overlay_size bs=1 odir="$(dirname "$ofile.bin")" - rm -f "$odir/otheros.bld" - gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld" + + # The ps3's flash loader has a size limit of 16 MiB for the uncompressed + # image. If a compressed image that exceeded this limit is written to + # flash the loader will decompress that image until the 16 MiB limit is + # reached, then enter the system reset vector of the partially decompressed + # image. No warning is issued. + rm -f "$odir"/{otheros,otheros-too-big}.bld + size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1) + bld="otheros.bld" + if [ $size -gt $((0x1000000)) ]; then + bld="otheros-too-big.bld" + fi + gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld" ;; esac diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index db93c117be36..25eed86ec528 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -9,7 +9,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ACADIA=y -# CONFIG_WALNUT is not set CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig deleted file mode 100644 index a3854cf65f8d..000000000000 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ /dev/null @@ -1,62 +0,0 @@ -CONFIG_40x=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -CONFIG_KALLSYMS_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_EP405=y -# CONFIG_WALNUT is not set -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IPV6 is not set -CONFIG_CONNECTOR=y -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=m -CONFIG_MTD_CFI=y -CONFIG_MTD_JEDECPROBE=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=35000 -CONFIG_NETDEVICES=y -CONFIG_IBM_EMAC=y -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_SERIAL_OF_PLATFORM=y -# CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set 
-CONFIG_THERMAL=y -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_OHCI_HCD_PPC_OF_BE=y -CONFIG_USB_OHCI_HCD_PPC_OF_LE=y -CONFIG_EXT2_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_DEBUG_FS=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_PCBC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_DES=y diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index edc22464dfb5..3549c9e950e8 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -11,7 +11,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_KILAUEA=y -# CONFIG_WALNUT is not set CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig index 579fa846839c..6a735ee75715 100644 --- a/arch/powerpc/configs/40x/klondike_defconfig +++ b/arch/powerpc/configs/40x/klondike_defconfig @@ -8,7 +8,6 @@ CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_WALNUT is not set CONFIG_APM8018X=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig index 188789b9aa4c..4563f88acf0c 100644 --- a/arch/powerpc/configs/40x/makalu_defconfig +++ b/arch/powerpc/configs/40x/makalu_defconfig @@ -9,7 +9,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_MAKALU=y -# CONFIG_WALNUT is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig index 5bf6af7ef093..2a2bb3f46847 100644 --- a/arch/powerpc/configs/40x/obs600_defconfig +++ b/arch/powerpc/configs/40x/obs600_defconfig @@ -10,7 +10,6 @@ CONFIG_KALLSYMS_ALL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_WALNUT is not set CONFIG_OBS600=y CONFIG_MATH_EMULATION=y CONFIG_NET=y diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig deleted file mode 100644 index 5e7c61d1d7d0..000000000000 --- a/arch/powerpc/configs/40x/virtex_defconfig +++ /dev/null @@ -1,75 +0,0 @@ -CONFIG_40x=y -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_WALNUT is not set -CONFIG_XILINX_VIRTEX_GENERIC_BOARD=y -CONFIG_PREEMPT=y -CONFIG_MATH_EMULATION=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" -CONFIG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_NETFILTER=y -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_MANGLE=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_XILINX_SYSACE=y -CONFIG_NETDEVICES=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_XILINX_XPS_PS2=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y -CONFIG_XILINX_HWICAP=y -CONFIG_GPIOLIB=y -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_XILINX=y -# CONFIG_HWMON is not set -CONFIG_FB=y 
-CONFIG_FB_XILINX=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_AUTOFS4_FS=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_CRAMFS=y -CONFIG_ROMFS_FS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 7705a5c3f4ea..3894ba8f8ffc 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -19,8 +19,6 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y CONFIG_IRQ_ALL_CPUS=y # CONFIG_COMPACTION is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" # CONFIG_SUSPEND is not set CONFIG_NET=y CONFIG_PACKET=y @@ -58,7 +56,6 @@ CONFIG_BLK_DEV_SD=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_EXAR is not set CONFIG_IBM_EMAC=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 82c6f49b8dcb..41d04e70d4fb 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_ARCHES=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index 679213214a75..acbce718eaa8 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_BAMBOO=y # CONFIG_EBONY is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig index 8006a5728afd..37088f250c9e 100644 --- a/arch/powerpc/configs/44x/bluestone_defconfig +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -11,8 +11,6 @@ CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_BLUESTONE=y # CONFIG_EBONY is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index ccc14eb7a2f1..61776ade572b 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_CANYONLANDS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index be76e066df01..34c86b3abecb 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -17,8 +17,6 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" # CONFIG_SUSPEND is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index 
1abaa63e067f..509300f400e2 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_EIGER=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index e67fc041ca3e..30845ce0885a 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -28,7 +28,6 @@ CONFIG_476FPE_ERR46=y CONFIG_SWIOTLB=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="ip=on rw" # CONFIG_SUSPEND is not set # CONFIG_PCI is not set diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index 7d7ff84c8200..930948a1da76 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_ICON=y CONFIG_HIGHMEM=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index fb5c73a29bf4..2c3834eebca3 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -17,7 +17,6 @@ CONFIG_ISS4xx=y CONFIG_HZ_100=y CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="root=/dev/issblk0" # CONFIG_PCI is not set CONFIG_ADVANCED_OPTIONS=y diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index c6dc1445fc04..1a0f1c3e0ee9 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_KATMAI=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index c83ad03182df..6dd67de06a0b 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_RAINIER=y CONFIG_MATH_EMULATION=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index 640fe1d5af28..e28d76416537 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_REDWOOD=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 22dc0dadf576..ef09786d49b9 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -12,8 +12,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y # CONFIG_EBONY is not set CONFIG_SAM440EP=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index 2c0973db8837..b4984eab43eb 100644 --- 
a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_SEQUOIA=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index a2d355ca62b2..3ea5932ab852 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_TAISHAN=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig deleted file mode 100644 index 1f74079e1703..000000000000 --- a/arch/powerpc/configs/44x/virtex5_defconfig +++ /dev/null @@ -1,74 +0,0 @@ -CONFIG_44x=y -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_EBONY is not set -CONFIG_XILINX_VIRTEX440_GENERIC_BOARD=y -CONFIG_PREEMPT=y -CONFIG_MATH_EMULATION=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_NETFILTER=y -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_MANGLE=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_XILINX_SYSACE=y -CONFIG_NETDEVICES=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_XILINX_XPS_PS2=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y -CONFIG_XILINX_HWICAP=y -CONFIG_GPIOLIB=y -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_XILINX=y -# CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FB_XILINX=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_AUTOFS4_FS=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_CRAMFS=y -CONFIG_ROMFS_FS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index af66c69c49fe..47252c2d7669 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_WARP=y CONFIG_PPC4xx_GPIO=y CONFIG_HZ_1000=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="ip=on" # CONFIG_PCI is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index b507df6ac69f..524db76f47b7 100644 --- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -67,7 +67,6 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_M25P80=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_RAW_NAND=y diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig 
b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index d50aca608736..3a6381aa9fdc 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -51,9 +51,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config index 151164cf8cb3..0cb24b33c88e 100644 --- a/arch/powerpc/configs/86xx-hw.config +++ b/arch/powerpc/configs/86xx-hw.config @@ -32,8 +32,6 @@ CONFIG_HW_RANDOM=y CONFIG_HZ_1000=y CONFIG_I2C_MPC=y CONFIG_I2C=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSEDEV is not set diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index f55e23cb176c..5326bc739279 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -10,7 +10,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_ADDER875=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index 0e2e5e81a359..f5c3e72da719 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -12,7 +12,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_EP88XC=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index 3c7dad19a691..df37efed0aec 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -56,7 +56,6 @@ CONFIG_IKCONFIG=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MROUTE=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index a68c7f3af10e..1c674c4c1d86 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -51,7 +51,6 @@ CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig index 067f433c8f5e..271daff47d1d 100644 --- a/arch/powerpc/configs/holly_defconfig +++ b/arch/powerpc/configs/holly_defconfig @@ -13,7 +13,6 @@ CONFIG_EMBEDDED6xx=y CONFIG_PPC_HOLLY=y CONFIG_GEN_RTC=y CONFIG_BINFMT_MISC=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_SECCOMP is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index ea59f3d146df..d4be64f190ff 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -37,7 +37,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m 
CONFIG_IP_NF_TARGET_REJECT=m diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index e39346b3dc3b..e75d3f3060c9 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -47,7 +47,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=1 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_RAM_DAX=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_AT25=y CONFIG_SCSI=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index be125729635c..95d43f8a3869 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -19,7 +19,6 @@ CONFIG_MPC836x_MDS=y CONFIG_MPC836x_RDK=y CONFIG_MPC837x_MDS=y CONFIG_MPC837x_RDB=y -CONFIG_SBC834x=y CONFIG_ASP834x=y CONFIG_QE_GPIO=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 5320735395e7..5c56d36cdfc5 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -12,7 +12,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MPC86XADS=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 82a008c04eae..949ff9ccda5e 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -11,7 +11,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index 0a0d046fc445..1fed6be95d53 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -20,10 +20,9 @@ CONFIG_EMBEDDED6xx=y CONFIG_MVME5100=y CONFIG_KVM_GUEST=y CONFIG_HZ_100=y +CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -46,7 +45,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 08b7f4cef243..15ed8d0aa014 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -58,7 +58,6 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y -CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=y @@ -109,7 +108,6 @@ CONFIG_FB_NVIDIA=y CONFIG_FB_NVIDIA_I2C=y CONFIG_FB_RADEON=y # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 05e325ca3fbd..665a8d7cded0 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -75,7 +75,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m 
-CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -90,13 +89,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_IP_DCCP=m -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -CONFIG_IRTTY_SIR=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index df8bdbaa5d8f..cf30fc24413b 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -64,7 +64,6 @@ CONFIG_HWPOISON_INJECT=m CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y CONFIG_PM=y CONFIG_HOTPLUG_PCI=y @@ -246,7 +245,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m @@ -347,3 +345,4 @@ CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m CONFIG_PRINTK_TIME=y +CONFIG_PRINTK_CALLER=y diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index a5f683aed328..7e48693775f4 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -10,11 +10,9 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PPC4xx_GPIO=y CONFIG_ACADIA=y -CONFIG_EP405=y CONFIG_HOTFOOT=y CONFIG_KILAUEA=y CONFIG_MAKALU=y -CONFIG_XILINX_VIRTEX_GENERIC_BOARD=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -22,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -37,33 +32,26 @@ CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -CONFIG_XILINX_SYSACE=m CONFIG_NETDEVICES=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set CONFIG_SERIO=m # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_XILINX_XPS_PS2=m # CONFIG_VT is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y # CONFIG_HW_RANDOM is not set -CONFIG_XILINX_HWICAP=m CONFIG_I2C=m CONFIG_I2C_CHARDEV=m CONFIG_I2C_GPIO=m CONFIG_I2C_IBM_IIC=m -CONFIG_GPIO_XILINX=y # CONFIG_HWMON is not set CONFIG_THERMAL=y CONFIG_FB=m -CONFIG_FB_XILINX=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=m CONFIG_VFAT_FS=m diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index a41eedfe0a5f..8b595f67068c 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -22,7 +22,6 @@ CONFIG_GLACIER=y CONFIG_REDWOOD=y CONFIG_EIGER=y CONFIG_YOSEMITE=y -CONFIG_XILINX_VIRTEX440_GENERIC_BOARD=y CONFIG_PPC4xx_GPIO=y CONFIG_MATH_EMULATION=y CONFIG_NET=y @@ -46,7 +45,6 @@ CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -CONFIG_XILINX_SYSACE=m CONFIG_SCSI=m CONFIG_BLK_DEV_SD=m # CONFIG_SCSI_LOWLEVEL is not set @@ -57,7 +55,6 @@ CONFIG_IBM_EMAC=y CONFIG_SERIO=m # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set -CONFIG_SERIO_XILINX_XPS_PS2=m # CONFIG_VT is not set 
CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -65,18 +62,13 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y # CONFIG_HW_RANDOM is not set -CONFIG_XILINX_HWICAP=m CONFIG_I2C=m CONFIG_I2C_CHARDEV=m CONFIG_I2C_GPIO=m CONFIG_I2C_IBM_IIC=m -CONFIG_GPIO_XILINX=y # CONFIG_HWMON is not set CONFIG_FB=m -CONFIG_FB_XILINX=m CONFIG_USB=m CONFIG_USB_EHCI_HCD=m CONFIG_USB_OHCI_HCD=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index bae8170d7401..48759656a067 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -268,7 +268,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m @@ -281,6 +280,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_LIBNVDIMM=y CONFIG_RAS=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y @@ -358,6 +358,7 @@ CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_CRYPTO_DEV_VMX=y CONFIG_PRINTK_TIME=y +CONFIG_PRINTK_CALLER=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index feb5d47d8d1e..0ad0f6d41980 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -53,7 +53,6 @@ CONFIG_MPC836x_MDS=y CONFIG_MPC836x_RDK=y CONFIG_MPC837x_MDS=y CONFIG_MPC837x_RDB=y -CONFIG_SBC834x=y CONFIG_ASP834x=y CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y @@ -187,7 +186,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -203,7 +201,6 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -241,7 +238,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_DCCP=m -CONFIG_NET_DCCPPROBE=m CONFIG_TIPC=m CONFIG_ATM=m CONFIG_ATM_CLIP=m @@ -251,7 +247,6 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_DECNET=m CONFIG_DECNET_ROUTER=y -CONFIG_IPX=m CONFIG_ATALK=m CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m @@ -297,26 +292,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -CONFIG_IRTTY_SIR=m -CONFIG_KINGSUN_DONGLE=m -CONFIG_KSDAZZLE_DONGLE=m -CONFIG_KS959_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_SIGMATEL_FIR=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m -CONFIG_VIA_FIR=m -CONFIG_MCS_FIR=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y @@ -332,7 +307,6 @@ CONFIG_BT_HCIBFUSB=m CONFIG_BT_HCIDTL1=m CONFIG_BT_HCIBT3C=m CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBTUART=m CONFIG_BT_HCIVHCI=m CONFIG_CFG80211=m CONFIG_MAC80211=m @@ -366,7 +340,6 @@ CONFIG_EEPROM_93CX6=m CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m @@ -606,7 +579,6 @@ 
CONFIG_JOYSTICK_XPAD_LEDS=y CONFIG_INPUT_TABLET=y CONFIG_TABLET_USB_ACECAD=m CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m CONFIG_TABLET_USB_KBTAB=m CONFIG_INPUT_MISC=y CONFIG_INPUT_PCSPKR=m @@ -663,7 +635,6 @@ CONFIG_I2C_MPC=m CONFIG_I2C_PCA_PLATFORM=m CONFIG_I2C_SIMTEC=m CONFIG_I2C_PARPORT=m -CONFIG_I2C_PARPORT_LIGHT=m CONFIG_I2C_TINY_USB=m CONFIG_I2C_PCA_ISA=m CONFIG_I2C_STUB=m @@ -676,7 +647,6 @@ CONFIG_W1_SLAVE_THERM=m CONFIG_W1_SLAVE_SMEM=m CONFIG_W1_SLAVE_DS2433=m CONFIG_W1_SLAVE_DS2433_CRC=y -CONFIG_W1_SLAVE_DS2760=m CONFIG_APM_POWER=m CONFIG_BATTERY_PMU=m CONFIG_HWMON=m @@ -772,7 +742,6 @@ CONFIG_FB_TRIDENT=m CONFIG_FB_SM501=m CONFIG_FB_IBM_GXT4500=y CONFIG_LCD_PLATFORM=m -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y @@ -1065,15 +1034,6 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y CONFIG_CIFS_DFS_UPCALL=y -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y CONFIG_CODA_FS=m CONFIG_9P_FS=m CONFIG_NLS_DEFAULT="utf8" @@ -1117,7 +1077,6 @@ CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y -CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_OBJECTS=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 81b55c880fc3..142f1321fa58 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -34,8 +34,6 @@ CONFIG_KEXEC=y # CONFIG_SPARSEMEM_VMEMMAP is not set # CONFIG_COMPACTION is not set CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PM=y CONFIG_PM_DEBUG=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 0bea4d3ffb85..d5dece981c02 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -57,7 +57,6 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m @@ -94,6 +93,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m +CONFIG_BLK_DEV_NVME=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -224,7 +224,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m @@ -322,3 +321,4 @@ CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m CONFIG_PRINTK_TIME=y +CONFIG_PRINTK_CALLER=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index ad6739ac63dc..b806a5d3a695 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -45,7 +45,6 @@ CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet" # CONFIG_SECCOMP is not set # CONFIG_PPC_MEM_KEYS is not set diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index b964084e4056..47dcfaddc1ac 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -12,7 +12,6 @@ CONFIG_EMBEDDED6xx=y 
CONFIG_STORCENTER=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_SECCOMP is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index eda8bfb2d0a3..77857d513022 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -15,7 +15,6 @@ CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_TQM8XX=y -CONFIG_8xx_COPYBACK=y # CONFIG_8xx_CPU15 is not set CONFIG_GEN_RTC=y CONFIG_HZ_100=y diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S index c3524eba4d0d..a16a717c809c 100644 --- a/arch/powerpc/crypto/crc32-vpmsum_core.S +++ b/arch/powerpc/crypto/crc32-vpmsum_core.S @@ -19,7 +19,7 @@ * We then use fixed point Barrett reduction to compute a mod n over GF(2) * for n = CRC using POWER8 instructions. We use x = 32. * - * http://en.wikipedia.org/wiki/Barrett_reduction + * https://en.wikipedia.org/wiki/Barrett_reduction * * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM */ diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c index 7d1bf2fcf668..c24f605033bd 100644 --- a/arch/powerpc/crypto/md5-glue.c +++ b/arch/powerpc/crypto/md5-glue.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/md5.h> #include <asm/byteorder.h> diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index 6379990bd604..cb57be4ada61 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index 7b43fc352089..b40dc50a6908 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -16,14 +16,13 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> -extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp); +void powerpc_sha_transform(u32 *state, const u8 *src); -static int sha1_init(struct shash_desc *desc) +static int powerpc_sha1_init(struct shash_desc *desc) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -34,8 +33,8 @@ static int sha1_init(struct shash_desc *desc) return 0; } -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int powerpc_sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int partial, done; @@ -47,7 +46,6 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, src = data; if ((partial + len) > 63) { - u32 temp[SHA_WORKSPACE_WORDS]; if (partial) { done = -partial; @@ -56,12 +54,11 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, } do { - powerpc_sha_transform(sctx->state, src, temp); + powerpc_sha_transform(sctx->state, src); done += 64; src = data + done; } while (done + 63 < len); - memzero_explicit(temp, sizeof(temp)); partial = 0; } memcpy(sctx->buffer + partial, src, len - done); @@ -71,7 +68,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, /* Add padding and 
return the message digest. */ -static int sha1_final(struct shash_desc *desc, u8 *out) +static int powerpc_sha1_final(struct shash_desc *desc, u8 *out) { struct sha1_state *sctx = shash_desc_ctx(desc); __be32 *dst = (__be32 *)out; @@ -84,10 +81,10 @@ static int sha1_final(struct shash_desc *desc, u8 *out) /* Pad out to 56 mod 64 */ index = sctx->count & 0x3f; padlen = (index < 56) ? (56 - index) : ((64+56) - index); - sha1_update(desc, padding, padlen); + powerpc_sha1_update(desc, padding, padlen); /* Append length */ - sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + powerpc_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); /* Store state in digest */ for (i = 0; i < 5; i++) @@ -99,7 +96,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) return 0; } -static int sha1_export(struct shash_desc *desc, void *out) +static int powerpc_sha1_export(struct shash_desc *desc, void *out) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -107,7 +104,7 @@ static int sha1_export(struct shash_desc *desc, void *out) return 0; } -static int sha1_import(struct shash_desc *desc, const void *in) +static int powerpc_sha1_import(struct shash_desc *desc, const void *in) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -117,11 +114,11 @@ static int sha1_import(struct shash_desc *desc, const void *in) static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, - .update = sha1_update, - .final = sha1_final, - .export = sha1_export, - .import = sha1_import, + .init = powerpc_sha1_init, + .update = powerpc_sha1_update, + .final = powerpc_sha1_final, + .export = powerpc_sha1_export, + .import = powerpc_sha1_import, .descsize = sizeof(struct sha1_state), .statesize = sizeof(struct sha1_state), .base = { diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index 84939e563b81..ceb0b6c980b3 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index dadbcf3a0b1e..90cd5c53af66 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -4,7 +4,9 @@ generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += export.h +generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += qrwlock.h generic-y += vtime.h generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/asm-405.h b/arch/powerpc/include/asm/asm-405.h deleted file mode 100644 index 7270d3ae7c8e..000000000000 --- a/arch/powerpc/include/asm/asm-405.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _ASM_POWERPC_ASM_405_H -#define _ASM_POWERPC_ASM_405_H - -#include <asm/asm-const.h> - -#ifdef __KERNEL__ -#ifdef CONFIG_IBM405_ERR77 -/* Erratum #77 on the 405 means we need a sync or dcbt before every - * stwcx. The old ATOMIC_SYNC_FIX covered some but not all of this. 
- */ -#define PPC405_ERR77(ra,rb) stringify_in_c(dcbt ra, rb;) -#define PPC405_ERR77_SYNC stringify_in_c(sync;) -#else -#define PPC405_ERR77(ra,rb) -#define PPC405_ERR77_SYNC -#endif -#endif - -#endif /* _ASM_POWERPC_ASM_405_H */ diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 7d81e86a1e5d..de14b1a34d56 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -98,7 +98,7 @@ unsigned long __init early_init(unsigned long dt_ptr); void __init machine_init(u64 dt_ptr); #endif long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); -notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr); @@ -144,13 +144,13 @@ void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ -extern s32 patch__call_flush_count_cache; +extern s32 patch__call_flush_branch_caches; extern s32 patch__flush_count_cache_return; extern s32 patch__flush_link_stack_return; extern s32 patch__call_kvm_flush_link_stack; extern s32 patch__memset_nocache, patch__memcpy_nocache; -extern long flush_count_cache; +extern long flush_branch_caches; extern long kvm_flush_link_stack; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 31c231ea56b7..8a55eb8cc97b 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -10,9 +10,6 @@ #include <linux/types.h> #include <asm/cmpxchg.h> #include <asm/barrier.h> -#include <asm/asm-405.h> - -#define ATOMIC_INIT(i) { (i) } /* * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with @@ -47,7 +44,6 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "\n" \ #asm_op " %0,%2,%0\n" \ - PPC405_ERR77(0,%3) \ " stwcx. %0,0,%3 \n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ @@ -63,7 +59,6 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ #asm_op " %0,%2,%0\n" \ - PPC405_ERR77(0, %3) \ " stwcx. %0,0,%3\n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ @@ -81,7 +76,6 @@ static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ __asm__ __volatile__( \ "1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ #asm_op " %1,%3,%0\n" \ - PPC405_ERR77(0, %4) \ " stwcx. %1,0,%4\n" \ " bne- 1b\n" \ : "=&r" (res), "=&r" (t), "+m" (v->counter) \ @@ -130,7 +124,6 @@ static __inline__ void atomic_inc(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_inc\n\ addic %0,%0,1\n" - PPC405_ERR77(0,%2) " stwcx. %0,0,%2 \n\ bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -146,7 +139,6 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n" " addic %0,%0,1\n" - PPC405_ERR77(0, %2) " stwcx. 
%0,0,%2\n" " bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -163,7 +155,6 @@ static __inline__ void atomic_dec(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_dec\n\ addic %0,%0,-1\n" - PPC405_ERR77(0,%2)\ " stwcx. %0,0,%2\n\ bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -179,7 +170,6 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) __asm__ __volatile__( "1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n" " addic %0,%0,-1\n" - PPC405_ERR77(0, %2) " stwcx. %0,0,%2\n" " bne- 1b" : "=&r" (t), "+m" (v->counter) @@ -201,6 +191,34 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) +/* + * Don't want to override the generic atomic_try_cmpxchg_acquire, because + * we add a lock hint to the lwarx, which may not be wanted for the + * _acquire case (and is not used by the other _acquire variants so it + * would be a surprise). + */ +static __always_inline bool +atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new) +{ + int r, o = *old; + + __asm__ __volatile__ ( +"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_acquire \n" +" cmpw 0,%0,%3 \n" +" bne- 2f \n" +" stwcx. %4,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (r), "+m" (v->counter) + : "r" (&v->counter), "r" (o), "r" (new) + : "cr0", "memory"); + + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} + /** * atomic_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic_t @@ -220,7 +238,6 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u) cmpw 0,%0,%3 \n\ beq 2f \n\ add %0,%2,%0 \n" - PPC405_ERR77(0,%2) " stwcx. %0,0,%1 \n\ bne- 1b \n" PPC_ATOMIC_EXIT_BARRIER @@ -251,7 +268,6 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v) cmpwi 0,%0,0\n\ beq- 2f\n\ addic %1,%0,1\n" - PPC405_ERR77(0,%2) " stwcx. %1,0,%2\n\ bne- 1b\n" PPC_ATOMIC_EXIT_BARRIER @@ -280,7 +296,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) cmpwi %0,1\n\ addi %0,%0,-1\n\ blt- 2f\n" - PPC405_ERR77(0,%1) " stwcx. %0,0,%1\n\ bne- 1b" PPC_ATOMIC_EXIT_BARRIER diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 123adcefd40f..f53c42380832 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -7,6 +7,10 @@ #include <asm/asm-const.h> +#ifndef __ASSEMBLY__ +#include <asm/ppc-opcode.h> +#endif + /* * Memory barrier. * The sync instruction guarantees that all memory accesses initiated @@ -76,6 +80,22 @@ do { \ ___p1; \ }) +#ifdef CONFIG_PPC64 +#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + VAL = READ_ONCE(*__PTR); \ + if (unlikely(!(cond_expr))) { \ + spin_begin(); \ + do { \ + VAL = READ_ONCE(*__PTR); \ + } while (!(cond_expr)); \ + spin_end(); \ + } \ + (typeof(*ptr))VAL; \ +}) +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #define NOSPEC_BARRIER_SLOT nop #elif defined(CONFIG_PPC_FSL_BOOK3E) @@ -97,6 +117,15 @@ do { \ #define barrier_nospec() #endif /* CONFIG_PPC_BARRIER_NOSPEC */ +/* + * pmem_wmb() ensures that all stores for which the modification + * are written to persistent storage by preceding dcbfps/dcbstps + * instructions have updated persistent storage before any data + * access or data transfer caused by subsequent instructions is + * initiated. 
+ */ +#define pmem_wmb() __asm__ __volatile__(PPC_PHWSYNC ::: "memory") + #include <asm-generic/barrier.h> #endif /* _ASM_POWERPC_BARRIER_H */ diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 28dcf8222943..4a4d3afd5340 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -41,7 +41,6 @@ #include <linux/compiler.h> #include <asm/asm-compat.h> #include <asm/synch.h> -#include <asm/asm-405.h> /* PPC bit number conversion */ #define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be)) @@ -73,7 +72,6 @@ static inline void fn(unsigned long mask, \ prefix \ "1:" PPC_LLARX(%0,0,%3,0) "\n" \ stringify_in_c(op) "%0,%0,%2\n" \ - PPC405_ERR77(0,%3) \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ : "=&r" (old), "+m" (*p) \ @@ -119,7 +117,6 @@ static inline unsigned long fn( \ prefix \ "1:" PPC_LLARX(%0,0,%3,eh) "\n" \ stringify_in_c(op) "%1,%0,%2\n" \ - PPC405_ERR77(0,%3) \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ @@ -175,7 +172,6 @@ clear_bit_unlock_return_word(int nr, volatile unsigned long *addr) PPC_RELEASE_BARRIER "1:" PPC_LLARX(%0,0,%3,0) "\n" "andc %1,%0,%2\n" - PPC405_ERR77(0,%3) PPC_STLCX "%1,0,%3\n" "bne- 1b\n" : "=&r" (old), "=&r" (t) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index db0a1c281587..32fd4452e960 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_KUP_H #define _ASM_POWERPC_BOOK3S_32_KUP_H +#include <asm/bug.h> #include <asm/book3s/32/mmu-hash.h> #ifdef __ASSEMBLY__ @@ -75,7 +76,7 @@ .macro kuap_check current, gpr #ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr, KUAP(thread) + lwz \gpr, THREAD + KUAP(\current) 999: twnei \gpr, 0 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) #endif @@ -108,7 +109,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user u32 addr, end; BUILD_BUG_ON(!__builtin_constant_p(dir)); - BUILD_BUG_ON(dir == KUAP_CURRENT); + BUILD_BUG_ON(dir & ~KUAP_READ_WRITE); if (!(dir & KUAP_WRITE)) return; @@ -131,7 +132,7 @@ static __always_inline void prevent_user_access(void __user *to, const void __us BUILD_BUG_ON(!__builtin_constant_p(dir)); - if (dir == KUAP_CURRENT) { + if (dir & KUAP_CURRENT_WRITE) { u32 kuap = current->thread.kuap; if (unlikely(!kuap)) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 7549393c4c43..36443cda8dcf 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -2,7 +2,6 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/book3s/32/hash.h> @@ -113,6 +112,9 @@ static inline bool pte_user(pte_t pte) #define PMD_TABLE_SIZE 0 #define PUD_TABLE_SIZE 0 #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) + +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS (PTE_TABLE_SIZE - 1) #endif /* __ASSEMBLY__ */ #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) @@ -182,24 +184,19 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #define VMALLOC_OFFSET (0x1000000) /* 16M */ -/* - * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules - * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear - * memory shall not share segments. 
- */ -#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES) -#define VMALLOC_START ((_ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \ - ~(VMALLOC_OFFSET - 1)) -#else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) -#endif #ifdef CONFIG_KASAN_VMALLOC -#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else #define VMALLOC_END ioremap_bot #endif +#ifdef CONFIG_STRICT_KERNEL_RWX +#define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) +#define MODULES_VADDR (MODULES_END - SZ_256M) +#endif + #ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/threads.h> @@ -218,7 +215,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #define pte_clear(mm, addr, ptep) \ - do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0) + do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) @@ -253,84 +250,68 @@ extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, * and the PTE may be either 32 or 64 bit wide. In the later case, * when using atomic updates, only the low part of the PTE is * accessed atomically. - * - * In addition, on 44x, we also maintain a global flag indicating - * that an executable user mapping was modified, which is needed - * to properly flush the virtually tagged instruction cache of - * those implementations. */ -#ifndef CONFIG_PTE_64BIT -static inline unsigned long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { - unsigned long old, tmp; - - __asm__ __volatile__("\ -1: lwarx %0,0,%3\n\ - andc %1,%0,%4\n\ - or %1,%1,%5\n" -" stwcx. %1,0,%3\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); - - return old; -} -#else /* CONFIG_PTE_64BIT */ -static inline unsigned long long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) -{ - unsigned long long old; + pte_basic_t old; unsigned long tmp; - __asm__ __volatile__("\ -1: lwarx %L0,0,%4\n\ - lwzx %0,0,%3\n\ - andc %1,%L0,%5\n\ - or %1,%1,%6\n" -" stwcx. %1,0,%4\n\ - bne- 1b" + __asm__ __volatile__( +#ifndef CONFIG_PTE_64BIT +"1: lwarx %0, 0, %3\n" +" andc %1, %0, %4\n" +#else +"1: lwarx %L0, 0, %3\n" +" lwz %0, -4(%3)\n" +" andc %1, %L0, %4\n" +#endif +" or %1, %1, %5\n" +" stwcx. %1, 0, %3\n" +" bne- 1b" : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p) +#ifndef CONFIG_PTE_64BIT + : "r" (p), +#else + : "b" ((unsigned long)(p) + 4), +#endif + "r" (clr), "r" (set), "m" (*p) : "cc" ); return old; } -#endif /* CONFIG_PTE_64BIT */ /* * 2.6 calls this without flushing the TLB entry; this is wrong * for our hash-based implementation, we fix that up here. 
*/ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; - old = pte_update(ptep, _PAGE_ACCESSED, 0); + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); if (old & _PAGE_HASHPTE) { unsigned long ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(context, addr, ptephys, 1); + flush_hash_pages(mm->context.id, addr, ptephys, 1); } return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep) + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0)); + return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(ptep, _PAGE_RW, 0); + pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -341,7 +322,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - pte_update(ptep, 0, set); + pte_update(vma->vm_mm, address, ptep, 0, set, 0); flush_tlb_page(vma, address); } @@ -349,26 +330,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __HAVE_ARCH_PTE_SAME #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0) -#define pmd_page_vaddr(pmd) \ - ((unsigned long)__va(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1))) #define pmd_page(pmd) \ pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - -/* Find an entry in the third-level page table.. */ -#define pte_index(address) \ - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir, addr) \ - ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) -#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) -static inline void pte_unmap(pte_t *pte) { } - /* * Encode and decode a swap entry. * Note that the bits we use in a PTE for representing a swap entry @@ -539,7 +503,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) | (pte_val(pte) & ~_PAGE_HASHPTE)); else - pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); + pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0); #elif defined(CONFIG_PTE_64BIT) /* Second case is 32-bit with 64-bit PTE. 
In this case, we diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 3f9ae3585ab9..082b98808701 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -34,11 +34,11 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) -#define H_PAGE_F_GIX_SHIFT 53 -#define H_PAGE_F_SECOND _RPAGE_RPN44 /* HPTE is in 2ndary HPTEG */ -#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) -#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ -#define H_PAGE_HASHPTE _RPAGE_RSV2 /* software: PTE & hash are busy */ +#define H_PAGE_F_GIX_SHIFT _PAGE_PA_MAX +#define H_PAGE_F_SECOND _RPAGE_PKEY_BIT0 /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) +#define H_PAGE_BUSY _RPAGE_RSV1 +#define H_PAGE_HASHPTE _RPAGE_PKEY_BIT4 /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ @@ -57,11 +57,12 @@ #define H_PMD_FRAG_NR (PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT) /* memory key bits, only 8 keys supported */ -#define H_PTE_PKEY_BIT0 0 -#define H_PTE_PKEY_BIT1 0 -#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 -#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 -#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#define H_PTE_PKEY_BIT4 0 +#define H_PTE_PKEY_BIT3 0 +#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT3 +#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT2 +#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT1 + /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 0729c034e56f..f20de1149ebe 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -32,15 +32,15 @@ */ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ -#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ +#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ /* memory key bits. */ -#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 -#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 -#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 -#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 -#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#define H_PTE_PKEY_BIT4 _RPAGE_PKEY_BIT4 +#define H_PTE_PKEY_BIT3 _RPAGE_PKEY_BIT3 +#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT2 +#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT1 +#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT0 /* * We need to differentiate between explicit huge page and THP huge diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h new file mode 100644 index 000000000000..795010897e5d --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H +#define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H + +static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT1 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT3 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT4) ? 
H_PTE_PKEY_BIT4 : 0x0UL)); +} + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); +} + +static inline u16 hash__pte_to_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT4) ? 0x10 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? 0x8 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? 0x2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? 0x1 : 0x0UL)); +} + +#endif diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 6fc4520092c7..73ad038ed10b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -134,9 +134,9 @@ static inline int get_region_id(unsigned long ea) #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) -static inline int hash__pgd_bad(pgd_t pgd) +static inline int hash__p4d_bad(p4d_t p4d) { - return (pgd_val(pgd) == 0); + return (p4d_val(p4d) == 0); } #ifdef CONFIG_STRICT_KERNEL_RWX extern void hash__mark_rodata_ro(void); diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h new file mode 100644 index 000000000000..6b5c3a248ba2 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kexec.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_ +#define _ASM_POWERPC_BOOK3S_64_KEXEC_H_ + + +#define reset_sprs reset_sprs +static inline void reset_sprs(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + mtspr(SPRN_AMR, 0); + mtspr(SPRN_UAMOR, 0); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_IAMR, 0); + } + + /* Do we need isync()? We are going via a kexec reset */ + isync(); +} + +#endif diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 3bcef989a35d..3ee1ec60be84 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -12,11 +12,17 @@ #ifdef __ASSEMBLY__ -.macro kuap_restore_amr gpr +.macro kuap_restore_amr gpr1, gpr2 #ifdef CONFIG_PPC_KUAP BEGIN_MMU_FTR_SECTION_NESTED(67) - ld \gpr, STACK_REGS_KUAP(r1) - mtspr SPRN_AMR, \gpr + mfspr \gpr1, SPRN_AMR + ld \gpr2, STACK_REGS_KUAP(r1) + cmpd \gpr1, \gpr2 + beq 998f + isync + mtspr SPRN_AMR, \gpr2 + /* No isync required, see kuap_restore_amr() */ +998: END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm @@ -60,10 +66,28 @@ #include <asm/mmu.h> #include <asm/ptrace.h> -static inline void kuap_restore_amr(struct pt_regs *regs) +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { - if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) + if (mmu_has_feature(MMU_FTR_RADIX_KUAP) && unlikely(regs->kuap != amr)) { + isync(); mtspr(SPRN_AMR, regs->kuap); + /* + * No isync required here because we are about to RFI back to + * previous context before any user accesses would be made, + * which is a CSI. 
+ */ + } +} + +static inline unsigned long kuap_get_and_check_amr(void) +{ + if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) { + unsigned long amr = mfspr(SPRN_AMR); + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ + WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); + return amr; + } + return 0; } static inline void kuap_check_amr(void) @@ -142,13 +166,18 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); } #else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs) +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } static inline void kuap_check_amr(void) { } + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0; +} #endif /* CONFIG_PPC_KUAP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 3fa1b962dc27..93d18da5e7ec 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -86,8 +86,8 @@ #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) -#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000) -#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000) +#define HPTE_R_KEY_BIT4 ASM_CONST(0x2000000000000000) +#define HPTE_R_KEY_BIT3 ASM_CONST(0x1000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) @@ -103,8 +103,8 @@ #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) #define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800) -#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400) -#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200) +#define HPTE_R_KEY_BIT1 ASM_CONST(0x0000000000000400) +#define HPTE_R_KEY_BIT0 ASM_CONST(0x0000000000000200) #define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) @@ -793,7 +793,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, } /* - * For kernel space, we use context ids as below + * For kernel space, we use context ids as * below. Range is 512TB per context. * * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff] diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index bb3deb76c951..b392384a3b15 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -82,6 +82,11 @@ extern unsigned int mmu_pid_bits; /* Base PID to allocate from */ extern unsigned int mmu_base_pid; +/* + * memory block size used with radix translation. 
+ */ +extern unsigned int __ro_after_init radix_mem_block_size; + #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) @@ -116,6 +121,9 @@ typedef struct { /* Number of users of the external (Nest) MMU */ atomic_t copros; + /* Number of user space windows opened in process mm_context */ + atomic_t vas_windows; + struct hash_mm_context *hash_context; unsigned long vdso_base; @@ -206,9 +214,15 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_early_init_devtree(void); +#else +static inline void pkey_early_init_devtree(void) {} +#endif + extern void hash__early_init_mmu(void); extern void radix__early_init_mmu(void); -static inline void early_init_mmu(void) +static inline void __init early_init_mmu(void) { if (radix_enabled()) return radix__early_init_mmu(); @@ -225,14 +239,14 @@ static inline void early_init_mmu_secondary(void) extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); -extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size); static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - if (early_radix_enabled()) - return radix__setup_initial_memory_limit(first_memblock_base, - first_memblock_size); + /* + * Hash has more strict restrictions. At this point we don't + * know which translations we will pick. Hence go with hash + * restrictions. + */ return hash__setup_initial_memory_limit(first_memblock_base, first_memblock_size); } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index a41e91bd0580..e1af0b394ceb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -85,9 +85,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud) { - *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); + *pgd = __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -107,9 +107,23 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) return pud; } +static inline void __pud_free(pud_t *pud) +{ + struct page *page = virt_to_page(pud); + + /* + * Early pud pages allocated via memblock allocator + * can't be directly freed to slab + */ + if (PageReserved(page)) + free_reserved_page(page); + else + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); +} + static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); + return __pud_free(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 368b136517e0..495fc0ccb453 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -2,7 +2,7 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> @@ -32,11 +32,13 @@ 
#define _RPAGE_SW1 0x00800 #define _RPAGE_SW2 0x00400 #define _RPAGE_SW3 0x00200 -#define _RPAGE_RSV1 0x1000000000000000UL -#define _RPAGE_RSV2 0x0800000000000000UL -#define _RPAGE_RSV3 0x0400000000000000UL -#define _RPAGE_RSV4 0x0200000000000000UL -#define _RPAGE_RSV5 0x00040UL +#define _RPAGE_RSV1 0x00040UL + +#define _RPAGE_PKEY_BIT4 0x1000000000000000UL +#define _RPAGE_PKEY_BIT3 0x0800000000000000UL +#define _RPAGE_PKEY_BIT2 0x0400000000000000UL +#define _RPAGE_PKEY_BIT1 0x0200000000000000UL +#define _RPAGE_PKEY_BIT0 0x0100000000000000UL #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */ @@ -58,13 +60,12 @@ */ #define _RPAGE_RPN0 0x01000 #define _RPAGE_RPN1 0x02000 -#define _RPAGE_RPN44 0x0100000000000000UL #define _RPAGE_RPN43 0x0080000000000000UL #define _RPAGE_RPN42 0x0040000000000000UL #define _RPAGE_RPN41 0x0020000000000000UL /* Max physical address bit as per radix table */ -#define _RPAGE_PA_MAX 57 +#define _RPAGE_PA_MAX 56 /* * Max physical address bit we will use for now. @@ -125,8 +126,6 @@ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) -#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ - H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable @@ -251,7 +250,7 @@ extern unsigned long __pmd_frag_size_shift; /* Bits to mask out from a PUD to get to the PMD page */ #define PUD_MASKED_BITS 0xc0000000000000ffUL /* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0xc0000000000000ffUL +#define P4D_MASKED_BITS 0xc0000000000000ffUL /* * Used as an indicator for rcu callback functions @@ -553,6 +552,12 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) } #endif /* CONFIG_NUMA_BALANCING */ +static inline bool pte_hw_valid(pte_t pte) +{ + return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE)) == + cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE); +} + static inline int pte_present(pte_t pte) { /* @@ -561,12 +566,11 @@ static inline int pte_present(pte_t pte) * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID * if we find _PAGE_PRESENT cleared. 
*/ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)); -} -static inline bool pte_hw_valid(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); + if (pte_hw_valid(pte)) + return true; + return (pte_raw(pte) & cpu_to_be64(_PAGE_INVALID | _PAGE_PTE)) == + cpu_to_be64(_PAGE_INVALID | _PAGE_PTE); } #ifdef CONFIG_PPC_MEM_KEYS @@ -949,81 +953,59 @@ static inline bool pud_access_permitted(pud_t pud, bool write) return pte_access_permitted(pud_pte(pud), write); } -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) +#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) }) +static inline __be64 p4d_raw(p4d_t x) +{ + return pgd_raw(x.pgd); +} + +#define p4d_write(p4d) pte_write(p4d_pte(p4d)) -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - *pgdp = __pgd(0); + *p4dp = __p4d(0); } -static inline int pgd_none(pgd_t pgd) +static inline int p4d_none(p4d_t p4d) { - return !pgd_raw(pgd); + return !p4d_raw(p4d); } -static inline int pgd_present(pgd_t pgd) +static inline int p4d_present(p4d_t p4d) { - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT)); } -static inline pte_t pgd_pte(pgd_t pgd) +static inline pte_t p4d_pte(p4d_t p4d) { - return __pte_raw(pgd_raw(pgd)); + return __pte_raw(p4d_raw(p4d)); } -static inline pgd_t pte_pgd(pte_t pte) +static inline p4d_t pte_p4d(pte_t pte) { - return __pgd_raw(pte_raw(pte)); + return __p4d_raw(pte_raw(pte)); } -static inline int pgd_bad(pgd_t pgd) +static inline int p4d_bad(p4d_t p4d) { if (radix_enabled()) - return radix__pgd_bad(pgd); - return hash__pgd_bad(pgd); + return radix__p4d_bad(p4d); + return hash__p4d_bad(p4d); } -#define pgd_access_permitted pgd_access_permitted -static inline bool pgd_access_permitted(pgd_t pgd, bool write) +#define p4d_access_permitted p4d_access_permitted +static inline bool p4d_access_permitted(p4d_t p4d, bool write) { - return pte_access_permitted(pgd_pte(pgd), write); + return pte_access_permitted(p4d_pte(p4d), write); } -extern struct page *pgd_page(pgd_t pgd); +extern struct page *p4d_page(p4d_t p4d); /* Pointers in the page table tree are physical addresses */ #define __pgtable_ptr_val(ptr) __pa(ptr) -#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) -#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) - -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) -#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) -#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) -#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) - -/* - * Find an entry in a page-table-directory. We combine the address region - * (the high order N bits) and the pgd portion of the address. 
- */ - -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) -#define pmd_offset(pudp,addr) \ - (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) -#define pte_offset_kernel(dir,addr) \ - (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) - -static inline void pte_unmap(pte_t *pte) { } - -/* to find an entry in a kernel page-table-directory */ -/* This now only contains the vmalloc pages */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) +#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) @@ -1139,8 +1121,11 @@ extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); -extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd); +static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmd) +{ +} + extern int hash__has_transparent_hugepage(void); static inline int has_transparent_hugepage(void) { @@ -1168,10 +1153,6 @@ static inline int pmd_large(pmd_t pmd) return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) -{ - return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); -} /* * For radix we should always find H_PAGE_HASHPTE zero. Hence * the below will work for radix too @@ -1260,6 +1241,11 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, } #define pmdp_collapse_flush pmdp_collapse_flush +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL +pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, + unsigned long addr, + pmd_t *pmdp, int full); + #define __HAVE_ARCH_PGTABLE_DEPOSIT static inline void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) @@ -1370,11 +1356,11 @@ static inline bool pud_is_leaf(pud_t pud) return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } -#define pgd_is_leaf pgd_is_leaf -#define pgd_leaf pgd_is_leaf -static inline bool pgd_is_leaf(pgd_t pgd) +#define p4d_is_leaf p4d_is_leaf +#define p4d_leaf p4d_is_leaf +static inline bool p4d_is_leaf(p4d_t p4d) { - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE)); } #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h new file mode 100644 index 000000000000..b7d9f4267bcd --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _ASM_POWERPC_BOOK3S_64_PKEYS_H +#define _ASM_POWERPC_BOOK3S_64_PKEYS_H + +#include <asm/book3s/64/hash-pkey.h> + +extern u64 __ro_after_init default_uamor; + +static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + if (!mmu_has_feature(MMU_FTR_PKEY)) + return 0x0UL; + + if (radix_enabled()) + BUG(); + return hash__vmflag_to_pte_pkey_bits(vm_flags); +} + +static inline u16 pte_to_pkey_bits(u64 pteflags) +{ + if (radix_enabled()) + BUG(); + return hash__pte_to_pkey_bits(pteflags); +} + +#endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h index d5f5ab73dc7f..035ceecd6d67 100644 --- 
a/arch/powerpc/include/asm/book3s/64/radix-4k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h @@ -11,7 +11,7 @@ #define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB /* - * One fragment per per page + * One fragment per page */ #define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3) #define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 08c222d5b764..0cba794c4fb8 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -30,7 +30,7 @@ /* Don't have anything in the reserved bits and leaf bits */ #define RADIX_PMD_BAD_BITS 0x60000000000000e0UL #define RADIX_PUD_BAD_BITS 0x60000000000000e0UL -#define RADIX_PGD_BAD_BITS 0x60000000000000e0UL +#define RADIX_P4D_BAD_BITS 0x60000000000000e0UL #define RADIX_PMD_SHIFT (PAGE_SHIFT + RADIX_PTE_INDEX_SIZE) #define RADIX_PUD_SHIFT (RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE) @@ -227,9 +227,9 @@ static inline int radix__pud_bad(pud_t pud) } -static inline int radix__pgd_bad(pgd_t pgd) +static inline int radix__p4d_bad(p4d_t p4d) { - return !!(pgd_val(pgd) & RADIX_PGD_BAD_BITS); + return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 64d02a704bcb..3b95769739c7 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -113,8 +113,7 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start, struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); /* Private function for use by PCI IO mapping code */ -extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end); +extern void __flush_hash_table_range(unsigned long start, unsigned long end); extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index ca8db193ae38..94439e0cefc9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -2,10 +2,25 @@ #ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H #define _ASM_POWERPC_TLBFLUSH_RADIX_H +#include <asm/hvcall.h> + struct vm_area_struct; struct mm_struct; struct mmu_gather; +static inline u64 psize_to_rpti_pgsize(unsigned long psize) +{ + if (psize == MMU_PAGE_4K) + return H_RPTI_PAGE_4K; + if (psize == MMU_PAGE_64K) + return H_RPTI_PAGE_64K; + if (psize == MMU_PAGE_2M) + return H_RPTI_PAGE_2M; + if (psize == MMU_PAGE_1G) + return H_RPTI_PAGE_1G; + return H_RPTI_PAGE_ALL; +} + static inline int mmu_get_ap(int psize) { return mmu_psize_defs[psize].ap; diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 609cab1d58f2..2124b7090db9 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -6,7 +6,7 @@ /* bytes per L1 cache line */ -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX) +#if defined(CONFIG_PPC_8xx) #define L1_CACHE_SHIFT 4 #define MAX_COPY_PREFETCH 1 #define IFETCH_ALIGN_SHIFT 2 diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index e92191b390f3..54764c6e922d 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ 
b/arch/powerpc/include/asm/cacheflush.h @@ -4,22 +4,9 @@ #ifndef _ASM_POWERPC_CACHEFLUSH_H #define _ASM_POWERPC_CACHEFLUSH_H -#ifdef __KERNEL__ - #include <linux/mm.h> #include <asm/cputable.h> - -/* - * No cache flushing is required when address mappings are changed, - * because the caches on PowerPCs are physically addressed. - */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_dup_mm(mm) do { } while (0) -#define flush_cache_range(vma, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) -#define flush_icache_page(vma, page) do { } while (0) -#define flush_cache_vunmap(start, end) do { } while (0) +#include <asm/cpu_has_feature.h> #ifdef CONFIG_PPC_BOOK3S_64 /* @@ -33,20 +20,20 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) { asm volatile("ptesync" ::: "memory"); } -#else -static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } -#endif +#define flush_cache_vmap flush_cache_vmap +#endif /* CONFIG_PPC_BOOK3S_64 */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) void flush_icache_range(unsigned long start, unsigned long stop); -extern void flush_icache_user_range(struct vm_area_struct *vma, - struct page *page, unsigned long addr, - int len); -extern void flush_dcache_icache_page(struct page *page); +#define flush_icache_range flush_icache_range + +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len); +#define flush_icache_user_page flush_icache_user_page + +void flush_dcache_icache_page(struct page *page); void __flush_dcache_icache(void *page); /** @@ -111,14 +98,6 @@ static inline void invalidate_dcache_range(unsigned long start, mb(); /* sync */ } -#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ - do { \ - memcpy(dst, src, len); \ - flush_icache_user_range(vma, page, vaddr, len); \ - } while (0) -#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ - memcpy(dst, src, len) - -#endif /* __KERNEL__ */ +#include <asm-generic/cacheflush.h> #endif /* _ASM_POWERPC_CACHEFLUSH_H */ diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 27183871eb3b..cf091c4c22e5 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -6,7 +6,6 @@ #include <linux/compiler.h> #include <asm/synch.h> #include <linux/bug.h> -#include <asm/asm-405.h> #ifdef __BIG_ENDIAN #define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) @@ -29,7 +28,6 @@ static inline u32 __xchg_##type##sfx(volatile void *p, u32 val) \ "1: lwarx %0,0,%3\n" \ " andc %1,%0,%5\n" \ " or %1,%1,%4\n" \ - PPC405_ERR77(0,%3) \ " stwcx. %1,0,%3\n" \ " bne- 1b\n" \ : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ @@ -60,7 +58,6 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ " bne- 2f\n" \ " andc %1,%0,%6\n" \ " or %1,%1,%5\n" \ - PPC405_ERR77(0,%3) \ " stwcx. %1,0,%3\n" \ " bne- 1b\n" \ br2 \ @@ -92,7 +89,6 @@ __xchg_u32_local(volatile void *p, unsigned long val) __asm__ __volatile__( "1: lwarx %0,0,%2 \n" - PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne- 1b" : "=&r" (prev), "+m" (*(volatile unsigned int *)p) @@ -109,7 +105,6 @@ __xchg_u32_relaxed(u32 *p, unsigned long val) __asm__ __volatile__( "1: lwarx %0,0,%2\n" - PPC405_ERR77(0, %2) " stwcx. 
%3,0,%2\n" " bne- 1b" : "=&r" (prev), "+m" (*p) @@ -127,7 +122,6 @@ __xchg_u64_local(volatile void *p, unsigned long val) __asm__ __volatile__( "1: ldarx %0,0,%2 \n" - PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne- 1b" : "=&r" (prev), "+m" (*(volatile unsigned long *)p) @@ -144,7 +138,6 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) __asm__ __volatile__( "1: ldarx %0,0,%2\n" - PPC405_ERR77(0, %2) " stdcx. %3,0,%2\n" " bne- 1b" : "=&r" (prev), "+m" (*p) @@ -229,7 +222,6 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw 0,%0,%3\n\ bne- 2f\n" - PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" PPC_ATOMIC_EXIT_BARRIER @@ -252,7 +244,6 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ cmpw 0,%0,%3\n\ bne- 2f\n" - PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" "\n\ @@ -273,7 +264,6 @@ __cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new) "1: lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n" " cmpw 0,%0,%3\n" " bne- 2f\n" - PPC405_ERR77(0, %2) " stwcx. %4,0,%2\n" " bne- 1b\n" "2:" @@ -301,7 +291,6 @@ __cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) "1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n" " cmpw 0,%0,%3\n" " bne- 2f\n" - PPC405_ERR77(0, %2) " stwcx. %4,0,%2\n" " bne- 1b\n" PPC_ACQUIRE_BARRIER diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 898b54262881..eacc9102c251 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -11,6 +11,7 @@ #include <linux/string.h> #include <linux/kallsyms.h> #include <asm/asm-compat.h> +#include <asm/inst.h> /* Flags for create_branch: * "b" == create_branch(addr, target, 0); @@ -22,33 +23,33 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); -unsigned int create_branch(const unsigned int *addr, - unsigned long target, int flags); -unsigned int create_cond_branch(const unsigned int *addr, - unsigned long target, int flags); -int patch_branch(unsigned int *addr, unsigned long target, int flags); -int patch_instruction(unsigned int *addr, unsigned int instr); -int raw_patch_instruction(unsigned int *addr, unsigned int instr); +int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr, + unsigned long target, int flags); +int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, + unsigned long target, int flags); +int patch_branch(struct ppc_inst *addr, unsigned long target, int flags); +int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); +int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); static inline unsigned long patch_site_addr(s32 *site) { return (unsigned long)site + *site; } -static inline int patch_instruction_site(s32 *site, unsigned int instr) +static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) { - return patch_instruction((unsigned int *)patch_site_addr(site), instr); + return patch_instruction((struct ppc_inst *)patch_site_addr(site), instr); } static inline int patch_branch_site(s32 *site, unsigned long target, int flags) { - return patch_branch((unsigned int *)patch_site_addr(site), target, flags); + return patch_branch((struct ppc_inst *)patch_site_addr(site), target, flags); } static inline int modify_instruction(unsigned int *addr, unsigned int clr, unsigned int set) { - return patch_instruction(addr, (*addr & ~clr) | set); + return patch_instruction((struct ppc_inst *)addr, 
ppc_inst((*addr & ~clr) | set)); } static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set) @@ -56,13 +57,13 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned return modify_instruction((unsigned int *)patch_site_addr(site), clr, set); } -int instr_is_relative_branch(unsigned int instr); -int instr_is_relative_link_branch(unsigned int instr); -int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); -unsigned long branch_target(const unsigned int *instr); -unsigned int translate_branch(const unsigned int *dest, - const unsigned int *src); -extern bool is_conditional_branch(unsigned int instr); +int instr_is_relative_branch(struct ppc_inst instr); +int instr_is_relative_link_branch(struct ppc_inst instr); +int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr); +unsigned long branch_target(const struct ppc_inst *instr); +int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, + const struct ppc_inst *src); +extern bool is_conditional_branch(struct ppc_inst instr); #ifdef CONFIG_PPC_BOOK3E_64 void __patch_exception(int exc, unsigned long addr); #define patch_exception(exc, name) do { \ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 40a4d3c6fd99..32a15dc49e8c 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -9,6 +9,11 @@ #ifndef __ASSEMBLY__ +/* + * Added to include __machine_check_early_realmode_* functions + */ +#include <asm/mce.h> + /* This structure can grow, it's real size is used by head.S code * via the mkdefs mechanism. */ @@ -198,7 +203,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000) -#define CPU_FTR_PKEY LONG_ASM_CONST(0x0000000400000000) +/* LONG_ASM_CONST(0x0000000400000000) Free */ #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000) #define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000) @@ -213,6 +218,8 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000) #define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) #define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000) +#define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000) +#define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000) #ifndef __ASSEMBLY__ @@ -437,7 +444,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ - CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) + CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX ) #define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -447,7 +454,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP ) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \ 
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ @@ -458,8 +465,8 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ - CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ - CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_P9_TLBIE_STQ_BUG | \ + CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) #define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG) #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \ CPU_FTR_P9_RADIX_PREFETCH_BUG | \ @@ -467,6 +474,17 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ CPU_FTR_P9_TM_HV_ASSIST | \ CPU_FTR_P9_TM_XER_SO_BUG) +#define CPU_FTRS_POWER10 (CPU_FTR_LWSYNC | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ + CPU_FTR_MMCRA | CPU_FTR_SMT | \ + CPU_FTR_COHERENT_ICACHE | \ + CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \ + CPU_FTR_DAWR | CPU_FTR_DAWR1) #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -485,14 +503,14 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \ - CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2) + CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \ - CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2) + CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | CPU_FTRS_POWER10) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif #else @@ -614,7 +632,12 @@ enum { }; #endif /* __powerpc64__ */ -#define HBP_NUM 1 +/* + * Maximum number of hw breakpoint supported on powerpc. Number of + * breakpoints supported by actual hw might be less than this, which + * is decided at run time in nr_wp_slots(). 
+ */ +#define HBP_NUM_MAX 2 #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 0fccd5ea1e9a..ed75d1c318e3 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -36,6 +36,8 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct) return mulhdu((__force u64) ct, __cputime_usec_factor); } +#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime) + /* * PPC64 uses PACA which is task independent for storing accounting data while * PPC32 uses struct thread_info, therefore at task switch the accounting data @@ -65,7 +67,7 @@ static inline void arch_vtime_task_switch(struct task_struct *prev) /* * account_cpu_user_entry/exit runs "unreconciled", so can't trace, - * can't use use get_paca() + * can't use get_paca() */ static notrace inline void account_cpu_user_entry(void) { diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h new file mode 100644 index 000000000000..68d9717cc5ee --- /dev/null +++ b/arch/powerpc/include/asm/crashdump-ppc64.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H +#define _ASM_POWERPC_CRASHDUMP_PPC64_H + +/* + * Backup region - first 64KB of System RAM + * + * If ever the below macros are to be changed, please be judicious. + * The implicit assumptions are: + * - start, end & size are less than UINT32_MAX. + * - start & size are at least 8 byte aligned. + * + * For implementation details: arch/powerpc/purgatory/trampoline_64.S + */ +#define BACKUP_SRC_START 0 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 4ce6808deed3..3e9da22a2779 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -11,8 +11,10 @@ #include <linux/smp.h> #include <linux/threads.h> +#include <asm/cputhreads.h> #include <asm/ppc-opcode.h> #include <asm/feature-fixups.h> +#include <asm/kvm_ppc.h> #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) @@ -87,9 +89,6 @@ static inline void ppc_msgsync(void) #endif /* CONFIG_PPC_BOOK3S */ -extern void doorbell_global_ipi(int cpu); -extern void doorbell_core_ipi(int cpu); -extern int doorbell_try_core_ipi(int cpu); extern void doorbell_exception(struct pt_regs *regs); static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) @@ -100,4 +99,63 @@ static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) _ppc_msgsnd(msg); } +#ifdef CONFIG_SMP + +/* + * Doorbells must only be used if CPU_FTR_DBELL is available. + * msgsnd is used in HV, and msgsndp is used in !HV. + * + * These should be used by platform code that is aware of restrictions. + * Other arch code should use ->cause_ipi. + * + * doorbell_global_ipi() sends a dbell to any target CPU. + * Must be used only by architectures that address msgsnd target + * by PIR/get_hard_smp_processor_id. + */ +static inline void doorbell_global_ipi(int cpu) +{ + u32 tag = get_hard_smp_processor_id(cpu); + + kvmppc_set_host_ipi(cpu); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * doorbell_core_ipi() sends a dbell to a target CPU in the same core. 
+ * Must be used only by architectures that address msgsnd target + * by TIR/cpu_thread_in_core. + */ +static inline void doorbell_core_ipi(int cpu) +{ + u32 tag = cpu_thread_in_core(cpu); + + kvmppc_set_host_ipi(cpu); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * Attempt to cause a core doorbell if destination is on the same core. + * Returns 1 on success, 0 on failure. + */ +static inline int doorbell_try_core_ipi(int cpu) +{ + int this_cpu = get_cpu(); + int ret = 0; + + if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { + doorbell_core_ipi(cpu); + ret = 1; + } + + put_cpu(); + + return ret; +} + +#endif /* CONFIG_SMP */ + #endif /* _ASM_POWERPC_DBELL_H */ diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 7756026b95ca..ec57daf87f40 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -45,7 +45,7 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; } static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif -void __set_breakpoint(struct arch_hw_breakpoint *brk); +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk); bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 266542769e4b..219559d65864 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -19,11 +19,6 @@ struct iommu_table; */ struct dev_archdata { /* - * Set to %true if the dma_iommu_ops are requested to use a direct - * window instead of dynamically mapping memory. - */ - bool iommu_bypass : 1; - /* * These two used to be a union. However, with the hybrid ops we need * both so here we store both a DMA offset for direct mappings and * an iommu_table for remapped DMA. 
@@ -34,9 +29,6 @@ struct dev_archdata { struct iommu_table *iommu_table_base; #endif -#ifdef CONFIG_IOMMU_API - void *iommu_domain; -#endif #ifdef CONFIG_PPC64 struct pci_dn *pci_data; #endif @@ -49,6 +41,9 @@ struct dev_archdata { #ifdef CONFIG_CXL_BASE struct cxl_context *cxl_ctx; #endif +#ifdef CONFIG_PCI_IOV + void *iov_data; +#endif }; struct pdev_archdata { diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 28c3d936fdf3..17ccc6474ab6 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -65,6 +65,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_ASSIGNED 0x00000008 #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 +#define DRCONF_MEM_HOTREMOVABLE 0x00000100 static inline u32 drmem_lmb_size(void) { @@ -89,13 +90,14 @@ static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb) } u64 drmem_lmb_memory_max(void); -void __init walk_drmem_lmbs(struct device_node *dn, - void (*func)(struct drmem_lmb *, const __be32 **)); +int walk_drmem_lmbs(struct device_node *dn, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)); int drmem_update_dt(void); #ifdef CONFIG_PPC_PSERIES -void __init walk_drmem_lmbs_early(unsigned long node, - void (*func)(struct drmem_lmb *, const __be32 **)); +int __init +walk_drmem_lmbs_early(unsigned long node, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)); #endif static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h new file mode 100644 index 000000000000..1625888f27ef --- /dev/null +++ b/arch/powerpc/include/asm/dtl.h @@ -0,0 +1,52 @@ +#ifndef _ASM_POWERPC_DTL_H +#define _ASM_POWERPC_DTL_H + +#include <asm/lppaca.h> +#include <linux/spinlock_types.h> + +/* + * Layout of entries in the hypervisor's dispatch trace log buffer. + */ +struct dtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + __be16 processor_id; + __be32 enqueue_to_dispatch_time; + __be32 ready_to_enqueue_time; + __be32 waiting_to_ready_time; + __be64 timebase; + __be64 fault_addr; + __be64 srr0; + __be64 srr1; +}; + +#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ +#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) + +/* + * Dispatch trace log event enable mask: + * 0x1: voluntary virtual processor waits + * 0x2: time-slice preempts + * 0x4: virtual partition memory page faults + */ +#define DTL_LOG_CEDE 0x1 +#define DTL_LOG_PREEMPT 0x2 +#define DTL_LOG_FAULT 0x4 +#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) + +extern struct kmem_cache *dtl_cache; +extern rwlock_t dtl_access_lock; + +/* + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. 
+ */ +extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + +extern void register_dtl_buffer(int cpu); +extern void alloc_dtl_buffers(unsigned long *time_limit); +extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); + +#endif /* _ASM_POWERPC_DTL_H */ diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 964a54292b36..d5f369bcd130 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -133,7 +133,6 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe) struct eeh_dev { int mode; /* EEH mode */ - int class_code; /* Class code of the device */ int bdfn; /* bdfn of device (for cfg ops) */ struct pci_controller *controller; int pe_config_addr; /* PE config address */ @@ -148,7 +147,10 @@ struct eeh_dev { struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ bool in_error; /* Error flag for edev */ + + /* VF specific properties */ struct pci_dev *physfn; /* Associated SRIOV PF */ + int vf_index; /* Index of this VF */ }; /* "fmt" must be a simple literal string */ @@ -217,18 +219,17 @@ struct eeh_ops { int (*init)(void); struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); - int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *delay); int (*reset)(struct eeh_pe *pe, int option); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); int (*err_inject)(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); - int (*read_config)(struct pci_dn *pdn, int where, int size, u32 *val); - int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val); + int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val); + int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); - int (*restore_config)(struct pci_dn *pdn); - int (*notify_resume)(struct pci_dn *pdn); + int (*restore_config)(struct eeh_dev *edev); + int (*notify_resume)(struct eeh_dev *edev); }; extern int eeh_subsystem_flags; @@ -282,8 +283,8 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no, int config_addr); -int eeh_add_to_parent_pe(struct eeh_dev *edev); -int eeh_rmv_from_parent_pe(struct eeh_dev *edev); +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent); +int eeh_pe_tree_remove(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); void *eeh_pe_traverse(struct eeh_pe *root, eeh_pe_traverse_func fn, void *flag); @@ -293,8 +294,6 @@ void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); -struct eeh_dev *eeh_dev_init(struct pci_dn *pdn); -void eeh_dev_phb_init_dynamic(struct pci_controller *phb); void eeh_show_enabled(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); @@ -314,7 +313,6 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); -int eeh_restore_vf_config(struct pci_dn *pdn); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. 
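Aside on the new asm/dtl.h above: it carries the dispatch trace log layout and the dtl_consumer hook that this patch later removes from lppaca.h. A sketch of a consumer, under the assumption that it is hooked to dtl_consumer the same way the existing DTL tracing code does:

#include <asm/dtl.h>
#include <asm/byteorder.h>
#include <linux/printk.h>

static void example_dtl_consumer(struct dtl_entry *entry, u64 index)
{
	/* all DTL fields are stored big-endian by the hypervisor */
	pr_debug("dtl[%llu]: cpu %u, enqueue-to-dispatch %u tb ticks\n",
		 index, be16_to_cpu(entry->processor_id),
		 be32_to_cpu(entry->enqueue_to_dispatch_time));
}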
@@ -340,11 +338,6 @@ static inline bool eeh_enabled(void) static inline void eeh_show_enabled(void) { } -static inline void *eeh_dev_init(struct pci_dn *pdn, void *data) -{ - return NULL; -} - static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } static inline int eeh_check_failure(const volatile void __iomem *token) @@ -362,6 +355,7 @@ static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) +static inline int eeh_phb_pe_create(struct pci_controller *phb) { return 0; } #endif /* CONFIG_EEH */ #if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 57c229a86f08..53ed2ca40151 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -53,8 +53,6 @@ static inline void ppc_elf_core_copy_regs(elf_gregset_t elf_regs, } #define ELF_CORE_COPY_REGS(gregs, regs) ppc_elf_core_copy_regs(gregs, regs); -typedef elf_vrregset_t elf_fpxregset_t; - /* ELF_HWCAP yields a mask that user programs can use to figure out what instruction set this cpu supports. This could be done in userspace, but it's not easy, and we've already done it here. */ diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index d3a7e36f1402..c99ba08a408d 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -37,7 +37,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* A "hypercall" is an "sc 1" instruction. This header file file provides C +/* A "hypercall" is an "sc 1" instruction. This header file provides C * wrapper functions for the ePAPR hypervisor interface. It is inteded * for use by Linux device drivers and other operating systems. * @@ -246,7 +246,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt, * ev_int_eoi - signal the end of interrupt processing * @interrupt: the interrupt number * - * This function signals the end of processing for the the specified + * This function signals the end of processing for the specified * interrupt, which must be the interrupt currently in service. By * definition, this is also the highest-priority interrupt. 
* diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 54a98ef7d7fe..40cdcb2fb057 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -66,14 +66,7 @@ #define EX_TLB_SRR0 (10 * 8) #define EX_TLB_SRR1 (11 * 8) #define EX_TLB_R7 (12 * 8) -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -#define EX_TLB_R8 (13 * 8) -#define EX_TLB_R9 (14 * 8) -#define EX_TLB_LR (15 * 8) -#define EX_TLB_SIZE (16 * 8) -#else #define EX_TLB_SIZE (13 * 8) -#endif #define START_EXCEPTION(label) \ .globl exc_##label##_book3e; \ @@ -110,8 +103,7 @@ exc_##label##_book3e: std r11,EX_TLB_R12(r12); \ mtspr SPRN_SPRG_TLB_EXFRAME,r14; \ std r15,EX_TLB_SRR1(r12); \ - std r16,EX_TLB_SRR0(r12); \ - TLB_MISS_PROLOG_STATS + std r16,EX_TLB_SRR0(r12); /* And these are the matching epilogs that restores things * @@ -143,7 +135,6 @@ exc_##label##_book3e: mtspr SPRN_SRR0,r15; \ ld r15,EX_TLB_R15(r12); \ mtspr SPRN_SRR1,r16; \ - TLB_MISS_RESTORE_STATS \ ld r16,EX_TLB_R16(r12); \ ld r12,EX_TLB_R12(r12); \ @@ -158,53 +149,15 @@ exc_##label##_book3e: addi r11,r13,PACA_EXTLB; \ TLB_MISS_RESTORE(r11) -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -#define TLB_MISS_PROLOG_STATS \ - mflr r10; \ - std r8,EX_TLB_R8(r12); \ - std r9,EX_TLB_R9(r12); \ - std r10,EX_TLB_LR(r12); -#define TLB_MISS_RESTORE_STATS \ - ld r16,EX_TLB_LR(r12); \ - ld r9,EX_TLB_R9(r12); \ - ld r8,EX_TLB_R8(r12); \ - mtlr r16; -#define TLB_MISS_STATS_D(name) \ - addi r9,r13,MMSTAT_DSTATS+name; \ - bl tlb_stat_inc; -#define TLB_MISS_STATS_I(name) \ - addi r9,r13,MMSTAT_ISTATS+name; \ - bl tlb_stat_inc; -#define TLB_MISS_STATS_X(name) \ - ld r8,PACA_EXTLB+EX_TLB_ESR(r13); \ - cmpdi cr2,r8,-1; \ - beq cr2,61f; \ - addi r9,r13,MMSTAT_DSTATS+name; \ - b 62f; \ -61: addi r9,r13,MMSTAT_ISTATS+name; \ -62: bl tlb_stat_inc; -#define TLB_MISS_STATS_SAVE_INFO \ - std r14,EX_TLB_ESR(r12); /* save ESR */ -#define TLB_MISS_STATS_SAVE_INFO_BOLTED \ - std r14,PACA_EXTLB+EX_TLB_ESR(r13); /* save ESR */ -#else -#define TLB_MISS_PROLOG_STATS -#define TLB_MISS_RESTORE_STATS -#define TLB_MISS_PROLOG_STATS_BOLTED -#define TLB_MISS_RESTORE_STATS_BOLTED -#define TLB_MISS_STATS_D(name) -#define TLB_MISS_STATS_I(name) -#define TLB_MISS_STATS_X(name) -#define TLB_MISS_STATS_Y(name) -#define TLB_MISS_STATS_SAVE_INFO -#define TLB_MISS_STATS_SAVE_INFO_BOLTED -#endif - #define SET_IVOR(vector_number, vector_offset) \ LOAD_REG_ADDR(r3,interrupt_base_book3e);\ ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; - +/* + * powerpc relies on return from interrupt/syscall being context synchronising + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional + * synchronisation instructions. + */ #define RFI_TO_KERNEL \ rfi diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 47bd4ea0837d..ebe95aa04d53 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -68,6 +68,14 @@ * * The nop instructions allow us to insert one or more instructions to flush the * L1-D cache when returning to userspace or a guest. + * + * powerpc relies on return from interrupt/syscall being context synchronising + * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE + * without additional synchronisation instructions. + * + * soft-masked interrupt replay does not include a context-synchronising rfid, + * but those always return to kernel, the sync is only required when returning + * to user. 
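The comments added to the RFI/RFSCV macros document why powerpc can now select ARCH_HAS_MEMBARRIER_SYNC_CORE (see the Kconfig hunk): the exception return itself is the context-synchronising event. Purely as an illustration of what this buys userspace, and not code from this patch, a JIT-style thread could publish rewritten code as below, assuming it registered the SYNC_CORE membarrier beforehand:

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

static int membarrier(int cmd, unsigned int flags)
{
	return (int)syscall(__NR_membarrier, cmd, flags);
}

void publish_patched_code(void)
{
	/* ...rewrite the instructions and flush the affected icache lines... */

	/*
	 * Requires an earlier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE.
	 * After this returns, every thread of the process is guaranteed to
	 * execute the new instructions once it next returns to user mode.
	 */
	membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0);
}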
*/ #define RFI_FLUSH_SLOT \ RFI_FLUSH_FIXUP_SECTION; \ @@ -123,6 +131,12 @@ hrfid; \ b hrfi_flush_fallback +#define RFSCV_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ + RFI_FLUSH_SLOT; \ + RFSCV; \ + b rfscv_flush_fallback + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index c814a2b55389..8d61c8f3fec4 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -64,12 +64,14 @@ struct fadump_memory_range { }; /* fadump memory ranges info */ +#define RNG_NAME_SZ 16 struct fadump_mrange_info { - char name[16]; + char name[RNG_NAME_SZ]; struct fadump_memory_range *mem_ranges; u32 mem_ranges_sz; u32 mem_range_cnt; u32 max_mem_ranges; + bool is_static; }; /* Platform specific callback functions */ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index ca33f4ef6cb4..0b295bdb201e 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000) #define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000) #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) +#define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #ifndef __ASSEMBLY__ @@ -71,7 +72,8 @@ enum { FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | - FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR, + FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | + FW_FEATURE_RPT_INVALIDATE, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, @@ -128,9 +130,16 @@ extern void machine_check_fwnmi(void); /* This is true if we are using the firmware NMI handler (typically LPAR) */ extern int fwnmi_active; +extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +#endif + #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 2ef155a3c821..6bfc87915d5d 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -16,8 +16,8 @@ #ifndef __ASSEMBLY__ #include <linux/sizes.h> +#include <linux/pgtable.h> #include <asm/page.h> -#include <asm/pgtable.h> #ifdef CONFIG_HIGHMEM #include <linux/threads.h> #include <asm/kmap_types.h> @@ -52,7 +52,7 @@ enum fixed_addresses { FIX_HOLE, /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, - FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+((128*1024)/PAGE_SIZE)-1, + FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, @@ -86,6 +86,10 @@ enum fixed_addresses { #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +#define FIXMAP_ALIGNED_SIZE (ALIGN(FIXADDR_TOP, PGDIR_SIZE) - \ + ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE)) +#define FIXMAP_PTE_SIZE (FIXMAP_ALIGNED_SIZE / PGDIR_SIZE * PTE_TABLE_SIZE) + #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG #define FIXMAP_PAGE_IO 
PAGE_KERNEL_NCG diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h index 167c44b58848..7af9a68fd949 100644 --- a/arch/powerpc/include/asm/floppy.h +++ b/arch/powerpc/include/asm/floppy.h @@ -13,8 +13,8 @@ #include <asm/machdep.h> -#define fd_inb(port) inb_p(port) -#define fd_outb(value,port) outb_p(value,port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_enable_dma() enable_dma(FLOPPY_DMA) #define fd_disable_dma() fd_ops->_disable_dma(FLOPPY_DMA) @@ -61,21 +61,22 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) st = 1; for (lcount=virtual_dma_count, lptr=virtual_dma_addr; lcount; lcount--, lptr++) { - st=inb(virtual_dma_port+4) & 0xa0 ; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port+5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port+5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port+4); + st = inb(virtual_dma_port + FD_STATUS); - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count=0; doing_vdma = 0; diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index f54a08a2cd70..bc76970b6ee5 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -108,9 +108,23 @@ static inline void this_cpu_enable_ftrace(void) { get_paca()->ftrace_enabled = 1; } + +/* Disable ftrace on this CPU if possible (may not be implemented) */ +static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) +{ + get_paca()->ftrace_enabled = ftrace_enabled; +} + +static inline u8 this_cpu_get_ftrace_enabled(void) +{ + return get_paca()->ftrace_enabled; +} + #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } +static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { } +static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } #endif /* CONFIG_PPC64 */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index f187bb5e524e..e93ee3202e4c 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -8,14 +8,12 @@ #include <linux/uaccess.h> #include <asm/errno.h> #include <asm/synch.h> -#include <asm/asm-405.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ __asm__ __volatile ( \ PPC_ATOMIC_ENTRY_BARRIER \ "1: lwarx %0,0,%2\n" \ insn \ - PPC405_ERR77(0, %2) \ "2: stwcx. %1,0,%2\n" \ "bne- 1b\n" \ PPC_ATOMIC_EXIT_BARRIER \ @@ -82,7 +80,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ cmpw 0,%1,%4\n\ bne- 3f\n" - PPC405_ERR77(0,%3) "2: stwcx. 
%5,0,%3\n\ bne- 1b\n" PPC_ATOMIC_EXIT_BARRIER diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index f1e9067bd5ac..f133b5930ae1 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -13,7 +13,6 @@ typedef struct { unsigned int pmu_irqs; unsigned int mce_exceptions; unsigned int spurious_irqs; - unsigned int hmi_exceptions; unsigned int sreset_irqs; #ifdef CONFIG_PPC_WATCHDOG unsigned int soft_nmi_irqs; diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 2dabcf668292..4cb9efa2eb21 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -128,7 +128,7 @@ name: .if ((start) % (size) != 0); \ .error "Fixed section exception vector misalignment"; \ .endif; \ - .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100); \ + .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100) && ((size) != 0x1000); \ .error "Fixed section exception vector bad size"; \ .endif; \ .if (start) < sname##_start; \ diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index a4b65b186ec6..104026f7d6bc 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -30,7 +30,6 @@ #include <asm/fixmap.h> extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; extern pte_t *pkmap_page_table; /* @@ -59,33 +58,6 @@ extern pte_t *pkmap_page_table; #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -extern void *kmap_high(struct page *page); -extern void kunmap_high(struct page *page); -extern void *kmap_atomic_prot(struct page *page, pgprot_t prot); -extern void __kunmap_atomic(void *kvaddr); - -static inline void *kmap(struct page *page) -{ - might_sleep(); - if (!PageHighMem(page)) - return page_address(page); - return kmap_high(page); -} - -static inline void kunmap(struct page *page) -{ - BUG_ON(in_interrupt()); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} - -static inline void *kmap_atomic(struct page *page) -{ - return kmap_atomic_prot(page, kmap_prot); -} - - #define flush_cache_kmaps() flush_cache_all() #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index bd6504c28c2f..013165e62618 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -30,6 +30,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return slice_is_hugepage_only_range(mm, addr, len); return 0; } +#define is_hugepage_only_range is_hugepage_only_range #define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, @@ -40,11 +41,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PPC64 return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1)); -#else - return __pte(pte_update(ptep, ~0UL, 0)); -#endif } #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH @@ -60,10 +57,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - +void gigantic_hugetlb_cma_reserve(void) __init; #include <asm-generic/hugetlb.h> #else /* ! 
CONFIG_HUGETLB_PAGE */ @@ -78,6 +72,12 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, { return NULL; } + + +static inline void __init gigantic_hugetlb_cma_reserve(void) +{ +} + #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e90c073e437e..fbb377055471 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -237,7 +237,7 @@ #define H_CREATE_RPT 0x1A4 #define H_REMOVE_RPT 0x1A8 #define H_REGISTER_RPAGES 0x1AC -#define H_DISABLE_AND_GETC 0x1B0 +#define H_DISABLE_AND_GET 0x1B0 #define H_ERROR_DATA 0x1B4 #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC @@ -305,7 +305,8 @@ #define H_SCM_UNBIND_ALL 0x3FC #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 -#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS +#define H_RPT_INVALIDATE 0x448 +#define MAX_HCALL_OPCODE H_RPT_INVALIDATE /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -354,9 +355,10 @@ /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 -#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_SET_DAWR0 2 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +#define H_SET_MODE_RESOURCE_SET_DAWR1 5 /* Values for argument to H_SIGNAL_SYS_RESET */ #define H_SIGNAL_SYS_RESET_ALL -1 @@ -389,6 +391,37 @@ #define PROC_TABLE_RADIX 0x04 #define PROC_TABLE_GTSE 0x01 +/* + * Defines for + * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information. + */ + +/* Type of translation to invalidate (type) */ +#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */ +#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */ +#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */ +/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */ +#define H_RPTI_TYPE_PRT 0x0008 +/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */ +#define H_RPTI_TYPE_PAT 0x0008 +#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \ + H_RPTI_TYPE_PRT) +#define H_RPTI_TYPE_NESTED_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \ + H_RPTI_TYPE_PAT) + +/* Invalidation targets (target) */ +#define H_RPTI_TARGET_CMMU 0x01 /* All virtual processors in the partition */ +#define H_RPTI_TARGET_CMMU_LOCAL 0x02 /* Current virtual processor */ +/* All nest/accelerator agents in use by the partition */ +#define H_RPTI_TARGET_NMMU 0x04 + +/* Page size mask (page sizes) */ +#define H_RPTI_PAGE_4K 0x01 +#define H_RPTI_PAGE_64K 0x02 +#define H_RPTI_PAGE_2M 0x04 +#define H_RPTI_PAGE_1G 0x08 +#define H_RPTI_PAGE_ALL (-1UL) + #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index f2f8d8aa8e3b..db206a7f38e2 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -9,6 +9,8 @@ #ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H #define _PPC_BOOK3S_64_HW_BREAKPOINT_H +#include <asm/cpu_has_feature.h> + #ifdef __KERNEL__ struct arch_hw_breakpoint { unsigned long address; @@ -17,7 +19,7 @@ struct arch_hw_breakpoint { u16 hw_len; /* length programmed in hw */ }; -/* Note: Don't change the the first 6 bits below as they are in the same order +/* Note: Don't change the first 6 bits below as they are in the same order * as the dabr and dabrx. 
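On the hvcall.h additions above: H_RPT_INVALIDATE takes bitmask arguments built from the H_RPTI_TYPE_*, H_RPTI_TARGET_* and H_RPTI_PAGE_* values. A hedged sketch of how a process-scoped invalidation might be composed; the hcall wrapper usage is an assumption for illustration, not taken from this patch:

#include <asm/hvcall.h>

static long example_rpt_invalidate_pid(u64 pid, u64 start, u64 end)
{
	/* flush TLB, page-walk cache and process-table caching for this PID */
	u64 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | H_RPTI_TYPE_PRT;
	/* cover both the CPU MMUs and any nest MMU agents */
	u64 target = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;

	return plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type,
				  H_RPTI_PAGE_ALL, start, end);
}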
*/ #define HW_BRK_TYPE_READ 0x01 @@ -34,15 +36,21 @@ struct arch_hw_breakpoint { #define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \ HW_BRK_TYPE_HYP) +/* Minimum granularity */ #ifdef CONFIG_PPC_8xx -#define HW_BREAKPOINT_ALIGN 0x3 +#define HW_BREAKPOINT_SIZE 0x4 #else -#define HW_BREAKPOINT_ALIGN 0x7 +#define HW_BREAKPOINT_SIZE 0x8 #endif #define DABR_MAX_LEN 8 #define DAWR_MAX_LEN 512 +static inline int nr_wp_slots(void) +{ + return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1; +} + #ifdef CONFIG_HAVE_HW_BREAKPOINT #include <linux/kdebug.h> #include <asm/reg.h> @@ -64,7 +72,6 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); int arch_install_hw_breakpoint(struct perf_event *bp); void arch_uninstall_hw_breakpoint(struct perf_event *bp); -void arch_unregister_hw_breakpoint(struct perf_event *bp); void hw_breakpoint_pmu_read(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); @@ -73,14 +80,14 @@ extern void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs); static inline void hw_breakpoint_disable(void) { - struct arch_hw_breakpoint brk; - - brk.address = 0; - brk.type = 0; - brk.len = 0; - brk.hw_len = 0; - if (ppc_breakpoint_available()) - __set_breakpoint(&brk); + int i; + struct arch_hw_breakpoint null_brk = {0}; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + __set_breakpoint(i, &null_brk); } extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); @@ -99,10 +106,10 @@ static inline bool dawr_enabled(void) { return dawr_force_enable; } -int set_dawr(struct arch_hw_breakpoint *brk); +int set_dawr(int nr, struct arch_hw_breakpoint *brk); #else static inline bool dawr_enabled(void) { return false; } -static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; } +static inline int set_dawr(int nr, struct arch_hw_breakpoint *brk) { return -1; } #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3a0db7b0b46e..35060be09073 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void) #define powerpc_local_irq_pmu_save(flags) \ do { \ raw_local_irq_pmu_save(flags); \ - trace_hardirqs_off(); \ + if (!raw_irqs_disabled_flags(flags)) \ + trace_hardirqs_off(); \ } while(0) #define powerpc_local_irq_pmu_restore(flags) \ do { \ - if (raw_irqs_disabled_flags(flags)) { \ - raw_local_irq_pmu_restore(flags); \ - trace_hardirqs_off(); \ - } else { \ + if (!raw_irqs_disabled_flags(flags)) \ trace_hardirqs_on(); \ - raw_local_irq_pmu_restore(flags); \ - } \ + raw_local_irq_pmu_restore(flags); \ } while(0) #else #define powerpc_local_irq_pmu_save(flags) \ diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index b3b0f2d020f0..ae02eb53d6ef 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -10,7 +10,7 @@ * * © Copyright 1995 Apple Computer, Inc. All rights reserved. * - * It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf + * It's available online from https://www.cpu.lu/~mlan/ftp/MacTech.pdf * You can obtain paper copies of this book from computer bookstores or by * writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San * Francisco, CA 94104. Reference ISBN 1-55860-393-X. 
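The hw_breakpoint.h and debug.h changes in this series turn the breakpoint state into an array indexed by slot, sized by nr_wp_slots() (2 when CPU_FTR_DAWR1 is present, 1 otherwise). A small sketch of the resulting caller pattern; the brk[] array stands in for wherever a caller keeps its per-slot state:

#include <asm/hw_breakpoint.h>
#include <asm/debug.h>

static void example_restore_breakpoints(struct arch_hw_breakpoint *brk)
{
	int i;

	if (!ppc_breakpoint_available())
		return;

	/* program every slot the CPU provides, one __set_breakpoint() each */
	for (i = 0; i < nr_wp_slots(); i++)
		__set_breakpoint(i, &brk[i]);
}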
diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index 9872f85d356f..b0c70a35fd0e 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -77,6 +77,8 @@ struct coprocessor_completion_block { #define CSB_CC_CHAIN (37) #define CSB_CC_SEQUENCE (38) #define CSB_CC_HW (39) +/* P9 DD2 NX Workbook 3.2 (Table 4-36): Address translation fault */ +#define CSB_CC_FAULT_ADDRESS (250) #define CSB_SIZE (0x10) #define CSB_ALIGN CSB_SIZE @@ -108,6 +110,17 @@ struct data_descriptor_entry { __be64 address; } __packed __aligned(DDE_ALIGN); +/* 4.3.2 NX-stamped Fault CRB */ + +#define NX_STAMP_ALIGN (0x10) + +struct nx_fault_stamp { + __be64 fault_storage_addr; + __be16 reserved; + __u8 flags; + __u8 fault_status; + __be32 pswid; +} __packed __aligned(NX_STAMP_ALIGN); /* Chapter 6.5.2 Coprocessor-Request Block (CRB) */ @@ -135,10 +148,15 @@ struct coprocessor_request_block { struct coprocessor_completion_block ccb; - u8 reserved[48]; + union { + struct nx_fault_stamp nx; + u8 reserved[16]; + } stamp; + + u8 reserved[32]; struct coprocessor_status_block csb; -} __packed __aligned(CRB_ALIGN); +} __packed; /* RFC02167 Initiate Coprocessor Instructions document diff --git a/arch/powerpc/include/asm/idle.h b/arch/powerpc/include/asm/idle.h new file mode 100644 index 000000000000..accd1f50085a --- /dev/null +++ b/arch/powerpc/include/asm/idle.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_IDLE_H +#define _ASM_POWERPC_IDLE_H +#include <asm/runlatch.h> +#include <asm/paca.h> + +#ifdef CONFIG_PPC_PSERIES +DECLARE_PER_CPU(u64, idle_spurr_cycles); +DECLARE_PER_CPU(u64, idle_entry_purr_snap); +DECLARE_PER_CPU(u64, idle_entry_spurr_snap); + +static inline void snapshot_purr_idle_entry(void) +{ + *this_cpu_ptr(&idle_entry_purr_snap) = mfspr(SPRN_PURR); +} + +static inline void snapshot_spurr_idle_entry(void) +{ + *this_cpu_ptr(&idle_entry_spurr_snap) = mfspr(SPRN_SPURR); +} + +static inline void update_idle_purr_accounting(void) +{ + u64 wait_cycles; + u64 in_purr = *this_cpu_ptr(&idle_entry_purr_snap); + + wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); + wait_cycles += mfspr(SPRN_PURR) - in_purr; + get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); +} + +static inline void update_idle_spurr_accounting(void) +{ + u64 *idle_spurr_cycles_ptr = this_cpu_ptr(&idle_spurr_cycles); + u64 in_spurr = *this_cpu_ptr(&idle_entry_spurr_snap); + + *idle_spurr_cycles_ptr += mfspr(SPRN_SPURR) - in_spurr; +} + +static inline void pseries_idle_prolog(void) +{ + ppc64_runlatch_off(); + snapshot_purr_idle_entry(); + snapshot_spurr_idle_entry(); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; +} + +static inline void pseries_idle_epilog(void) +{ + update_idle_purr_accounting(); + update_idle_spurr_accounting(); + get_lppaca()->idle = 0; + ppc64_runlatch_on(); +} + +static inline u64 read_this_idle_purr(void) +{ + /* + * If we are reading from an idle context, update the + * idle-purr cycles corresponding to the last idle period. + * Since the idle context is not yet over, take a fresh + * snapshot of the idle-purr. 
+ */ + if (unlikely(get_lppaca()->idle == 1)) { + update_idle_purr_accounting(); + snapshot_purr_idle_entry(); + } + + return be64_to_cpu(get_lppaca()->wait_state_cycles); +} + +static inline u64 read_this_idle_spurr(void) +{ + /* + * If we are reading from an idle context, update the + * idle-spurr cycles corresponding to the last idle period. + * Since the idle context is not yet over, take a fresh + * snapshot of the idle-spurr. + */ + if (get_lppaca()->idle == 1) { + update_idle_spurr_accounting(); + snapshot_spurr_idle_entry(); + } + + return *this_cpu_ptr(&idle_spurr_cycles); +} + +#endif /* CONFIG_PPC_PSERIES */ +#endif diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 4da4fcba0684..4f897993b710 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -99,6 +99,11 @@ struct trace_imc_data { */ #define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL +/* + * Bit 0:1 in third DW of IMC trace record + * specifies the MSR[HV PR] values. + */ +#define IMC_TRACE_RECORD_VAL_HVPR(x) ((x) >> 62) /* * Device tree parser code detects IMC pmu support and diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h new file mode 100644 index 000000000000..cc73c1267572 --- /dev/null +++ b/arch/powerpc/include/asm/inst.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_INST_H +#define _ASM_POWERPC_INST_H + +#include <asm/ppc-opcode.h> + +/* + * Instruction data type for POWER + */ + +struct ppc_inst { + u32 val; +#ifdef CONFIG_PPC64 + u32 suffix; +#endif +} __packed; + +static inline u32 ppc_inst_val(struct ppc_inst x) +{ + return x.val; +} + +static inline int ppc_inst_primary_opcode(struct ppc_inst x) +{ + return ppc_inst_val(x) >> 26; +} + +#ifdef CONFIG_PPC64 +#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) + +#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) + +static inline u32 ppc_inst_suffix(struct ppc_inst x) +{ + return x.suffix; +} + +static inline bool ppc_inst_prefixed(struct ppc_inst x) +{ + return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; +} + +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +{ + return ppc_inst_prefix(swab32(ppc_inst_val(x)), + swab32(ppc_inst_suffix(x))); +} + +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) +{ + u32 val, suffix; + + val = *(u32 *)ptr; + if ((val >> 26) == OP_PREFIX) { + suffix = *((u32 *)ptr + 1); + return ppc_inst_prefix(val, suffix); + } else { + return ppc_inst(val); + } +} + +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +{ + return *(u64 *)&x == *(u64 *)&y; +} + +#else + +#define ppc_inst(x) ((struct ppc_inst){ .val = x }) + +static inline bool ppc_inst_prefixed(struct ppc_inst x) +{ + return false; +} + +static inline u32 ppc_inst_suffix(struct ppc_inst x) +{ + return 0; +} + +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +{ + return ppc_inst(swab32(ppc_inst_val(x))); +} + +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) +{ + return *ptr; +} + +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +{ + return ppc_inst_val(x) == ppc_inst_val(y); +} + +#endif /* CONFIG_PPC64 */ + +static inline int ppc_inst_len(struct ppc_inst x) +{ + return ppc_inst_prefixed(x) ? 8 : 4; +} + +/* + * Return the address of the next instruction, if the instruction @value was + * located at @location. 
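The new asm/idle.h above gathers the PURR/SPURR idle accounting that pseries idle paths are expected to wrap around the actual cede or snooze. A minimal sketch of that pairing; do_enter_idle() is a placeholder, not a real function:

#include <asm/idle.h>

static void do_enter_idle(void);	/* placeholder for the cede/snooze call */

static void example_pseries_idle(void)
{
	pseries_idle_prolog();	/* runlatch off, snapshot PURR/SPURR, lppaca->idle = 1 */
	do_enter_idle();
	pseries_idle_epilog();	/* fold idle cycles back in, lppaca->idle = 0 */
}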
+ */ +static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *value) +{ + struct ppc_inst tmp; + + tmp = ppc_inst_read(value); + + return location + ppc_inst_len(tmp); +} + +static inline u64 ppc_inst_as_u64(struct ppc_inst x) +{ +#ifdef CONFIG_CPU_LITTLE_ENDIAN + return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); +#else + return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); +#endif +} + +#define PPC_INST_STR_LEN sizeof("00000000 00000000") + +static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x) +{ + if (ppc_inst_prefixed(x)) + sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); + else + sprintf(str, "%08x", ppc_inst_val(x)); + + return str; +} + +#define ppc_inst_as_str(x) \ +({ \ + char __str[PPC_INST_STR_LEN]; \ + __ppc_inst_as_str(__str, x); \ + __str; \ +}) + +int probe_user_read_inst(struct ppc_inst *inst, + struct ppc_inst __user *nip); + +int probe_kernel_read_inst(struct ppc_inst *inst, + struct ppc_inst *src); + +#endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 635969b5b58e..58635960403c 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -34,7 +34,6 @@ extern struct pci_dev *isa_bridge_pcidev; #include <asm/mmiowb.h> #include <asm/mmu.h> #include <asm/ppc_asm.h> -#include <asm/pgtable.h> #define SIO_CONFIG_RA 0x398 #define SIO_CONFIG_RD 0x399 @@ -699,10 +698,6 @@ static inline void iosync(void) * * * iounmap undoes such a mapping and can be hooked * - * * __ioremap_at (and the pending __iounmap_at) are low level functions to - * create hand-made mappings for use only by the PCI code and cannot - * currently be hooked. Must be page aligned. - * * * __ioremap_caller is the same as above but takes an explicit caller * reference rather than using __builtin_return_address(0) * @@ -719,6 +714,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); extern void iounmap(volatile void __iomem *addr); +void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size); + int early_ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot); void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, @@ -727,10 +724,6 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, pgprot_t prot, void *caller); -extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, - unsigned long size, pgprot_t prot); -extern void __iounmap_at(void *ea, unsigned long size); - /* * When CONFIG_PPC_INDIRECT_PIO is set, we use the generic iomap implementation * which needs some additional definitions here. 
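The new asm/inst.h above wraps instructions in struct ppc_inst so that prefixed (8-byte) and normal (4-byte) instructions are handled uniformly. A sketch of walking a range of kernel text with these accessors; the printing is illustrative only:

#include <asm/inst.h>
#include <linux/printk.h>

static void example_dump_insns(struct ppc_inst *start, struct ppc_inst *end)
{
	struct ppc_inst *p = start;

	while ((void *)p < (void *)end) {
		struct ppc_inst insn = ppc_inst_read(p);

		/* prints "val" or "val suffix" depending on ppc_inst_prefixed() */
		pr_info("%px: %s\n", p, ppc_inst_as_str(insn));

		/* advance by 4 or 8 bytes as appropriate */
		p = ppc_inst_next(p, p);
	}
}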
They basically allow PIO diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 350101e11ddb..5032f1593299 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -22,11 +22,11 @@ #define IOMMU_PAGE_SHIFT_4K 12 #define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K) #define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1)) -#define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K) +#define IOMMU_PAGE_ALIGN_4K(addr) ALIGN(addr, IOMMU_PAGE_SIZE_4K) #define IOMMU_PAGE_SIZE(tblptr) (ASM_CONST(1) << (tblptr)->it_page_shift) #define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1)) -#define IOMMU_PAGE_ALIGN(addr, tblptr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE(tblptr)) +#define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr)) /* Boot time flags */ extern int iommu_is_off; diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index fbff9ff9032e..7355ed05e65e 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -15,17 +15,22 @@ #ifndef __ASSEMBLY__ #include <asm/page.h> +#include <linux/sizes.h> #define KASAN_SHADOW_SCALE_SHIFT 3 +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX) +#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) +#else +#define KASAN_KERN_START PAGE_OFFSET +#endif + #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ - (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT)) + (KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) -#define KASAN_SHADOW_END 0UL - -#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) +#define KASAN_SHADOW_END (-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT)) #ifdef CONFIG_KASAN void kasan_early_init(void); @@ -38,5 +43,9 @@ static inline void kasan_mmu_init(void) { } static inline void kasan_late_init(void) { } #endif +void kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte); +int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end); +int kasan_init_region(void *start, size_t size); + #endif /* __ASSEMBLY */ #endif diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index c68476818753..55d6ede30c19 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -100,15 +100,26 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; -#ifdef CONFIG_IMA_KEXEC #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + struct crash_mem *exclude_ranges; + + unsigned long backup_start; + void *backup_buf; + + unsigned long elfcorehdr_addr; + unsigned long elf_headers_sz; + void *elf_headers; + +#ifdef CONFIG_IMA_KEXEC phys_addr_t ima_buffer_addr; size_t ima_buffer_size; -}; #endif +}; +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len); int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); @@ -116,6 +127,20 @@ int setup_new_fdt(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size); + +#ifdef CONFIG_PPC64 +struct kexec_buf; + +int load_crashdump_segments_ppc64(struct kimage *image, + struct 
kexec_buf *kbuf); +int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr); +int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline); +#endif /* CONFIG_PPC64 */ + #endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ @@ -150,6 +175,18 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_PPC_BOOK3S_64 +#include <asm/book3s/64/kexec.h> +#endif + +#ifndef reset_sprs +#define reset_sprs reset_sprs +static inline void reset_sprs(void) +{ +} +#endif + #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KEXEC_H */ diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h new file mode 100644 index 000000000000..7a90000f8d15 --- /dev/null +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_KEXEC_RANGES_H +#define _ASM_POWERPC_KEXEC_RANGES_H + +#define MEM_RANGE_CHUNK_SZ 2048 /* Memory ranges size chunk */ + +void sort_memory_ranges(struct crash_mem *mrngs, bool merge); +struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); +int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); +int add_tce_mem_ranges(struct crash_mem **mem_ranges); +int add_initrd_mem_range(struct crash_mem **mem_ranges); +#ifdef CONFIG_PPC_BOOK3S_64 +int add_htab_mem_range(struct crash_mem **mem_ranges); +#else +static inline int add_htab_mem_range(struct crash_mem **mem_ranges) +{ + return 0; +} +#endif +int add_kernel_mem_range(struct crash_mem **mem_ranges); +int add_rtas_mem_range(struct crash_mem **mem_ranges); +int add_opal_mem_range(struct crash_mem **mem_ranges); +int add_reserved_mem_ranges(struct crash_mem **mem_ranges); + +#endif /* _ASM_POWERPC_KEXEC_RANGES_H */ diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 66b3f2983b22..4fc0e15e23a5 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[]; extern kprobe_opcode_t optprobe_template_end[]; /* Fixed instruction size for powerpc */ -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */ #define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry) #define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 92bcd1a26d73..1d0f7d838b2e 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -10,7 +10,9 @@ * Use the current saved situation instead of the to/from/size params. 
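The new asm/kexec_ranges.h above is the building-block API the kexec_file/kdump code uses to assemble memory range lists. A trimmed sketch of how an exclude list might be put together with it, assuming the caller owns the crash_mem pointer and handles allocation failures from the add_* helpers:

#include <linux/kexec.h>
#include <asm/kexec_ranges.h>

static int example_build_exclude_ranges(struct crash_mem **ranges)
{
	int ret;

	ret = add_rtas_mem_range(ranges);
	if (ret)
		return ret;

	ret = add_opal_mem_range(ranges);
	if (ret)
		return ret;

	ret = add_reserved_mem_ranges(ranges);
	if (ret)
		return ret;

	/* sort by base address and merge overlapping/adjacent ranges */
	sort_memory_ranges(*ranges, true);
	return 0;
}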
* Used on book3s/32 */ -#define KUAP_CURRENT 4 +#define KUAP_CURRENT_READ 4 +#define KUAP_CURRENT_WRITE 8 +#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC64 #include <asm/book3s/64/kup-radix.h> @@ -37,7 +39,7 @@ #else /* !__ASSEMBLY__ */ -#include <asm/pgtable.h> +#include <linux/pgtable.h> void setup_kup(void); @@ -101,6 +103,16 @@ static inline void prevent_current_access_user(void) prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT); } +static inline void prevent_current_read_from_user(void) +{ + prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ); +} + +static inline void prevent_current_write_to_user(void) +{ + prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE); +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 506e4df2d730..d32ec9ae73bd 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -78,7 +78,7 @@ struct kvmppc_vcore { struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; struct list_head preempt_list; spinlock_t lock; - struct swait_queue_head wq; + struct rcuwait wait; spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; @@ -155,12 +155,11 @@ extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); -extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, - struct kvm_vcpu *vcpu, unsigned long addr, - unsigned long status); +extern int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, + unsigned long addr, unsigned long status); extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long valid); -extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store); extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); @@ -174,8 +173,7 @@ extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); -extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, - struct kvm_vcpu *vcpu, +extern int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr); extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, gva_t eaddr, void *to, void *from, @@ -198,7 +196,7 @@ extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, unsigned int shift, const struct kvm_memory_slot *memslot, unsigned int lpid); -extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, +extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, unsigned long gpa, unsigned int lpid); extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, @@ -234,7 +232,7 @@ extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); -extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu); 
extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, @@ -300,12 +298,12 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu); -int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu, +int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr); void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); -long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu); +long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 04b2b927bb5a..9bb9bb370b53 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -14,6 +14,7 @@ #include <asm/book3s/64/mmu-hash.h> #include <asm/cpu_has_feature.h> #include <asm/ppc-opcode.h> +#include <asm/pte-walk.h> #ifdef CONFIG_PPC_PSERIES static inline bool kvmhv_on_pseries(void) @@ -434,7 +435,7 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing) continue; } /* If pte is not present return None */ - if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT))) + if (unlikely(!pte_present(old_pte))) return __pte(0); new_pte = pte_mkyoung(old_pte); @@ -634,6 +635,47 @@ extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm, unsigned long gpa, unsigned long hpa, unsigned long nbytes); +static inline pte_t * +find_kvm_secondary_pte_unlocked(struct kvm *kvm, unsigned long ea, + unsigned *hshift) +{ + pte_t *pte; + + pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift); + return pte; +} + +static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea, + unsigned *hshift) +{ + pte_t *pte; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift); + + return pte; +} + +static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq, + unsigned long ea, unsigned *hshift) +{ + pte_t *pte; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + + if (mmu_notifier_retry(kvm, mmu_seq)) + return NULL; + + pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift); + + return pte; +} + +extern pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, + unsigned long ea, unsigned *hshift); + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 45704f2716e2..078f4648ea27 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -119,7 +119,7 @@ struct kvmppc_host_state { void __iomem *xive_tima_virt; u32 saved_xirr; u64 dabr; - u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ + u64 host_mmcr[10]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */ u32 host_pmc[8]; u64 host_purr; u64 host_spurr; diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h index 9cb7d8be2366..0a6319448cb6 
100644 --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h @@ -23,6 +23,10 @@ int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm); void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out); +int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new); +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old); #else static inline int kvmppc_uvmem_init(void) { @@ -82,5 +86,15 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) static inline void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out) { } + +static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new) +{ + return H_UNSUPPORTED; +} + +static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old) { } + #endif /* CONFIG_PPC_UV */ #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */ diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index 310ba48d13f0..0c3401b2e19e 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -89,10 +89,12 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) return vcpu->arch.regs.nip; } +#ifdef CONFIG_BOOKE static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } +#endif static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1dc63101ffe1..10ded83414de 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -58,7 +58,8 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER extern int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, + unsigned flags); extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); @@ -637,12 +638,14 @@ struct kvm_vcpu_arch { u32 ccr1; u32 dbsr; - u64 mmcr[5]; + u64 mmcr[4]; /* MMCR0, MMCR1, MMCR2, MMCR3 */ + u64 mmcra; + u64 mmcrs; u32 pmc[8]; u32 spmc[2]; u64 siar; u64 sdar; - u64 sier; + u64 sier[3]; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM u64 tfhar; u64 texasr; @@ -751,7 +754,7 @@ struct kvm_vcpu_arch { u8 irq_pending; /* Used by XIVE to signal pending guest irqs */ u32 last_inst; - struct swait_queue_head *wqp; + struct rcuwait *waitp; struct kvmppc_vcore *vcore; int ret; int trap; @@ -795,7 +798,6 @@ struct kvm_vcpu_arch { struct mmio_hpte_cache_entry *pgfault_cache; struct task_struct *run_task; - struct kvm_run *kvm_run; spinlock_t vpa_update_lock; struct kvmppc_vpa vpa; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 9c1f6b4b9bbf..744612054c94 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,35 +8,15 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ -#include <uapi/asm/kvm_para.h> - -#ifdef CONFIG_KVM_GUEST - -#include <linux/of.h> - -static inline int kvm_para_available(void) -{ - struct device_node *hyper_node; - - hyper_node = of_find_node_by_path("/hypervisor"); - if (!hyper_node) - return 0; +#include <asm/firmware.h> - if 
(!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; - - return 1; -} - -#else +#include <uapi/asm/kvm_para.h> static inline int kvm_para_available(void) { - return 0; + return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest(); } -#endif - static inline unsigned int kvm_arch_para_features(void) { unsigned long r; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 94f5a32acaf1..0a056c64c317 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -58,28 +58,28 @@ enum xlate_readwrite { XLATE_WRITE /* check for write permissions */ }; -extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu); +extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); -extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_loads(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int mmio_sign_extend); -extern int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int rs, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_store(struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); -extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu, int rs, unsigned int bytes, int is_default_endian); @@ -90,10 +90,9 @@ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_emulate_instruction(struct kvm_run *run, - struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); -extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); @@ -267,7 +266,7 @@ struct kvmppc_ops { void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); - int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); + int (*vcpu_run)(struct kvm_vcpu *vcpu); int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); int (*check_requests)(struct kvm_vcpu *vcpu); @@ -291,7 +290,7 @@ struct kvmppc_ops { int 
(*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); - int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu, + int (*emulate_op)(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 3b4b305796ae..c390ec377bae 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -42,7 +42,6 @@ */ #include <linux/cache.h> #include <linux/threads.h> -#include <linux/spinlock_types.h> #include <asm/types.h> #include <asm/mmu.h> #include <asm/firmware.h> @@ -146,49 +145,6 @@ struct slb_shadow { } save_area[SLB_NUM_BOLTED]; } ____cacheline_aligned; -/* - * Layout of entries in the hypervisor's dispatch trace log buffer. - */ -struct dtl_entry { - u8 dispatch_reason; - u8 preempt_reason; - __be16 processor_id; - __be32 enqueue_to_dispatch_time; - __be32 ready_to_enqueue_time; - __be32 waiting_to_ready_time; - __be64 timebase; - __be64 fault_addr; - __be64 srr0; - __be64 srr1; -}; - -#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ -#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) - -/* - * Dispatch trace log event enable mask: - * 0x1: voluntary virtual processor waits - * 0x2: time-slice preempts - * 0x4: virtual partition memory page faults - */ -#define DTL_LOG_CEDE 0x1 -#define DTL_LOG_PREEMPT 0x2 -#define DTL_LOG_FAULT 0x4 -#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) - -extern struct kmem_cache *dtl_cache; -extern rwlock_t dtl_access_lock; - -/* - * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls - * reading from the dispatch trace log. If other code wants to consume - * DTL entries, it can set this pointer to a function that will get - * called once for each DTL entry that gets processed. 
- */ -extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); - -extern void register_dtl_buffer(int cpu); -extern void alloc_dtl_buffers(unsigned long *time_limit); extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); #endif /* CONFIG_PPC_BOOK3S */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 7bcb64444a39..a90b892f0bfe 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -131,7 +131,7 @@ struct machdep_calls { unsigned long dabrx); /* Set DAWR for this platform, leave empty for default implementation */ - int (*set_dawr)(unsigned long dawr, + int (*set_dawr)(int nr, unsigned long dawr, unsigned long dawrx); #ifdef CONFIG_PPC32 /* XXX for now */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 376a395daf32..89aa8248a57d 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -86,6 +86,7 @@ enum MCE_TlbErrorType { enum MCE_UserErrorType { MCE_USER_ERROR_INDETERMINATE = 0, MCE_USER_ERROR_TLBIE = 1, + MCE_USER_ERROR_SCV = 2, }; enum MCE_RaErrorType { @@ -209,6 +210,9 @@ struct mce_error_info { #define MCE_EVENT_RELEASE true #define MCE_EVENT_DONTRELEASE false +struct pt_regs; +struct notifier_block; + extern void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr, uint64_t phys_addr); @@ -220,7 +224,13 @@ extern void machine_check_print_event_info(struct machine_check_event *evt, unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); extern void mce_common_process_ue(struct pt_regs *regs, struct mce_error_info *mce_err); +int mce_register_notifier(struct notifier_block *nb); +int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +long __machine_check_early_realmode_p7(struct pt_regs *regs); +long __machine_check_early_realmode_p8(struct pt_regs *regs); +long __machine_check_early_realmode_p9(struct pt_regs *regs); +long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index d610c2e07b28..7cb6d18f5cd6 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,10 +13,6 @@ #include <linux/pkeys.h> #include <asm/cpu_has_feature.h> -/* - * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits() - * here. How important is the optimization? - */ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { @@ -44,8 +40,13 @@ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) return false; - if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) - return false; + if (prot & PROT_SAO) { + if (!cpu_has_feature(CPU_FTR_SAO)) + return false; + if (firmware_has_feature(FW_FEATURE_LPAR) && + !IS_ENABLED(CONFIG_PPC_PROT_SAO_LPAR)) + return false; + } return true; } #define arch_validate_prot arch_validate_prot diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 0699cfeeb8c9..255a1837e9f7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -29,6 +29,19 @@ */ /* + * Support for KUEP feature. + */ +#define MMU_FTR_KUEP ASM_CONST(0x00000400) + +/* + * Support for memory protection keys. 
+ */ +#define MMU_FTR_PKEY ASM_CONST(0x00000800) + +/* Guest Translation Shootdown Enable */ +#define MMU_FTR_GTSE ASM_CONST(0x00001000) + +/* * Support for 68 bit VA space. We added that from ISA 2.05 */ #define MMU_FTR_68_BIT_VA ASM_CONST(0x00002000) @@ -122,6 +135,7 @@ #define MMU_FTRS_POWER7 MMU_FTRS_POWER6 #define MMU_FTRS_POWER8 MMU_FTRS_POWER6 #define MMU_FTRS_POWER9 MMU_FTRS_POWER6 +#define MMU_FTRS_POWER10 MMU_FTRS_POWER6 #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ @@ -172,10 +186,18 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | + MMU_FTR_GTSE | #ifdef CONFIG_PPC_KUAP MMU_FTR_RADIX_KUAP | #endif /* CONFIG_PPC_KUAP */ #endif /* CONFIG_PPC_RADIX_MMU */ +#ifdef CONFIG_PPC_MEM_KEYS + MMU_FTR_PKEY | +#endif +#ifdef CONFIG_PPC_KUEP + MMU_FTR_KUEP | +#endif /* CONFIG_PPC_KUAP */ + 0, }; @@ -291,15 +313,6 @@ static inline bool early_radix_enabled(void) } #endif -#ifdef CONFIG_PPC_MEM_KEYS -extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address); -#else -static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) -{ - return 0; -} -#endif /* CONFIG_PPC_MEM_KEYS */ - #ifdef CONFIG_STRICT_KERNEL_RWX static inline bool strict_kernel_rwx_enabled(void) { @@ -364,6 +377,8 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); static inline void mmu_early_init_devtree(void) { } +static inline void pkey_early_init_devtree(void) {} + extern void *abatron_pteptrs[2]; #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 360367c579de..7f3658a97384 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -185,6 +185,34 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) dec_mm_active_cpus(mm); } } + +/* + * vas_windows counter shows number of open windows in the mm + * context. During context switch, use this counter to clear the + * foreign real address mapping (CP_ABORT) for the thread / process + * that intend to use COPY/PASTE. When a process closes all windows, + * disable CP_ABORT which is expensive to run. + * + * For user context, register a copro so that TLBIs are seen by the + * nest MMU. mm_context_add/remove_vas_window() are used only for user + * space windows. 
+ */ +static inline void mm_context_add_vas_window(struct mm_struct *mm) +{ + atomic_inc(&mm->context.vas_windows); + mm_context_add_copro(mm); +} + +static inline void mm_context_remove_vas_window(struct mm_struct *mm) +{ + int v; + + mm_context_remove_copro(mm); + v = atomic_dec_if_positive(&mm->context.vas_windows); + + /* Detect imbalance between add and remove */ + WARN_ON(v < 0); +} #else static inline void inc_mm_active_cpus(struct mm_struct *mm) { } static inline void dec_mm_active_cpus(struct mm_struct *mm) { } diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 5398bfc465b4..857d9ff24295 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -72,12 +72,9 @@ struct mod_arch_specific { # ifdef MODULE asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous"); # endif /* MODULE */ -#endif int module_trampoline_target(struct module *mod, unsigned long trampoline, unsigned long *target); - -#ifdef CONFIG_DYNAMIC_FTRACE int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs); #else static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index a46616937d20..e752a5807a59 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -13,13 +13,13 @@ static inline pte_t *hugepd_page(hugepd_t hpd) static inline unsigned int hugepd_shift(hugepd_t hpd) { - return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; + return PAGE_SHIFT_8M; } static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned int pdshift) { - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; + unsigned long idx = (addr & (SZ_4M - 1)) >> PAGE_SHIFT; return hugepd_page(hpd) + idx; } @@ -32,8 +32,12 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) { - *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | - (pshift == PAGE_SHIFT_8M ? 
_PMD_PAGE_8M : _PMD_PAGE_512K)); + *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | _PMD_PAGE_8M); +} + +static inline void hugepd_populate_kernel(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | _PMD_PRESENT | _PMD_PAGE_8M); } static inline int check_and_get_huge_psize(int shift) @@ -41,4 +45,24 @@ static inline int check_and_get_huge_psize(int shift) return shift_to_mmu_psize(shift); } +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + +#define __HAVE_ARCH_HUGE_PTE_CLEAR +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + pte_update(mm, addr, ptep, ~0UL, 0, 1); +} + +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); + unsigned long set = pte_val(pte_wrprotect(__pte(0))); + + pte_update(mm, addr, ptep, clr, set, 1); +} + #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 76af5b0cb16e..1d9ac0f9c794 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -19,7 +19,6 @@ #define MI_RSV4I 0x08000000 /* Reserve 4 TLB entries */ #define MI_PPCS 0x02000000 /* Use MI_RPN prob/priv state */ #define MI_IDXMASK 0x00001f00 /* TLB index to be loaded */ -#define MI_RESETVAL 0x00000000 /* Value of register at reset */ /* These are the Ks and Kp from the PowerPC books. For proper operation, * Ks = 0, Kp = 1. @@ -37,16 +36,16 @@ * Therefore, we define 2 APG groups. lsb is _PMD_USER * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MI_APG_INIT 0x4fffffff +#define MI_APG_INIT 0x40000000 /* * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MI_APG_KUEP 0x6fffffff +#define MI_APG_KUEP 0x60000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -95,7 +94,6 @@ #define MD_TWAM 0x04000000 /* Use 4K page hardware assist */ #define MD_PPCS 0x02000000 /* Use MI_RPN prob/priv state */ #define MD_IDXMASK 0x00001f00 /* TLB index to be loaded */ -#define MD_RESETVAL 0x04000000 /* Value of register at reset */ #define SPRN_M_CASID 793 /* Address space ID (context) to match */ #define MC_ASIDMASK 0x0000000f /* Bits used for ASID value */ @@ -117,16 +115,16 @@ * Therefore, we define 2 APG groups. 
lsb is _PMD_USER * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MD_APG_INIT 0x4fffffff +#define MD_APG_INIT 0x40000000 /* * 0 => No user => 01 (all accesses performed according to page definition) * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + * 2-15 => Not Used */ -#define MD_APG_KUAP 0x6fffffff +#define MD_APG_KUAP 0x60000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -178,12 +176,6 @@ */ #define SPRN_M_TW 799 -#ifdef CONFIG_PPC_MM_SLICES -#include <asm/nohash/32/slice.h> -#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) -#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE -#endif - #if defined(CONFIG_PPC_4K_PAGES) #define mmu_virtual_psize MMU_PAGE_4K #elif defined(CONFIG_PPC_16K_PAGES) @@ -201,71 +193,15 @@ #include <linux/mmdebug.h> -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, 0); -}; +void mmu_pin_tlb(unsigned long top, bool readonly); typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; -#ifdef CONFIG_PPC_MM_SLICES - u16 user_psize; /* page size index */ - unsigned char low_slices_psize[SLICE_ARRAY_SIZE]; - unsigned char high_slices_psize[0]; - unsigned long slb_addr_limit; - struct slice_mask mask_base_psize; /* 4k or 16k */ - struct slice_mask mask_512k; - struct slice_mask mask_8m; -#endif void *pte_frag; } mm_context_t; -#ifdef CONFIG_PPC_MM_SLICES -static inline u16 mm_ctx_user_psize(mm_context_t *ctx) -{ - return ctx->user_psize; -} - -static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) -{ - ctx->user_psize = user_psize; -} - -static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) -{ - return ctx->low_slices_psize; -} - -static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) -{ - return ctx->high_slices_psize; -} - -static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) -{ - return ctx->slb_addr_limit; -} - -static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) -{ - ctx->slb_addr_limit = limit; -} - -static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) -{ - if (psize == MMU_PAGE_512K) - return &ctx->mask_512k; - if (psize == MMU_PAGE_8M) - return &ctx->mask_8m; - - BUG_ON(psize != mmu_virtual_psize); - - return &ctx->mask_base_psize; -} -#endif /* CONFIG_PPC_MM_SLICE */ - #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) @@ -304,13 +240,7 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) } /* patch sites */ -extern s32 patch__itlbmiss_linmem_top, patch__itlbmiss_linmem_top8; -extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp; -extern s32 patch__fixupdar_linmem_top; -extern s32 patch__dtlbmiss_romem_top, patch__dtlbmiss_romem_top8; - -extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2; -extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3; +extern s32 patch__itlbmiss_exit_1, patch__dtlbmiss_exit_1; extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h index b04ba257fddb..b9e134d0f03a 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -2,14 +2,12 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/threads.h> #include <asm/mmu.h> /* For sub-arch specific PPC_PIN_SIZE */ -#include <asm/asm-405.h> #ifdef CONFIG_44x extern int icache_44x_need_flush; @@ -30,6 +28,8 @@ extern int icache_44x_need_flush; #define PMD_TABLE_SIZE 0 #define PUD_TABLE_SIZE 0 #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) + +#define PMD_MASKED_BITS (PTE_TABLE_SIZE - 1) #endif /* __ASSEMBLY__ */ #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) @@ -110,13 +110,13 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #define VMALLOC_OFFSET (0x1000000) /* 16M */ #ifdef PPC_PIN_SIZE -#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) +#define VMALLOC_START (((ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #endif #ifdef CONFIG_KASAN_VMALLOC -#define VMALLOC_END _ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else #define VMALLOC_END ioremap_bot #endif @@ -166,7 +166,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #ifndef __ASSEMBLY__ #define pte_clear(mm, addr, ptep) \ - do { pte_update(ptep, ~0, 0); } while (0) + do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0) #ifndef pte_mkwrite static inline pte_t pte_mkwrite(pte_t pte) @@ -205,8 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } - - /* * PTE updates. This function is called whenever an existing * valid PTE is updated. This does -not- include set_pte_at() @@ -221,66 +219,56 @@ static inline void pmd_clear(pmd_t *pmdp) * that an executable user mapping was modified, which is needed * to properly flush the virtually tagged instruction cache of * those implementations. + * + * On the 8xx, the page tables are a bit special. For 16k pages, we have + * 4 identical entries. For 512k pages, we have 128 entries as if it was + * 4k pages, but they are flagged as 512k pages for the hardware. + * For other page sizes, we have a single entry in the table. */ -#ifndef CONFIG_PTE_64BIT -static inline unsigned long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) +#ifdef CONFIG_PPC_8xx +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); + +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__("\ -1: lwarx %0,0,%3\n\ - andc %1,%0,%4\n\ - or %1,%1,%5\n" - PPC405_ERR77(0,%3) -" stwcx. 
%1,0,%3\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* PTE_ATOMIC_UPDATES */ - unsigned long old = pte_val(*p); - unsigned long new = (old & ~clr) | set; - -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) - p->pte = p->pte1 = p->pte2 = p->pte3 = new; -#else - *p = __pte(new); -#endif -#endif /* !PTE_ATOMIC_UPDATES */ + pte_basic_t *entry = &p->pte; + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + int num, i; + pmd_t *pmd = pmd_off(mm, addr); + + if (!huge) + num = PAGE_SIZE / SZ_4K; + else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M) + num = SZ_512K / SZ_4K; + else + num = 1; + + for (i = 0; i < num; i++, entry++, new += SZ_4K) + *entry = new; -#ifdef CONFIG_44x - if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) - icache_44x_need_flush = 1; -#endif return old; } -#else /* CONFIG_PTE_64BIT */ -static inline unsigned long long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) + +#ifdef CONFIG_PPC_16K_PAGES +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_basic_t val = READ_ONCE(ptep->pte); + pte_t pte = {val, val, val, val}; + + return pte; +} +#endif /* CONFIG_PPC_16K_PAGES */ + +#else +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long long old; - unsigned long tmp; - - __asm__ __volatile__("\ -1: lwarx %L0,0,%4\n\ - lwzx %0,0,%3\n\ - andc %1,%L0,%5\n\ - or %1,%1,%6\n" - PPC405_ERR77(0,%3) -" stwcx. %1,0,%4\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* PTE_ATOMIC_UPDATES */ - unsigned long long old = pte_val(*p); - *p = __pte((old & ~(unsigned long long)clr) | set); -#endif /* !PTE_ATOMIC_UPDATES */ + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + + *p = __pte(new); #ifdef CONFIG_44x if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) @@ -288,23 +276,24 @@ static inline unsigned long long pte_update(pte_t *p, #endif return old; } -#endif /* CONFIG_PTE_64BIT */ +#endif #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; - old = pte_update(ptep, _PAGE_ACCESSED, 0); + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep) + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return __pte(pte_update(ptep, ~0, 0)); + return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT @@ -314,7 +303,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); unsigned long set = pte_val(pte_wrprotect(__pte(0))); - pte_update(ptep, clr, set); + pte_update(mm, addr, ptep, clr, set, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -326,8 +315,9 @@ static inline void __ptep_set_access_flags(struct 
vm_area_struct *vma, pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0))))); unsigned long set = pte_val(entry) & pte_val(pte_set); unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr); + int huge = psize > mmu_virtual_psize ? 1 : 0; - pte_update(ptep, clr, set); + pte_update(vma->vm_mm, address, ptep, clr, set, huge); flush_tlb_page(vma, address); } @@ -348,8 +338,6 @@ static inline int pte_young(pte_t pte) * of the pte page. -- paulus */ #ifndef CONFIG_BOOKE -#define pmd_page_vaddr(pmd) \ - ((unsigned long)__va(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1))) #define pmd_page(pmd) \ pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) #else @@ -359,22 +347,6 @@ static inline int pte_young(pte_t pte) pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT)) #endif -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - -/* Find an entry in the third-level page table.. */ -#define pte_index(address) \ - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir, addr) \ - (pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \ - pte_index(addr)) -#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) -static inline void pte_unmap(pte_t *pte) { } - /* * Encode and decode a swap entry. * Note that the bits we use in a PTE for representing a swap entry diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index 12c6811e344b..2d3153cfc0d7 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -44,9 +44,8 @@ #define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ #define _PAGE_USER 0x010 /* matches one of the zone permission bits */ #define _PAGE_SPECIAL 0x020 /* software: Special page */ -#define _PAGE_RW 0x040 /* software: Writes permitted */ #define _PAGE_DIRTY 0x080 /* software: dirty page */ -#define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */ +#define _PAGE_RW 0x100 /* hardware: WR, anded with dirty in exception */ #define _PAGE_EXEC 0x200 /* hardware: EX permission */ #define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ @@ -58,8 +57,8 @@ #define _PAGE_KERNEL_RO 0 #define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC) +#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) +#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) #define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */ #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -85,21 +84,5 @@ #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) #define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#ifndef __ASSEMBLY__ -static inline pte_t pte_wrprotect(pte_t pte) -{ - return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE)); -} - -#define pte_wrprotect pte_wrprotect - -static inline pte_t pte_mkclean(pte_t pte) -{ - return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE)); -} - -#define pte_mkclean pte_mkclean -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index c9e4b2d90f65..66f403a7da44 100644 --- 
a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -46,6 +46,8 @@ #define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ +#define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */ + /* cache related flags non existing on 8xx */ #define _PAGE_COHERENT 0 #define _PAGE_WRITETHRU 0 @@ -128,7 +130,7 @@ static inline pte_t pte_mkuser(pte_t pte) static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPS); + return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE); } #define pte_mkhuge pte_mkhuge diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h deleted file mode 100644 index 39eb0154ae2d..000000000000 --- a/arch/powerpc/include/asm/nohash/32/slice.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H -#define _ASM_POWERPC_NOHASH_32_SLICE_H - -#ifdef CONFIG_PPC_MM_SLICES - -#define SLICE_LOW_SHIFT 26 /* 64 slices */ -#define SLICE_LOW_TOP (0x100000000ull) -#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) -#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) - -#define SLICE_HIGH_SHIFT 0 -#define SLICE_NUM_HIGH 0ul -#define GET_HIGH_SLICE_INDEX(addr) (addr & 0) - -#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW - -#endif /* CONFIG_PPC_MM_SLICES */ - -#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index b9534a793293..668aee6017e7 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -15,7 +15,7 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) +#define p4d_populate(MM, P4D, PUD) p4d_set(P4D, (unsigned long)PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index c40ec32b8194..fe2f4c9acd9e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -2,7 +2,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> /* * Entries per page directory level. 
The PTE level must use a 64b record @@ -45,43 +45,39 @@ #define PMD_MASKED_BITS 0 /* Bits to mask out from a PUD to get to the PMD page */ #define PUD_MASKED_BITS 0 -/* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0 +/* Bits to mask out from a P4D to get to the PUD page */ +#define P4D_MASKED_BITS 0 /* * 4-level page tables related bits */ -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define p4d_none(p4d) (!p4d_val(p4d)) +#define p4d_bad(p4d) (p4d_val(p4d) == 0) +#define p4d_present(p4d) (p4d_val(p4d) != 0) +#define p4d_page_vaddr(p4d) (p4d_val(p4d) & ~P4D_MASKED_BITS) #ifndef __ASSEMBLY__ -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - *pgdp = __pgd(0); + *p4dp = __p4d(0); } -static inline pte_t pgd_pte(pgd_t pgd) +static inline pte_t p4d_pte(p4d_t p4d) { - return __pte(pgd_val(pgd)); + return __pte(p4d_val(p4d)); } -static inline pgd_t pte_pgd(pte_t pte) +static inline p4d_t pte_p4d(pte_t pte) { - return __pgd(pte_val(pte)); + return __p4d(pte_val(pte)); } -extern struct page *pgd_page(pgd_t pgd); +extern struct page *p4d_page(p4d_t p4d); #endif /* !__ASSEMBLY__ */ -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ - (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) - #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 9a33b8bd842d..6cb8aa357191 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -175,35 +175,13 @@ static inline pud_t pte_pud(pte_t pte) return __pud(pte_val(pte)); } #define pud_write(pud) pte_write(pud_pte(pud)) -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) +#define p4d_write(pgd) pte_write(p4d_pte(p4d)) -static inline void pgd_set(pgd_t *pgdp, unsigned long val) +static inline void p4d_set(p4d_t *p4dp, unsigned long val) { - *pgdp = __pgd(val); + *p4dp = __p4d(val); } -/* - * Find an entry in a page-table-directory. We combine the address region - * (the high order N bits) and the pgd portion of the address. - */ -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) - -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - -#define pmd_offset(pudp,addr) \ - (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) - -#define pte_offset_kernel(dir,addr) \ - (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) - -static inline void pte_unmap(pte_t *pte) { } - -/* to find an entry in a kernel page-table-directory */ -/* This now only contains the vmalloc pages */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - /* Atomic PTE updates */ static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr, @@ -211,22 +189,9 @@ static inline unsigned long pte_update(struct mm_struct *mm, unsigned long set, int huge) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__( - "1: ldarx %0,0,%3 # pte_update\n\ - andc %1,%0,%4 \n\ - or %1,%1,%6\n\ - stdcx. 
%1,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*ptep) - : "r" (ptep), "r" (clr), "m" (*ptep), "r" (set) - : "cc" ); -#else unsigned long old = pte_val(*ptep); *ptep = __pte((old & ~clr) | set); -#endif + /* huge pages use the old page table lock */ if (!huge) assert_pte_locked(mm, addr); @@ -310,21 +275,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long bits = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__( - "1: ldarx %0,0,%4\n\ - or %0,%3,%0\n\ - stdcx. %0,0,%4\n\ - bne- 1b" - :"=&r" (old), "=&r" (tmp), "=m" (*ptep) - :"r" (bits), "r" (ptep), "m" (*ptep) - :"cc"); -#else unsigned long old = pte_val(*ptep); *ptep = __pte(old | bits); -#endif flush_tlb_page(vma, address); } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 7fed9dc0f147..4b7c3472eab1 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -56,7 +56,7 @@ static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h . On powerpc, this will only + * comment in include/linux/pgtable.h . On powerpc, this will only * work for user pages and always return true for kernel pages. */ static inline int pte_protnone(pte_t pte) @@ -130,12 +130,10 @@ static inline pte_t pte_exprotect(pte_t pte) return __pte(pte_val(pte) & ~_PAGE_EXEC); } -#ifndef pte_mkclean static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_DIRTY); } -#endif static inline pte_t pte_mkold(pte_t pte) { @@ -267,7 +265,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline int hugepd_ok(hugepd_t hpd) { #ifdef CONFIG_PPC_8xx - return ((hpd_val(hpd) & 0x4) != 0); + return ((hpd_val(hpd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M); #else /* We clear the top bit to indicate hugepd */ return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index e3cc9eb9204d..9454d29ff4b4 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -52,6 +52,7 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; +struct rtas_args; /* * Defines the layout of the paca. 
@@ -224,6 +225,7 @@ struct paca_struct { u16 in_mce; u8 hmi_event_available; /* HMI event is available */ u8 hmi_p9_special_emu; /* HMI P9 special emulation */ + u32 hmi_irqs; /* HMI irq stat */ #endif u8 ftrace_enabled; /* Hard disable ftrace */ @@ -256,6 +258,7 @@ struct paca_struct { u64 l1d_flush_size; #endif #ifdef CONFIG_PPC_PSERIES + struct rtas_args *rtas_args_reentrant; u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ #endif /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 3ee8df0f66e0..254687258f42 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -249,21 +249,16 @@ static inline bool pfn_valid(unsigned long pfn) #include <asm/page_32.h> #endif -/* align addr on a size boundary - adjust address up/down if needed */ -#define _ALIGN_UP(addr, size) __ALIGN_KERNEL(addr, size) -#define _ALIGN_DOWN(addr, size) ((addr)&(~((typeof(addr))(size)-1))) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr,size) _ALIGN_UP(addr,size) - /* * Don't compare things with KERNELBASE or PAGE_OFFSET to test for * "kernelness", use is_kernel_addr() - it should do what you want. */ #ifdef CONFIG_PPC_BOOK3E_64 #define is_kernel_addr(x) ((x) >= 0x8000000000000000ul) -#else +#elif defined(CONFIG_PPC_BOOK3S_64) #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) +#else +#define is_kernel_addr(x) ((x) >= TASK_SIZE) #endif #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h new file mode 100644 index 000000000000..9362c94fe3aa --- /dev/null +++ b/arch/powerpc/include/asm/paravirt.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_PARAVIRT_H +#define _ASM_POWERPC_PARAVIRT_H + +#include <linux/jump_label.h> +#include <asm/smp.h> +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/hvcall.h> +#endif + +#ifdef CONFIG_PPC_SPLPAR +DECLARE_STATIC_KEY_FALSE(shared_processor); + +static inline bool is_shared_processor(void) +{ + return static_branch_unlikely(&shared_processor); +} + +/* If bit 0 is set, the cpu has been preempted */ +static inline u32 yield_count_of(int cpu) +{ + __be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count); + return be32_to_cpu(yield_count); +} + +static inline void yield_to_preempted(int cpu, u32 yield_count) +{ + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); +} + +static inline void prod_cpu(int cpu) +{ + plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +} + +static inline void yield_to_any(void) +{ + plpar_hcall_norets(H_CONFER, -1, 0); +} +#else +static inline bool is_shared_processor(void) +{ + return false; +} + +static inline u32 yield_count_of(int cpu) +{ + return 0; +} + +extern void ___bad_yield_to_preempted(void); +static inline void yield_to_preempted(int cpu, u32 yield_count) +{ + ___bad_yield_to_preempted(); /* This would be a bug */ +} + +extern void ___bad_yield_to_any(void); +static inline void yield_to_any(void) +{ + ___bad_yield_to_any(); /* This would be a bug */ +} + +extern void ___bad_prod_cpu(void); +static inline void prod_cpu(int cpu) +{ + ___bad_prod_cpu(); /* This would be a bug */ +} + +#endif + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + if (!is_shared_processor()) + return false; + if (yield_count_of(cpu) & 1) + return true; + return false; +} + +static inline bool pv_is_native_spin_unlock(void) +{ + return 
!is_shared_processor(); +} + +#endif /* _ASM_POWERPC_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 69f4cb3b7c56..d2a2a14e56f9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -66,7 +66,7 @@ struct pci_controller { void __iomem *io_base_virt; #ifdef CONFIG_PPC64 - void *io_base_alloc; + void __iomem *io_base_alloc; #endif resource_size_t io_base_phys; resource_size_t pci_io_size; @@ -202,7 +202,6 @@ struct pci_dn { #define IODA_INVALID_PE 0xFFFFFFFF unsigned int pe_number; #ifdef CONFIG_PCI_IOV - int vf_index; /* VF index in the PF */ u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ unsigned int *pe_num_map; /* PE# for the first VF PE or array */ diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index dce863a7635c..8e5b7d0b851c 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -10,8 +10,6 @@ #ifdef CONFIG_SMP -#include <asm/paca.h> - #define __my_cpu_offset local_paca->data_offset #endif /* CONFIG_SMP */ @@ -19,4 +17,6 @@ #include <asm-generic/percpu.h> +#include <asm/paca.h> + #endif /* _ASM_POWERPC_PERCPU_H_ */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index eed3954082fa..daec64d41b44 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -12,6 +12,8 @@ #ifdef CONFIG_PPC_PERF_CTRS #include <asm/perf_event_server.h> +#else +static inline bool is_sier_available(void) { return false; } #endif #ifdef CONFIG_FSL_EMB_PERF_EVENT @@ -38,4 +40,7 @@ /* To support perf_regs sier update */ extern bool is_sier_available(void); +/* To define perf extended regs mask value */ +extern u64 PERF_REG_EXTENDED_MASK; +#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK #endif diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3e9703f44c7c..f6acabb6c9be 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -17,6 +17,13 @@ struct perf_event; +struct mmcr_regs { + unsigned long mmcr0; + unsigned long mmcr1; + unsigned long mmcr2; + unsigned long mmcra; + unsigned long mmcr3; +}; /* * This struct provides the constants and functions needed to * describe the PMU on a particular POWER-family CPU. 
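Aside: a minimal, hypothetical sketch (not taken from this series) of how a PMU backend's compute_mmcr() callback would fill the new struct mmcr_regs introduced above, with named fields instead of the old unsigned long array; the function name and register values below are placeholders only:

static int example_compute_mmcr(u64 events[], int n_ev, unsigned int hwc[],
				struct mmcr_regs *mmcr,
				struct perf_event *pevents[])
{
	/* Named fields replace the old mmcr[0]/mmcr[1]/... array indexing. */
	mmcr->mmcr0 = 0;
	mmcr->mmcr1 = 0;
	mmcr->mmcr2 = 0;
	mmcr->mmcra = 0;
	mmcr->mmcr3 = 0;	/* only meaningful on ISA v3.1 PMUs, per PPMU_ARCH_31 below */

	return 0;		/* 0: the events could be scheduled */
}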
@@ -28,7 +35,7 @@ struct power_pmu { unsigned long add_fields; unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]); int (*get_constraint)(u64 event_id, unsigned long *mskp, unsigned long *valp); @@ -41,13 +48,13 @@ struct power_pmu { unsigned long group_constraint_val; u64 (*bhrb_filter_map)(u64 branch_sample_type); void (*config_bhrb)(u64 pmu_bhrb_filter); - void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + void (*disable_pmc)(unsigned int pmc, struct mmcr_regs *mmcr); int (*limited_pmc_event)(u64 event_id); u32 flags; const struct attribute_group **attr_groups; int n_generic; int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + u64 (*cache_events)[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; @@ -55,6 +62,11 @@ struct power_pmu { int *blacklist_ev; /* BHRB entries in the PMU */ int bhrb_nr; + /* + * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if + * the pmu supports extended perf regs capability + */ + int capabilities; }; /* @@ -69,6 +81,7 @@ struct power_pmu { #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ #define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ +#define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index b1f1d5339735..f7613f43c9cf 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,25 +41,6 @@ struct mm_struct; #ifndef __ASSEMBLY__ -#ifdef CONFIG_PPC32 -static inline pmd_t *pmd_ptr(struct mm_struct *mm, unsigned long va) -{ - return pmd_offset(pud_offset(pgd_offset(mm, va), va), va); -} - -static inline pmd_t *pmd_ptr_k(unsigned long va) -{ - return pmd_offset(pud_offset(pgd_offset_k(va), va), va); -} - -static inline pte_t *virt_to_kpte(unsigned long vaddr) -{ - pmd_t *pmd = pmd_ptr_k(vaddr); - - return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); -} -#endif - #include <asm/tlbflush.h> /* Keep these as a macros to avoid include dependency mess */ @@ -76,6 +57,13 @@ static inline pgprot_t pte_pgprot(pte_t pte) return __pgprot(pte_flags); } +#ifndef pmd_page_vaddr +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + return ((unsigned long)__va(pmd_val(pmd) & ~PMD_MASKED_BITS)); +} +#define pmd_page_vaddr pmd_page_vaddr +#endif /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. 
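Aside: the open-coded PPC32 pmd_ptr()/pmd_ptr_k()/virt_to_kpte() helpers removed above appear to be superseded by the generic page-table walkers; a hand-rolled lookup of a kernel PTE using those generic accessors (after the pgtable-nop4d conversion done elsewhere in this series) would look roughly like this hypothetical helper:

/* Illustration only; assumes <linux/mm.h> and <linux/pgtable.h>. */
static pte_t *example_lookup_kernel_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	/* Bail out if there is no PTE page mapped at this address. */
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return NULL;

	return pte_offset_kernel(pmd, addr);
}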
@@ -96,8 +84,6 @@ extern unsigned long ioremap_bot; */ #define kern_addr_valid(addr) (1) -#include <asm-generic/pgtable.h> - #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif @@ -107,6 +93,8 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned int shift); +pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va); + #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32) void mark_initmem_nx(void); #else @@ -158,9 +146,9 @@ static inline bool pud_is_leaf(pud_t pud) } #endif -#ifndef pgd_is_leaf -#define pgd_is_leaf pgd_is_leaf -static inline bool pgd_is_leaf(pgd_t pgd) +#ifndef p4d_is_leaf +#define p4d_is_leaf p4d_is_leaf +static inline bool p4d_is_leaf(p4d_t p4d) { return false; } diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 20ebf153c871..a7951049e129 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -11,9 +11,7 @@ #include <linux/jump_label.h> #include <asm/firmware.h> -DECLARE_STATIC_KEY_TRUE(pkey_disabled); -extern int pkeys_total; /* total pkeys as per device tree */ -extern u32 initial_allocation_mask; /* bits set for the initially allocated keys */ +extern int num_pkey; extern u32 reserved_allocation_mask; /* bits set for reserved keys */ #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ @@ -25,48 +23,28 @@ extern u32 reserved_allocation_mask; /* bits set for reserved keys */ PKEY_DISABLE_WRITE | \ PKEY_DISABLE_EXECUTE) +#ifdef CONFIG_PPC_BOOK3S_64 +#include <asm/book3s/64/pkeys.h> +#else +#error "Not supported" +#endif + + static inline u64 pkey_to_vmflag_bits(u16 pkey) { return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); } -static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) -{ - if (static_branch_likely(&pkey_disabled)) - return 0x0UL; - - return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL)); -} - static inline int vma_pkey(struct vm_area_struct *vma) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return 0; return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; } -#define arch_max_pkey() pkeys_total - -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline int arch_max_pkey(void) { - return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); -} - -static inline u16 pte_to_pkey_bits(u64 pteflags) -{ - return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL)); + return num_pkey; } #define pkey_alloc_mask(pkey) (0x1 << pkey) @@ -101,7 +79,7 @@ static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) /* * Returns a positive, 5-bit key on success, or -1 on failure. - * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and + * Relies on the mmap_lock to protect against concurrency in mm_pkey_alloc() and * mm_pkey_free(). 
*/ static inline int mm_pkey_alloc(struct mm_struct *mm) @@ -114,9 +92,8 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) u32 all_pkeys_mask = (u32)(~(0x0)); int ret; - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -1; - /* * Are we out of pkeys? We must handle this specially because ffz() * behavior is undefined if there are no zeros. @@ -132,7 +109,7 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -1; if (!mm_pkey_is_allocated(mm, pkey)) @@ -147,21 +124,13 @@ static inline int mm_pkey_free(struct mm_struct *mm, int pkey) * Try to dedicate one of the protection keys to be used as an * execute-only protection key. */ -extern int __execute_only_pkey(struct mm_struct *mm); -static inline int execute_only_pkey(struct mm_struct *mm) -{ - if (static_branch_likely(&pkey_disabled)) - return -1; - - return __execute_only_pkey(mm); -} - +extern int execute_only_pkey(struct mm_struct *mm); extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey); static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return 0; /* @@ -179,7 +148,7 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -EINVAL; /* @@ -196,7 +165,7 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool arch_pkeys_enabled(void) { - return !static_branch_likely(&pkey_disabled); + return mmu_has_feature(MMU_FTR_PKEY); } extern void pkey_mm_init(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 4497c8afb573..ece84a430701 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -312,7 +312,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr) static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { - return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0); +} + +static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1) +{ + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1); } static inline long plpar_signal_sys_reset(long cpu) @@ -334,6 +339,51 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) return rc; } +/* + * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately + * + * - Returns H_SUCCESS on success + * - For H_BUSY return value, we retry the hcall. + * - For any other hcall failures, attempt a full flush once before + * resorting to BUG(). + * + * Note: This hcall is expected to fail only very rarely. The correct + * error recovery of killing the process/guest will be eventually + * needed. 
+ */ +static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, + u64 page_sizes, u64 start, u64 end) +{ + long rc; + unsigned long all; + + while (true) { + rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type, + page_sizes, start, end); + if (rc == H_BUSY) { + cpu_relax(); + continue; + } else if (rc == H_SUCCESS) + return rc; + + /* Flush request failed, try with a full flush once */ + if (type & H_RPTI_TYPE_NESTED) + all = H_RPTI_TYPE_NESTED | H_RPTI_TYPE_NESTED_ALL; + else + all = H_RPTI_TYPE_ALL; +retry: + rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, + all, page_sizes, 0, -1UL); + if (rc == H_BUSY) { + cpu_relax(); + goto retry; + } else if (rc == H_SUCCESS) + return rc; + + BUG(); + } +} + #else /* !CONFIG_PPC_PSERIES */ static inline long plpar_set_ciabr(unsigned long ciabr) @@ -346,6 +396,13 @@ static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex, { return 0; } + +static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, + u64 page_sizes, u64 start, u64 end) +{ + return 0; +} + #endif /* CONFIG_PPC_PSERIES */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 7de82647e761..ee79d2cd9fb6 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -9,28 +9,26 @@ #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) -extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, - u16 *supported); -extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); -extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, +int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, char *rate_buf, int rate_buf_size); -extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, - uint64_t rate_buf_phys, int rate_buf_size); - -extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); -extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, - void __iomem *tfc, void __iomem *pe_handle); -extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, - void __iomem **dar, void __iomem **tfc, - void __iomem **pe_handle); - -extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, - void **platform_data); -extern void pnv_ocxl_spa_release(void *platform_data); -extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); - -extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); -extern void pnv_ocxl_free_xive_irq(u32 irq); +int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size); + +int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); +void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle); +int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle); + +int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **platform_data); +void pnv_ocxl_spa_release(void *platform_data); +int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); + +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); +void pnv_ocxl_free_xive_irq(u32 irq); #endif /* 
_ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c1df75edde44..a6e3700c4566 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -76,6 +76,19 @@ #define __REGA0_R30 30 #define __REGA0_R31 31 +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) +#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) + +/* + * 16-bit immediate helper macros: HA() is for use with sign-extending instrs + * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the + * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000). + */ +#define IMM_H(i) ((uintptr_t)(i)>>16) +#define IMM_HA(i) (((uintptr_t)(i)>>16) + \ + (((uintptr_t)(i) & 0x8000) >> 15)) + + /* opcode and xopcode for instructions */ #define OP_TRAP 3 #define OP_TRAP_64 2 @@ -158,6 +171,9 @@ /* VMX Vector Store Instructions */ #define OP_31_XOP_STVX 231 +/* Prefixed Instructions */ +#define OP_PREFIX 1 + #define OP_31 31 #define OP_LWZ 32 #define OP_STFS 52 @@ -192,56 +208,28 @@ #define OP_LQ 56 /* sorted alphabetically */ -#define PPC_INST_BHRBE 0x7c00025c -#define PPC_INST_CLRBHRB 0x7c00035c +#define PPC_INST_BCCTR_FLUSH 0x4c400420 #define PPC_INST_COPY 0x7c20060c -#define PPC_INST_CP_ABORT 0x7c00068c -#define PPC_INST_DARN 0x7c0005e6 #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe -#define PPC_INST_DCBAL 0x7c2005ec -#define PPC_INST_DCBZL 0x7c2007ec -#define PPC_INST_ICBT 0x7c00002c -#define PPC_INST_ICSWX 0x7c00032d -#define PPC_INST_ICSWEPX 0x7c00076d #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e -#define PPC_INST_LDARX 0x7c0000a8 -#define PPC_INST_STDCX 0x7c0001ad -#define PPC_INST_LQARX 0x7c000228 -#define PPC_INST_STQCX 0x7c00016d #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a -#define PPC_INST_LWARX 0x7c000028 -#define PPC_INST_STWCX 0x7c00012d #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_SYNC 0x7c0004ac #define PPC_INST_SYNC_MASK 0xfc0007fe #define PPC_INST_ISYNC 0x4c00012c -#define PPC_INST_LXVD2X 0x7c000698 #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe -#define PPC_INST_MFTMR 0x7c0002dc -#define PPC_INST_MSGSND 0x7c00019c -#define PPC_INST_MSGCLR 0x7c0001dc -#define PPC_INST_MSGSYNC 0x7c0006ec -#define PPC_INST_MSGSNDP 0x7c00011c -#define PPC_INST_MSGCLRP 0x7c00015c #define PPC_INST_MTMSRD 0x7c000164 -#define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 -#define PPC_INST_PASTE 0x7c20070d #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe -#define PPC_INST_POPCNTD 0x7c0003f4 -#define PPC_INST_POPCNTW 0x7c0002f4 #define PPC_INST_RFEBB 0x4c000124 -#define PPC_INST_RFCI 0x4c000066 -#define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFID 0x4c000024 -#define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe @@ -251,131 +239,43 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe -#define PPC_INST_MFVSRD 0x7c000066 -#define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SC 0x44000002 -#define PPC_INST_SLBFEE 0x7c0007a7 -#define PPC_INST_SLBIA 0x7c0003e4 - #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e - #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a 
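Aside: a worked example of the IMM_H()/IMM_HA()/IMM_L() split defined earlier in this file. For 0x12348765 the low half 0x8765 sign-extends to a negative value, so IMM_HA() returns 0x1234 + 1 = 0x1235; 0x12350000 plus the sign-extended 0xffff8765 recovers 0x12348765. A small hypothetical helper mirroring what an addis/addi pair does at run time:

static inline unsigned long example_addis_addi(unsigned long v)
{
	unsigned long r;

	r  = IMM_HA(v) << 16;	/* addis: adjusted high half       */
	r += (short)IMM_L(v);	/* addi:  low half, sign-extended  */

	return r;		/* equals v for any 32-bit constant */
}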
-#define PPC_INST_STXVD2X 0x7c000798 -#define PPC_INST_TLBIE 0x7c000264 -#define PPC_INST_TLBIEL 0x7c000224 -#define PPC_INST_TLBILX 0x7c000024 -#define PPC_INST_WAIT 0x7c00007c -#define PPC_INST_TLBIVAX 0x7c000624 -#define PPC_INST_TLBSRX_DOT 0x7c0006a5 -#define PPC_INST_VPMSUMW 0x10000488 -#define PPC_INST_VPMSUMD 0x100004c8 -#define PPC_INST_VPERMXOR 0x1000002d -#define PPC_INST_XXLOR 0xf0000490 -#define PPC_INST_XXSWAPD 0xf0000250 -#define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_TRECHKPT 0x7c0007dd #define PPC_INST_TRECLAIM 0x7c00075d -#define PPC_INST_TABORT 0x7c00071d #define PPC_INST_TSR 0x7c0005dd - -#define PPC_INST_NAP 0x4c000364 -#define PPC_INST_SLEEP 0x4c0003a4 -#define PPC_INST_WINKLE 0x4c0003e4 - -#define PPC_INST_STOP 0x4c0002e4 - -/* A2 specific instructions */ -#define PPC_INST_ERATWE 0x7c0001a6 -#define PPC_INST_ERATRE 0x7c000166 -#define PPC_INST_ERATILX 0x7c000066 -#define PPC_INST_ERATIVAX 0x7c000666 -#define PPC_INST_ERATSX 0x7c000126 -#define PPC_INST_ERATSX_DOT 0x7c000127 - -/* Misc instructions for BPF compiler */ -#define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 -#define PPC_INST_LDX 0x7c00002a -#define PPC_INST_LHZ 0xa0000000 -#define PPC_INST_LWZ 0x80000000 -#define PPC_INST_LHBRX 0x7c00062c -#define PPC_INST_LDBRX 0x7c000428 -#define PPC_INST_STB 0x98000000 -#define PPC_INST_STH 0xb0000000 #define PPC_INST_STD 0xf8000000 -#define PPC_INST_STDX 0x7c00012a -#define PPC_INST_STDU 0xf8000001 -#define PPC_INST_STW 0x90000000 -#define PPC_INST_STWU 0x94000000 #define PPC_INST_MFLR 0x7c0802a6 -#define PPC_INST_MTLR 0x7c0803a6 #define PPC_INST_MTCTR 0x7c0903a6 -#define PPC_INST_CMPWI 0x2c000000 -#define PPC_INST_CMPDI 0x2c200000 -#define PPC_INST_CMPW 0x7c000000 -#define PPC_INST_CMPD 0x7c200000 -#define PPC_INST_CMPLW 0x7c000040 -#define PPC_INST_CMPLD 0x7c200040 -#define PPC_INST_CMPLWI 0x28000000 -#define PPC_INST_CMPLDI 0x28200000 #define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 -#define PPC_INST_ADDC 0x7c000014 -#define PPC_INST_SUB 0x7c000050 #define PPC_INST_BLR 0x4e800020 -#define PPC_INST_BLRL 0x4e800021 #define PPC_INST_BCTR 0x4e800420 -#define PPC_INST_MULLD 0x7c0001d2 -#define PPC_INST_MULLW 0x7c0001d6 -#define PPC_INST_MULHWU 0x7c000016 -#define PPC_INST_MULLI 0x1c000000 -#define PPC_INST_MADDHD 0x10000030 -#define PPC_INST_MADDHDU 0x10000031 -#define PPC_INST_MADDLD 0x10000033 -#define PPC_INST_DIVWU 0x7c000396 +#define PPC_INST_BCTRL 0x4e800421 #define PPC_INST_DIVD 0x7c0003d2 -#define PPC_INST_DIVDU 0x7c000392 -#define PPC_INST_RLWINM 0x54000000 -#define PPC_INST_RLWINM_DOT 0x54000001 -#define PPC_INST_RLWIMI 0x50000000 -#define PPC_INST_RLDICL 0x78000000 #define PPC_INST_RLDICR 0x78000004 -#define PPC_INST_SLW 0x7c000030 -#define PPC_INST_SLD 0x7c000036 -#define PPC_INST_SRW 0x7c000430 -#define PPC_INST_SRAW 0x7c000630 -#define PPC_INST_SRAWI 0x7c000670 -#define PPC_INST_SRD 0x7c000436 -#define PPC_INST_SRAD 0x7c000634 -#define PPC_INST_SRADI 0x7c000674 -#define PPC_INST_AND 0x7c000038 -#define PPC_INST_ANDDOT 0x7c000039 -#define PPC_INST_OR 0x7c000378 -#define PPC_INST_XOR 0x7c000278 -#define PPC_INST_ANDI 0x70000000 #define PPC_INST_ORI 0x60000000 #define PPC_INST_ORIS 0x64000000 -#define PPC_INST_XORI 0x68000000 -#define PPC_INST_XORIS 0x6c000000 -#define PPC_INST_NEG 0x7c0000d0 -#define PPC_INST_EXTSW 0x7c0007b4 #define PPC_INST_BRANCH 0x48000000 #define PPC_INST_BRANCH_COND 0x40800000 -#define PPC_INST_LBZCIX 0x7c0006aa -#define PPC_INST_STBCIX 0x7c0007aa -#define 
PPC_INST_LWZX 0x7c00002e -#define PPC_INST_LFSX 0x7c00042e -#define PPC_INST_STFSX 0x7c00052e -#define PPC_INST_LFDX 0x7c0004ae -#define PPC_INST_STFDX 0x7c0005ae -#define PPC_INST_LVX 0x7c0000ce -#define PPC_INST_STVX 0x7c0001ce -#define PPC_INST_VCMPEQUD 0x100000c7 -#define PPC_INST_VCMPEQUB 0x10000006 + +/* Prefixes */ +#define PPC_INST_LFS 0xc0000000 +#define PPC_INST_STFS 0xd0000000 +#define PPC_INST_LFD 0xc8000000 +#define PPC_INST_STFD 0xd8000000 +#define PPC_PREFIX_MLS 0x06000000 +#define PPC_PREFIX_8LS 0x04000000 + +/* Prefixed instructions */ +#define PPC_INST_PLD 0xe4000000 +#define PPC_INST_PSTD 0xf4000000 /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) @@ -408,6 +308,7 @@ #define __PPC_CT(t) (((t) & 0x0f) << 21) #define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11)) #define __PPC_RC21 (0x1 << 10) +#define __PPC_PRFX_R(r) (((r) & 0x1) << 20) /* * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits @@ -428,175 +329,278 @@ #define __PPC_EH(eh) 0 #endif +/* Base instruction encoding */ +#define PPC_RAW_CP_ABORT (0x7c00068c) +#define PPC_RAW_COPY(a, b) (PPC_INST_COPY | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DARN(t, l) (0x7c0005e6 | ___PPC_RT(t) | (((l) & 0x3) << 16)) +#define PPC_RAW_DCBAL(a, b) (0x7c2005ec | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_DCBZL(a, b) (0x7c2007ec | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_LQARX(t, a, b, eh) (0x7c000228 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_LDARX(t, a, b, eh) (0x7c0000a8 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_LWARX(t, a, b, eh) (0x7c000028 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_PHWSYNC (0x7c8004ac) +#define PPC_RAW_PLWSYNC (0x7ca004ac) +#define PPC_RAW_STQCX(t, a, b) (0x7c00016d | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MADDHD(t, a, b, c) (0x10000030 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MADDHDU(t, a, b, c) (0x10000031 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MADDLD(t, a, b, c) (0x10000033 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MSGSND(b) (0x7c00019c | ___PPC_RB(b)) +#define PPC_RAW_MSGSYNC (0x7c0006ec) +#define PPC_RAW_MSGCLR(b) (0x7c0001dc | ___PPC_RB(b)) +#define PPC_RAW_MSGSNDP(b) (0x7c00011c | ___PPC_RB(b)) +#define PPC_RAW_MSGCLRP(b) (0x7c00015c | ___PPC_RB(b)) +#define PPC_RAW_PASTE(a, b) (0x7c20070d | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_POPCNTB(a, s) (PPC_INST_POPCNTB | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_POPCNTD(a, s) (0x7c0003f4 | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_POPCNTW(a, s) (0x7c0002f4 | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_RFCI (0x4c000066) +#define PPC_RAW_RFDI (0x4c00004e) +#define PPC_RAW_RFMCI (0x4c00004c) +#define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_WAIT(w) (0x7c00007c | __PPC_WC(w)) +#define PPC_RAW_TLBIE(lp, a) (0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp)) +#define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \ + (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \ + (0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | 
__PPC_RB(b)) +#define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_RAW_ERATRE(s, a, w) (0x7c000166 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_RAW_ERATILX(t, a, b) (0x7c000066 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATIVAX(s, a, b) (0x7c000666 | __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATSX(t, a, w) (0x7c000126 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATSX_DOT(t, a, w) (0x7c000127 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | __PPC_RT(t) | __PPC_RB(b)) +#define __PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | ___PPC_RT(t) | ___PPC_RB(b)) +#define PPC_RAW_ICBT(c, a, b) (0x7c00002c | __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_LBZCIX(t, a, b) (0x7c0006aa | __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21)) +#define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21)) +/* + * Define what the VSX XX1 form instructions will look like, then add + * the 128 bit load store instructions based on that. + */ +#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) +#define PPC_RAW_STXVD2X(s, a, b) (0x7c000798 | VSX_XX1((s), a, b)) +#define PPC_RAW_LXVD2X(s, a, b) (0x7c000698 | VSX_XX1((s), a, b)) +#define PPC_RAW_MFVRD(a, t) (0x7c000066 | VSX_XX1((t) + 32, a, R0)) +#define PPC_RAW_MTVRD(t, a) (0x7c000166 | VSX_XX1((t) + 32, a, R0)) +#define PPC_RAW_VPMSUMW(t, a, b) (0x10000488 | VSX_XX3((t), a, b)) +#define PPC_RAW_VPMSUMD(t, a, b) (0x100004c8 | VSX_XX3((t), a, b)) +#define PPC_RAW_XXLOR(t, a, b) (0xf0000490 | VSX_XX3((t), a, b)) +#define PPC_RAW_XXSWAPD(t, a) (0xf0000250 | VSX_XX3((t), a, a)) +#define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf0000780 | VSX_XX3((t), (a), (b)))) +#define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \ + ((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) +#define PPC_RAW_NAP (0x4c000364) +#define PPC_RAW_SLEEP (0x4c0003a4) +#define PPC_RAW_WINKLE (0x4c0003e4) +#define PPC_RAW_STOP (0x4c0002e4) +#define PPC_RAW_CLRBHRB (0x7c00035c) +#define PPC_RAW_MFBHRBE(r, n) (0x7c00025c | __PPC_RT(r) | (((n) & 0x3ff) << 11)) +#define PPC_RAW_TRECHKPT (PPC_INST_TRECHKPT) +#define PPC_RAW_TRECLAIM(r) (PPC_INST_TRECLAIM | __PPC_RA(r)) +#define PPC_RAW_TABORT(r) (0x7c00071d | __PPC_RA(r)) +#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6)) +#define PPC_RAW_MTTMR(tmr, r) (0x7c0003dc | TMRN(tmr) | ___PPC_RS(r)) +#define PPC_RAW_MFTMR(tmr, r) (0x7c0002dc | TMRN(tmr) | ___PPC_RT(r)) +#define PPC_RAW_ICSWX(s, a, b) (0x7c00032d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ICSWEPX(s, a, b) (0x7c00076d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SLBIA(IH) (0x7c0003e4 | (((IH) & 0x7) << 21)) +#define PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb) \ + (0x100000c7 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21) +#define PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb) \ + (0x10000006 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21) +#define PPC_RAW_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_DS(i)) +#define PPC_RAW_LWZ(r, base, i) (0x80000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LWZX(t, a, b) (0x7c00002e | ___PPC_RT(t) | ___PPC_RA(a) | 
___PPC_RB(b)) +#define PPC_RAW_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | ___PPC_RA(base) | IMM_DS(i)) +#define PPC_RAW_STDCX(s, a, b) (0x7c0001ad | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LFSX(t, a, b) (0x7c00042e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STFSX(s, a, b) (0x7c00052e | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LFDX(t, a, b) (0x7c0004ae | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LVX(t, a, b) (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STVX(s, a, b) (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_NOP() (PPC_INST_NOP) +#define PPC_RAW_BLR() (PPC_INST_BLR) +#define PPC_RAW_BLRL() (0x4e800021) +#define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) +#define PPC_RAW_BCTR() (PPC_INST_BCTR) +#define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) +#define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) +#define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) +#define PPC_RAW_STDX(r, base, b) (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_STDU(r, base, i) (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_RAW_STW(r, base, i) (0x90000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STWU(r, base, i) (0x94000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STH(r, base, i) (0xb0000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STB(r, base, i) (0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LBZ(r, base, i) (0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPDI(a, i) (0x2c200000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPW(a, b) (0x7c000000 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPD(a, b) (0x7c200000 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPLWI(a, i) (0x28000000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPLDI(a, i) (0x28200000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPLW(a, b) (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPLD(a, b) (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUB(d, a, b) (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b)) +#define PPC_RAW_MULD(d, a, b) (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) 
| ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULI(d, a, i) (0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_DIVWU(d, a, b) (0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDU(d, a, b) (0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDE(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDE_DOT(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_DIVDEU(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDEU_DOT(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_AND(d, a, b) (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_ANDI(d, a, i) (0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) +#define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_EXTSW(d, a) (0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a)) +#define PPC_RAW_SLW(d, a, s) (0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SLD(d, a, s) (0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRW(d, a, s) (0x7c000430 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAW(d, a, s) (0x7c000630 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAWI(d, a, i) (0x7c000670 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i)) +#define PPC_RAW_SRD(d, a, s) (0x7c000436 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAD(d, a, s) (0x7c000634 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRADI(d, a, i) (0x7c000674 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i)) +#define PPC_RAW_RLWINM(d, a, i, mb, me) (0x54000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLWINM_DOT(d, a, i, mb, me) \ + (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb)) +#define PPC_RAW_RLDICR(d, a, i, me) (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) + +/* slwi = rlwinm Rx, Ry, n, 0, 31-n */ +#define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i)) +/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ +#define PPC_RAW_SRWI(d, a, i) PPC_RAW_RLWINM(d, a, 32-(i), i, 31) +/* sldi = rldicr Rx, Ry, n, 63-n */ +#define PPC_RAW_SLDI(d, a, i) PPC_RAW_RLDICR(d, a, i, 63-(i)) +/* sldi = rldicl Rx, Ry, 64-n, n */ +#define PPC_RAW_SRDI(d, a, i) PPC_RAW_RLDICL(d, a, 64-(i), i) + +#define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) + /* Deal with instructions that older assemblers aren't 
aware of */ -#define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT) -#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \ - ___PPC_RT(t) | \ - (((l) & 0x3) << 16)) -#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ - __PPC_RA(a) | __PPC_RB(b)) -#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ - __PPC_RA(a) | __PPC_RB(b)) -#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHD | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHDU | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDLD | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ - ___PPC_RB(b)) -#define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC) -#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ - ___PPC_RB(b)) -#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ - ___PPC_RB(b)) -#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \ - ___PPC_RB(b)) -#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_INST_POPCNTW | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_RFCI stringify_in_c(.long PPC_INST_RFCI) -#define PPC_RFDI stringify_in_c(.long PPC_INST_RFDI) -#define PPC_RFMCI stringify_in_c(.long PPC_INST_RFMCI) -#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_INST_TLBILX | \ - __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) +#define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) +#define PPC_COPY(a, b) stringify_in_c(.long PPC_RAW_COPY(a, b)) +#define PPC_DARN(t, l) stringify_in_c(.long PPC_RAW_DARN(t, l)) +#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_RAW_DCBAL(a, b)) +#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b)) +#define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) +#define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) +#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) +#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh)) +#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh)) +#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) +#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) +#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c)) +#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDLD(t, 
a, b, c)) +#define PPC_MSGSND(b) stringify_in_c(.long PPC_RAW_MSGSND(b)) +#define PPC_MSGSYNC stringify_in_c(.long PPC_RAW_MSGSYNC) +#define PPC_MSGCLR(b) stringify_in_c(.long PPC_RAW_MSGCLR(b)) +#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_RAW_MSGSNDP(b)) +#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_RAW_MSGCLRP(b)) +#define PPC_PASTE(a, b) stringify_in_c(.long PPC_RAW_PASTE(a, b)) +#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_RAW_POPCNTB(a, s)) +#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_RAW_POPCNTD(a, s)) +#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_RAW_POPCNTW(a, s)) +#define PPC_RFCI stringify_in_c(.long PPC_RAW_RFCI) +#define PPC_RFDI stringify_in_c(.long PPC_RAW_RFDI) +#define PPC_RFMCI stringify_in_c(.long PPC_RAW_RFMCI) +#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_RAW_TLBILX(t, a, b)) #define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) #define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) #define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) -#define PPC_WAIT(w) stringify_in_c(.long PPC_INST_WAIT | \ - __PPC_WC(w)) -#define PPC_TLBIE(lp,a) stringify_in_c(.long PPC_INST_TLBIE | \ - ___PPC_RB(a) | ___PPC_RS(lp)) -#define PPC_TLBIE_5(rb,rs,ric,prs,r) \ - stringify_in_c(.long PPC_INST_TLBIE | \ - ___PPC_RB(rb) | ___PPC_RS(rs) | \ - ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ - ___PPC_R(r)) +#define PPC_WAIT(w) stringify_in_c(.long PPC_RAW_WAIT(w)) +#define PPC_TLBIE(lp, a) stringify_in_c(.long PPC_RAW_TLBIE(lp, a)) +#define PPC_TLBIE_5(rb, rs, ric, prs, r) \ + stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r)) #define PPC_TLBIEL(rb,rs,ric,prs,r) \ - stringify_in_c(.long PPC_INST_TLBIEL | \ - ___PPC_RB(rb) | ___PPC_RS(rs) | \ - ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ - ___PPC_R(r)) -#define PPC_TLBSRX_DOT(a,b) stringify_in_c(.long PPC_INST_TLBSRX_DOT | \ - __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \ - __PPC_RA0(a) | __PPC_RB(b)) - -#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_INST_ERATWE | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) -#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_INST_ERATRE | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) -#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_INST_ERATILX | \ - __PPC_T_TLB(t) | __PPC_RA0(a) | \ - __PPC_RB(b)) -#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_INST_ERATIVAX | \ - __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_INST_ERATSX | \ - __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \ - __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ - __PPC_RT(t) | __PPC_RB(b)) -#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ - ___PPC_RT(t) | ___PPC_RB(b)) -#define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \ - __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) + stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r)) +#define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b)) +#define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b)) + +#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_RAW_ERATWE(s, a, w)) +#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_RAW_ERATRE(a, a, w)) +#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_RAW_ERATILX(t, a, b)) +#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_RAW_ERATIVAX(s, a, b)) +#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX(t, a, w)) 
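The conversion in these hunks separates instruction encoding from emission: each PPC_RAW_*() macro now evaluates to a plain 32-bit instruction word that C code (code patching, the BPF JIT, emulation) can use directly, while the old PPC_*/stringify_in_c(.long ...) wrappers remain as a thin layer for inline assembly and .S files. A hedged sketch of the C-side usage (emit_return_one() and main() are made up for illustration; the macro bodies mirror the hunks above):

    #include <stddef.h>
    #include <stdint.h>

    #define ___PPC_RT(t)            (((t) & 0x1f) << 21)
    #define ___PPC_RA(a)            (((a) & 0x1f) << 16)
    #define IMM_L(i)                ((uintptr_t)(i) & 0xffff)

    #define PPC_INST_ADDI           0x38000000
    #define PPC_INST_BLR            0x4e800020

    #define PPC_RAW_ADDI(d, a, i)   (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
    #define PPC_RAW_LI(r, i)        PPC_RAW_ADDI(r, 0, i)
    #define PPC_RAW_BLR()           (PPC_INST_BLR)

    /* Build "li r3, 1; blr" as data, the way a JIT or code patcher would. */
    static size_t emit_return_one(uint32_t *buf)
    {
            buf[0] = PPC_RAW_LI(3, 1);      /* encodes as 0x38600001 */
            buf[1] = PPC_RAW_BLR();         /* encodes as 0x4e800020 */
            return 2;
    }

    int main(void)
    {
            uint32_t insns[2];

            return emit_return_one(insns) == 2 ? 0 : 1;
    }
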
+#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX_DOT(t, a, w)) +#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_RAW_SLBFEE_DOT(t, b)) +#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long __PPC_RAW_SLBFEE_DOT(t, b)) +#define PPC_ICBT(c, a, b) stringify_in_c(.long PPC_RAW_ICBT(c, a, b)) /* PASemi instructions */ -#define LBZCIX(t,a,b) stringify_in_c(.long PPC_INST_LBZCIX | \ - __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)) -#define STBCIX(s,a,b) stringify_in_c(.long PPC_INST_STBCIX | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) - -/* - * Define what the VSX XX1 form instructions will look like, then add - * the 128 bit load store instructions based on that. - */ -#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) -#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) -#define STXVD2X(s, a, b) stringify_in_c(.long PPC_INST_STXVD2X | \ - VSX_XX1((s), a, b)) -#define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \ - VSX_XX1((s), a, b)) -#define MFVRD(a, t) stringify_in_c(.long PPC_INST_MFVSRD | \ - VSX_XX1((t)+32, a, R0)) -#define MTVRD(t, a) stringify_in_c(.long PPC_INST_MTVSRD | \ - VSX_XX1((t)+32, a, R0)) -#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMW | \ - VSX_XX3((t), a, b)) -#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMD | \ - VSX_XX3((t), a, b)) -#define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ - VSX_XX3((t), a, b)) -#define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \ - VSX_XX3((t), a, a)) -#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \ - VSX_XX3((t), (a), (b)))) +#define LBZCIX(t, a, b) stringify_in_c(.long PPC_RAW_LBZCIX(t, a, b)) +#define STBCIX(s, a, b) stringify_in_c(.long PPC_RAW_STBCIX(s, a, b)) +#define PPC_DCBFPS(a, b) stringify_in_c(.long PPC_RAW_DCBFPS(a, b)) +#define PPC_DCBSTPS(a, b) stringify_in_c(.long PPC_RAW_DCBSTPS(a, b)) +#define PPC_PHWSYNC stringify_in_c(.long PPC_RAW_PHWSYNC) +#define PPC_PLWSYNC stringify_in_c(.long PPC_RAW_PLWSYNC) +#define STXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_STXVD2X(s, a, b)) +#define LXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_LXVD2X(s, a, b)) +#define MFVRD(a, t) stringify_in_c(.long PPC_RAW_MFVRD(a, t)) +#define MTVRD(t, a) stringify_in_c(.long PPC_RAW_MTVRD(t, a)) +#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMW(t, a, b)) +#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMD(t, a, b)) +#define XXLOR(t, a, b) stringify_in_c(.long PPC_RAW_XXLOR(t, a, b)) +#define XXSWAPD(t, a) stringify_in_c(.long PPC_RAW_XXSWAPD(t, a)) +#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_RAW_XVCPSGNDP(t, a, b))) #define VPERMXOR(vrt, vra, vrb, vrc) \ - stringify_in_c(.long (PPC_INST_VPERMXOR | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) + stringify_in_c(.long (PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc))) -#define PPC_NAP stringify_in_c(.long PPC_INST_NAP) -#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) -#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) +#define PPC_NAP stringify_in_c(.long PPC_RAW_NAP) +#define PPC_SLEEP stringify_in_c(.long PPC_RAW_SLEEP) +#define PPC_WINKLE stringify_in_c(.long PPC_RAW_WINKLE) -#define PPC_STOP stringify_in_c(.long PPC_INST_STOP) +#define PPC_STOP stringify_in_c(.long PPC_RAW_STOP) /* BHRB instructions */ -#define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB) -#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \ - __PPC_RT(r) | \ - (((n) & 0x3ff) << 11)) +#define PPC_CLRBHRB 
stringify_in_c(.long PPC_RAW_CLRBHRB) +#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_RAW_MFBHRBE(r, n)) /* Transactional memory instructions */ -#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT) -#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \ - | __PPC_RA(r)) -#define TABORT(r) stringify_in_c(.long PPC_INST_TABORT \ - | __PPC_RA(r)) +#define TRECHKPT stringify_in_c(.long PPC_RAW_TRECHKPT) +#define TRECLAIM(r) stringify_in_c(.long PPC_RAW_TRECLAIM(r)) +#define TABORT(r) stringify_in_c(.long PPC_RAW_TABORT(r)) /* book3e thread control instructions */ -#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6)) -#define MTTMR(tmr, r) stringify_in_c(.long PPC_INST_MTTMR | \ - TMRN(tmr) | ___PPC_RS(r)) -#define MFTMR(tmr, r) stringify_in_c(.long PPC_INST_MFTMR | \ - TMRN(tmr) | ___PPC_RT(r)) +#define MTTMR(tmr, r) stringify_in_c(.long PPC_RAW_MTTMR(tmr, r)) +#define MFTMR(tmr, r) stringify_in_c(.long PPC_RAW_MFTMR(tmr, r)) /* Coprocessor instructions */ -#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_INST_ICSWX | \ - ___PPC_RS(s) | \ - ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_INST_ICSWEPX | \ - ___PPC_RS(s) | \ - ___PPC_RA(a) | \ - ___PPC_RB(b)) - -#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ - ((IH & 0x7) << 21)) +#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWX(s, a, b)) +#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWEPX(s, a, b)) + +#define PPC_SLBIA(IH) stringify_in_c(.long PPC_RAW_SLBIA(IH)) /* * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix @@ -608,12 +612,8 @@ #define PPC_RADIX_INVALIDATE_ERAT_USER PPC_SLBIA(3) #define PPC_RADIX_INVALIDATE_ERAT_GUEST PPC_SLBIA(6) -#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | __PPC_RC21) +#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb)) -#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUB | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | __PPC_RC21) +#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb)) #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6b03dff61a05..b4cc6608131c 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -755,6 +755,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define N_SLINE 68 #define N_SO 100 +#define RFSCV .long 0x4c0000a4 + /* * Create an endian fixup trampoline * @@ -774,7 +776,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define FIXUP_ENDIAN #else /* - * This version may be used in in HV or non-HV context. + * This version may be used in HV or non-HV context. * MSR[EE] must be disabled. 
*/ #define FIXUP_ENDIAN \ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index eedcbfb9a6ff..ed0d633ab5aa 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -180,14 +180,14 @@ struct thread_struct { int fpexc_mode; /* floating-point exception mode */ unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT - struct perf_event *ptrace_bps[HBP_NUM]; + struct perf_event *ptrace_bps[HBP_NUM_MAX]; /* * Helps identify source of single-step exception and subsequent * hw-breakpoint enablement */ - struct perf_event *last_hit_ubp; + struct perf_event *last_hit_ubp[HBP_NUM_MAX]; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ + struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */ unsigned long trap_nr; /* last trap # on this thread */ u8 load_slb; /* Ages out SLB preload cache entries */ u8 load_fp; @@ -237,7 +237,6 @@ struct thread_struct { #ifdef CONFIG_PPC_MEM_KEYS unsigned long amr; unsigned long iamr; - unsigned long uamor; #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ @@ -272,7 +271,10 @@ struct thread_struct { unsigned mmcr0; unsigned used_ebb; - unsigned int used_vas; + unsigned long mmcr3; + unsigned long sier2; + unsigned long sier3; + #endif }; @@ -301,14 +303,12 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .addr_limit = KERNEL_DS, \ .fpexc_mode = 0, \ - .fscr = FSCR_TAR | FSCR_EBB \ } #endif -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs) +#define task_pt_regs(tsk) ((tsk)->thread.regs) unsigned long get_wchan(struct task_struct *p); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 94e3fd54f2c8..324a13351749 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -117,6 +117,7 @@ extern int of_read_drc_info_cell(struct property **prop, #define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */ #define OV1_PPC_3_00 0x80 /* set if we support PowerPC 3.00 */ +#define OV1_PPC_3_1 0x40 /* set if we support PowerPC 3.1 */ /* Option vector 2: Open Firmware options supported */ #define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e0195e6b892b..155a197c0aa1 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -179,6 +179,22 @@ extern int ptrace_put_reg(struct task_struct *task, int regno, #define current_pt_regs() \ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) + +#ifdef __powerpc64__ +#ifdef CONFIG_PPC_BOOK3S +#define TRAP_FLAGS_MASK 0x10 +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) +#define FULL_REGS(regs) true +#define SET_FULL_REGS(regs) do { } while (0) +#else +#define TRAP_FLAGS_MASK 0x11 +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) +#define FULL_REGS(regs) (((regs)->trap & 1) == 0) +#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#endif +#define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) +#define NV_REG_POISON 0xdeadbeefdeadbeefUL +#else /* * We use the least-significant bit of the trap field to indicate * whether we have saved the full set of registers, or only a @@ -186,17 +202,13 @@ extern int ptrace_put_reg(struct task_struct *task, int regno, * On 4xx we use the 
next bit to indicate whether the exception * is a critical exception (1 means it is). */ +#define TRAP_FLAGS_MASK 0x1F +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#ifndef __powerpc64__ +#define SET_FULL_REGS(regs) ((regs)->trap |= 1) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) -#endif /* ! __powerpc64__ */ -#define TRAP(regs) ((regs)->trap & ~0xF) -#ifdef __powerpc64__ -#define NV_REG_POISON 0xdeadbeefdeadbeefUL -#define CHECK_FULL_REGS(regs) BUG_ON(regs->trap & 1) -#else #define NV_REG_POISON 0xdeadbeef #define CHECK_FULL_REGS(regs) \ do { \ @@ -205,8 +217,33 @@ do { \ } while (0) #endif /* __powerpc64__ */ +static inline void set_trap(struct pt_regs *regs, unsigned long val) +{ + regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK); +} + +static inline bool trap_is_scv(struct pt_regs *regs) +{ + return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000); +} + +static inline bool trap_is_syscall(struct pt_regs *regs) +{ + return (trap_is_scv(regs) || TRAP(regs) == 0xc00); +} + +static inline bool trap_norestart(struct pt_regs *regs) +{ + return regs->trap & 0x10; +} + +static inline void set_trap_norestart(struct pt_regs *regs) +{ + regs->trap |= 0x10; +} + #define arch_has_single_step() (1) -#ifndef CONFIG_BOOK3S_601 +#ifndef CONFIG_PPC_BOOK3S_601 #define arch_has_block_step() (true) #else #define arch_has_block_step() (false) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h new file mode 100644 index 000000000000..b752d34517b3 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_QSPINLOCK_H +#define _ASM_POWERPC_QSPINLOCK_H + +#include <asm-generic/qspinlock_types.h> +#include <asm/paravirt.h> + +#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + if (!is_shared_processor()) + native_queued_spin_lock_slowpath(lock, val); + else + __pv_queued_spin_lock_slowpath(lock, val); +} + +#define queued_spin_unlock queued_spin_unlock +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + if (!is_shared_processor()) + smp_store_release(&lock->locked, 0); + else + __pv_queued_spin_unlock(lock); +} + +#else +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +#endif + +static __always_inline void queued_spin_lock(struct qspinlock *lock) +{ + u32 val = 0; + + if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + queued_spin_lock_slowpath(lock, val); +} +#define queued_spin_lock queued_spin_lock + +#define smp_mb__after_spinlock() smp_mb() + +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) +{ + /* + * This barrier was added to simple spinlocks by commit 51d7d5205d338, + * but it should now be possible to remove it, asm arm64 has done with + * commit c6f5d02b6a0f. 
+ */ + smp_mb(); + return atomic_read(&lock->val); +} +#define queued_spin_is_locked queued_spin_is_locked + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define SPIN_THRESHOLD (1<<15) /* not tuned */ + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + if (*ptr != val) + return; + yield_to_any(); + /* + * We could pass in a CPU here if waiting in the queue and yield to + * the previous CPU in the queue. + */ +} + +static __always_inline void pv_kick(int cpu) +{ + prod_cpu(cpu); +} + +extern void __pv_init_lock_hash(void); + +static inline void pv_spinlocks_init(void) +{ + __pv_init_lock_hash(); +} + +#endif + +#include <asm-generic/qspinlock.h> + +#endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000000..6b60e7736a47 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_paravirt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H +#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H + +EXPORT_SYMBOL(__pv_queued_spin_unlock); + +#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index da5cab038e25..88fb88491fe9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -12,6 +12,7 @@ #ifdef __KERNEL__ #include <linux/stringify.h> +#include <linux/const.h> #include <asm/cputable.h> #include <asm/asm-const.h> #include <asm/feature-fixups.h> @@ -283,14 +284,16 @@ #define CTRL_CT1 0x40000000 /* thread 1 */ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 -#define SPRN_DAWR 0xB4 +#define SPRN_DAWR0 0xB4 +#define SPRN_DAWR1 0xB5 #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 #define CIABR_PRIV_USER 1 #define CIABR_PRIV_SUPER 2 #define CIABR_PRIV_HYPER 3 -#define SPRN_DAWRX 0xBC +#define SPRN_DAWRX0 0xBC +#define SPRN_DAWRX1 0xBD #define DAWRX_USER __MASK(0) #define DAWRX_KERNEL __MASK(1) #define DAWRX_HYP __MASK(2) @@ -397,6 +400,7 @@ #define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */ /* HFSCR and FSCR bit numbers are the same */ +#define FSCR_PREFIX_LG 13 /* Enable Prefix Instructions */ #define FSCR_SCV_LG 12 /* Enable System Call Vectored */ #define FSCR_MSGP_LG 10 /* Enable MSGP */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ @@ -408,11 +412,13 @@ #define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */ #define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ +#define FSCR_PREFIX __MASK(FSCR_PREFIX_LG) #define FSCR_SCV __MASK(FSCR_SCV_LG) #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ +#define HFSCR_PREFIX __MASK(FSCR_PREFIX_LG) #define HFSCR_MSGP __MASK(FSCR_MSGP_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) @@ -468,7 +474,8 @@ #ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif -#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD 0xfff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ #define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ @@ 
-476,16 +483,18 @@ #define PCR_VEC_DIS (__MASK(63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (__MASK(63-1)) /* VSX disable (bit NA since POWER8) */ #define PCR_TM_DIS (__MASK(63-2)) /* Trans. memory disable (POWER8) */ -#define PCR_HIGH_BITS (PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS) +#define PCR_MMA_DIS (__MASK(63-3)) /* Matrix-Multiply Accelerator */ +#define PCR_HIGH_BITS (PCR_MMA_DIS | PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS) /* * These bits are used in the function kvmppc_set_arch_compat() to specify and * determine both the compatibility level which we want to emulate and the * compatibility level which the host is capable of emulating. */ +#define PCR_ARCH_300 0x10 /* Architecture 3.00 */ #define PCR_ARCH_207 0x8 /* Architecture 2.07 */ #define PCR_ARCH_206 0x4 /* Architecture 2.06 */ #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ -#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205) +#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205 | PCR_ARCH_300) #define PCR_MASK ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ #define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ @@ -759,7 +768,7 @@ #endif #define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */ -#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ +#define SRR1_ISI_N_G_OR_CIP 0x10000000 /* ISI: Access is no-exec or G or CI for a prefixed instruction */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ #define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 and 9 */ @@ -786,6 +795,8 @@ #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ #define SRR1_MCE_MCP 0x00080000 /* Machine check signal caused interrupt */ +#define SRR1_BOUNDARY 0x10000000 /* Prefixed instruction crosses 64-byte boundary */ +#define SRR1_PREFIXED 0x20000000 /* Exception caused by prefixed instruction */ #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ @@ -867,7 +878,9 @@ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 #define SPRN_MMCR2 785 +#define SPRN_MMCR3 754 #define SPRN_UMMCR2 769 +#define SPRN_UMMCR3 738 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL @@ -877,6 +890,7 @@ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ #define MMCRA_SLOT_SHIFT 24 #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ +#define MMCRA_BHRB_DISABLE _UL(0x2000000000) // BHRB disable bit for ISA v3.1 #define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */ #define POWER6_MMCRA_SIHV 0x0000040000000000ULL #define POWER6_MMCRA_SIPR 0x0000020000000000ULL @@ -909,6 +923,10 @@ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +#define SPRN_SIER2 752 +#define SPRN_SIER3 753 +#define SPRN_USIER2 736 +#define SPRN_USIER3 737 #define SPRN_SIAR 796 #define SPRN_SDAR 797 #define SPRN_TACR 888 @@ -1345,6 +1363,7 @@ #define PVR_ARCH_206p 0x0f100003 #define PVR_ARCH_207 0x0f000004 #define PVR_ARCH_300 0x0f000005 +#define PVR_ARCH_31 0x0f000006 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ @@ -1463,7 +1482,7 @@ static inline void update_power8_hid0(unsigned 
long hid0) { /* * The HID0 update on Power8 should at the very least be - * preceded by a a SYNC instruction followed by an ISYNC + * preceded by a SYNC instruction followed by an ISYNC * instruction */ asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index f26fe482fbca..ff30f1076162 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -663,60 +663,6 @@ #define EPC_EPID 0x00003fff #define EPC_EPID_SHIFT 0 -/* - * The IBM-403 is an even more odd special case, as it is much - * older than the IBM-405 series. We put these down here incase someone - * wishes to support these machines again. - */ -#ifdef CONFIG_403GCX -/* Special Purpose Registers (SPRNs)*/ -#define SPRN_TBHU 0x3CC /* Time Base High User-mode */ -#define SPRN_TBLU 0x3CD /* Time Base Low User-mode */ -#define SPRN_CDBCR 0x3D7 /* Cache Debug Control Register */ -#define SPRN_TBHI 0x3DC /* Time Base High */ -#define SPRN_TBLO 0x3DD /* Time Base Low */ -#define SPRN_DBCR 0x3F2 /* Debug Control Register */ -#define SPRN_PBL1 0x3FC /* Protection Bound Lower 1 */ -#define SPRN_PBL2 0x3FE /* Protection Bound Lower 2 */ -#define SPRN_PBU1 0x3FD /* Protection Bound Upper 1 */ -#define SPRN_PBU2 0x3FF /* Protection Bound Upper 2 */ - - -/* Bit definitions for the DBCR. */ -#define DBCR_EDM DBCR0_EDM -#define DBCR_IDM DBCR0_IDM -#define DBCR_RST(x) (((x) & 0x3) << 28) -#define DBCR_RST_NONE 0 -#define DBCR_RST_CORE 1 -#define DBCR_RST_CHIP 2 -#define DBCR_RST_SYSTEM 3 -#define DBCR_IC DBCR0_IC /* Instruction Completion Debug Evnt */ -#define DBCR_BT DBCR0_BT /* Branch Taken Debug Event */ -#define DBCR_EDE DBCR0_EDE /* Exception Debug Event */ -#define DBCR_TDE DBCR0_TDE /* TRAP Debug Event */ -#define DBCR_FER 0x00F80000 /* First Events Remaining Mask */ -#define DBCR_FT 0x00040000 /* Freeze Timers on Debug Event */ -#define DBCR_IA1 0x00020000 /* Instr. Addr. Compare 1 Enable */ -#define DBCR_IA2 0x00010000 /* Instr. Addr. Compare 2 Enable */ -#define DBCR_D1R 0x00008000 /* Data Addr. Compare 1 Read Enable */ -#define DBCR_D1W 0x00004000 /* Data Addr. Compare 1 Write Enable */ -#define DBCR_D1S(x) (((x) & 0x3) << 12) /* Data Adrr. Compare 1 Size */ -#define DAC_BYTE 0 -#define DAC_HALF 1 -#define DAC_WORD 2 -#define DAC_QUAD 3 -#define DBCR_D2R 0x00000800 /* Data Addr. Compare 2 Read Enable */ -#define DBCR_D2W 0x00000400 /* Data Addr. Compare 2 Write Enable */ -#define DBCR_D2S(x) (((x) & 0x3) << 8) /* Data Addr. Compare 2 Size */ -#define DBCR_SBT 0x00000040 /* Second Branch Taken Debug Event */ -#define DBCR_SED 0x00000020 /* Second Exception Debug Event */ -#define DBCR_STD 0x00000010 /* Second Trap Debug Event */ -#define DBCR_SIA 0x00000008 /* Second IAC Enable */ -#define DBCR_SDA 0x00000004 /* Second DAC Enable */ -#define DBCR_JOI 0x00000002 /* JTAG Serial Outbound Int. Enable */ -#define DBCR_JII 0x00000001 /* JTAG Serial Inbound Int. 
Enable */ -#endif /* 403GCX */ - /* Some 476 specific registers */ #define SPRN_SSPCR 830 #define SPRN_USPCR 831 diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h new file mode 100644 index 000000000000..aa420561bc10 --- /dev/null +++ b/arch/powerpc/include/asm/rtas-types.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_RTAS_TYPES_H +#define _ASM_POWERPC_RTAS_TYPES_H + +#include <linux/spinlock_types.h> + +typedef __be32 rtas_arg_t; + +struct rtas_args { + __be32 token; + __be32 nargs; + __be32 nret; + rtas_arg_t args[16]; + rtas_arg_t *rets; /* Pointer to return values in args[]. */ +}; + +struct rtas_t { + unsigned long entry; /* physical address pointer */ + unsigned long base; /* physical address pointer */ + unsigned long size; + arch_spinlock_t lock; + struct rtas_args args; + struct device_node *dev; /* virtual address pointer */ +}; + +struct rtas_suspend_me_data { + atomic_t working; /* number of cpus accessing this struct */ + atomic_t done; + int token; /* ibm,suspend-me */ + atomic_t error; + struct completion *complete; /* wait on this until working == 0 */ +}; + +struct rtas_error_log { + /* Byte 0 */ + u8 byte0; /* Architectural version */ + + /* Byte 1 */ + u8 byte1; + /* XXXXXXXX + * XXX 3: Severity level of error + * XX 2: Degree of recovery + * X 1: Extended log present? + * XX 2: Reserved + */ + + /* Byte 2 */ + u8 byte2; + /* XXXXXXXX + * XXXX 4: Initiator of event + * XXXX 4: Target of failed operation + */ + u8 byte3; /* General event or error*/ + __be32 extended_log_length; /* length in bytes */ + unsigned char buffer[1]; /* Start of extended log */ + /* Variable length. */ +}; + +/* RTAS general extended event log, Version 6. The extended log starts + * from "buffer" field of struct rtas_error_log defined above. + */ +struct rtas_ext_event_log_v6 { + /* Byte 0 */ + u8 byte0; + /* XXXXXXXX + * X 1: Log valid + * X 1: Unrecoverable error + * X 1: Recoverable (correctable or successfully retried) + * X 1: Bypassed unrecoverable error (degraded operation) + * X 1: Predictive error + * X 1: "New" log (always 1 for data returned from RTAS) + * X 1: Big Endian + * X 1: Reserved + */ + + /* Byte 1 */ + u8 byte1; /* reserved */ + + /* Byte 2 */ + u8 byte2; + /* XXXXXXXX + * X 1: Set to 1 (indicating log is in PowerPC format) + * XXX 3: Reserved + * XXXX 4: Log format used for bytes 12-2047 + */ + + /* Byte 3 */ + u8 byte3; /* reserved */ + /* Byte 4-11 */ + u8 reserved[8]; /* reserved */ + /* Byte 12-15 */ + __be32 company_id; /* Company ID of the company */ + /* that defines the format for */ + /* the vendor specific log type */ + /* Byte 16-end of log */ + u8 vendor_log[1]; /* Start of vendor specific log */ + /* Variable length. 
*/ +}; + +/* Vendor specific Platform Event Log Format, Version 6, section header */ +struct pseries_errorlog { + __be16 id; /* 0x00 2-byte ASCII section ID */ + __be16 length; /* 0x02 Section length in bytes */ + u8 version; /* 0x04 Section version */ + u8 subtype; /* 0x05 Section subtype */ + __be16 creator_component; /* 0x06 Creator component ID */ + u8 data[]; /* 0x08 Start of section data */ +}; + +/* RTAS pseries hotplug errorlog section */ +struct pseries_hp_errorlog { + u8 resource; + u8 action; + u8 id_type; + u8 reserved; + union { + __be32 drc_index; + __be32 drc_count; + struct { __be32 count, index; } ic; + char drc_name[1]; + } _drc_u; +}; + +#endif /* _ASM_POWERPC_RTAS_TYPES_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3c1887351c71..55f9a154c95d 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -5,6 +5,7 @@ #include <linux/spinlock.h> #include <asm/page.h> +#include <asm/rtas-types.h> #include <linux/time.h> #include <linux/cpumask.h> @@ -42,33 +43,6 @@ * */ -typedef __be32 rtas_arg_t; - -struct rtas_args { - __be32 token; - __be32 nargs; - __be32 nret; - rtas_arg_t args[16]; - rtas_arg_t *rets; /* Pointer to return values in args[]. */ -}; - -struct rtas_t { - unsigned long entry; /* physical address pointer */ - unsigned long base; /* physical address pointer */ - unsigned long size; - arch_spinlock_t lock; - struct rtas_args args; - struct device_node *dev; /* virtual address pointer */ -}; - -struct rtas_suspend_me_data { - atomic_t working; /* number of cpus accessing this struct */ - atomic_t done; - int token; /* ibm,suspend-me */ - atomic_t error; - struct completion *complete; /* wait on this until working == 0 */ -}; - /* RTAS event classes */ #define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */ #define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */ @@ -148,31 +122,6 @@ struct rtas_suspend_me_data { /* RTAS check-exception vector offset */ #define RTAS_VECTOR_EXTERNAL_INTERRUPT 0x500 -struct rtas_error_log { - /* Byte 0 */ - uint8_t byte0; /* Architectural version */ - - /* Byte 1 */ - uint8_t byte1; - /* XXXXXXXX - * XXX 3: Severity level of error - * XX 2: Degree of recovery - * X 1: Extended log present? - * XX 2: Reserved - */ - - /* Byte 2 */ - uint8_t byte2; - /* XXXXXXXX - * XXXX 4: Initiator of event - * XXXX 4: Target of failed operation - */ - uint8_t byte3; /* General event or error*/ - __be32 extended_log_length; /* length in bytes */ - unsigned char buffer[1]; /* Start of extended log */ - /* Variable length. */ -}; - static inline uint8_t rtas_error_severity(const struct rtas_error_log *elog) { return (elog->byte1 & 0xE0) >> 5; @@ -212,47 +161,6 @@ uint32_t rtas_error_extended_log_length(const struct rtas_error_log *elog) #define RTAS_V6EXT_COMPANY_ID_IBM (('I' << 24) | ('B' << 16) | ('M' << 8)) -/* RTAS general extended event log, Version 6. The extended log starts - * from "buffer" field of struct rtas_error_log defined above. 
- */ -struct rtas_ext_event_log_v6 { - /* Byte 0 */ - uint8_t byte0; - /* XXXXXXXX - * X 1: Log valid - * X 1: Unrecoverable error - * X 1: Recoverable (correctable or successfully retried) - * X 1: Bypassed unrecoverable error (degraded operation) - * X 1: Predictive error - * X 1: "New" log (always 1 for data returned from RTAS) - * X 1: Big Endian - * X 1: Reserved - */ - - /* Byte 1 */ - uint8_t byte1; /* reserved */ - - /* Byte 2 */ - uint8_t byte2; - /* XXXXXXXX - * X 1: Set to 1 (indicating log is in PowerPC format) - * XXX 3: Reserved - * XXXX 4: Log format used for bytes 12-2047 - */ - - /* Byte 3 */ - uint8_t byte3; /* reserved */ - /* Byte 4-11 */ - uint8_t reserved[8]; /* reserved */ - /* Byte 12-15 */ - __be32 company_id; /* Company ID of the company */ - /* that defines the format for */ - /* the vendor specific log type */ - /* Byte 16-end of log */ - uint8_t vendor_log[1]; /* Start of vendor specific log */ - /* Variable length. */ -}; - static inline uint8_t rtas_ext_event_log_format(struct rtas_ext_event_log_v6 *ext_log) { @@ -287,16 +195,6 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) #define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P') #define PSERIES_ELOG_SECT_ID_MCE (('M' << 8) | 'C') -/* Vendor specific Platform Event Log Format, Version 6, section header */ -struct pseries_errorlog { - __be16 id; /* 0x00 2-byte ASCII section ID */ - __be16 length; /* 0x02 Section length in bytes */ - uint8_t version; /* 0x04 Section version */ - uint8_t subtype; /* 0x05 Section subtype */ - __be16 creator_component; /* 0x06 Creator component ID */ - uint8_t data[]; /* 0x08 Start of section data */ -}; - static inline uint16_t pseries_errorlog_id(struct pseries_errorlog *sect) { @@ -309,20 +207,6 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) return be16_to_cpu(sect->length); } -/* RTAS pseries hotplug errorlog section */ -struct pseries_hp_errorlog { - u8 resource; - u8 action; - u8 id_type; - u8 reserved; - union { - __be32 drc_index; - __be32 drc_count; - struct { __be32 count, index; } ic; - char drc_name[1]; - } _drc_u; -}; - #define PSERIES_HP_ELOG_RESOURCE_CPU 1 #define PSERIES_HP_ELOG_RESOURCE_MEM 2 #define PSERIES_HP_ELOG_RESOURCE_SLOT 3 @@ -331,7 +215,6 @@ struct pseries_hp_errorlog { #define PSERIES_HP_ELOG_ACTION_ADD 1 #define PSERIES_HP_ELOG_ACTION_REMOVE 2 -#define PSERIES_HP_ELOG_ACTION_READD 3 #define PSERIES_HP_ELOG_ID_DRC_NAME 1 #define PSERIES_HP_ELOG_ID_DRC_INDEX 2 @@ -352,6 +235,7 @@ extern struct rtas_t rtas; extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...); void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...); extern void __noreturn rtas_restart(char *cmd); @@ -368,8 +252,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_online_cpus_mask(cpumask_var_t cpus); -extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(u64 handle); struct rtc_time; @@ -483,5 +365,11 @@ static inline void rtas_initialize(void) { }; extern int call_rtas(const char *, int, int, unsigned long *, ...); +#ifdef CONFIG_HV_PERF_CTRS +void 
read_24x7_sys_info(void); +#else +static inline void read_24x7_sys_info(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _POWERPC_RTAS_H */ diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d19871763ed4..324d7b298ec3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!probe_kernel_address(&desc->funcaddr, p)) + if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 7c05e95a5c44..fbb8fa32150f 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -63,6 +63,8 @@ static inline bool security_ftr_enabled(u64 feature) // bcctr 2,0,0 triggers a hardware assisted count cache flush #define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0000000000000800ull +// bcctr 2,0,0 triggers a hardware assisted link stack flush +#define SEC_FTR_BCCTR_LINK_FLUSH_ASSIST 0x0000000000002000ull // Features indicating need for Spectre/Meltdown mitigations diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b8..9efbddee2bca 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -30,12 +30,12 @@ void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES -extern void pseries_enable_reloc_on_exc(void); +extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); extern void pseries_little_endian_exceptions(void); #else -static inline void pseries_enable_reloc_on_exc(void) {} +static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h new file mode 100644 index 000000000000..9c3c30534333 --- /dev/null +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -0,0 +1,288 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_H +#define _ASM_POWERPC_SIMPLE_SPINLOCK_H + +/* + * Simple spin lock operations. + * + * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM + * Rework to support virtual processors + * + * Type of int is used as a full 64b word is not necessary. 
+ * + * (the type definitions are in asm/simple_spinlock_types.h) + */ +#include <linux/irqflags.h> +#include <asm/paravirt.h> +#include <asm/paca.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> + +#ifdef CONFIG_PPC64 +/* use 0x800000yy when locked, where yy == CPU number */ +#ifdef __BIG_ENDIAN__ +#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) +#else +#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) +#endif +#else +#define LOCK_TOKEN 1 +#endif + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.slock == 0; +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + smp_mb(); + return !arch_spin_value_unlocked(*lock); +} + +/* + * This returns the old value in the lock, so we succeeded + * in getting the lock if the return value is 0. + */ +static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned long tmp, token; + + token = LOCK_TOKEN; + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%2,1) "\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n\ + stwcx. %1,0,%2\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (tmp) + : "r" (token), "r" (&lock->slock) + : "cr0", "memory"); + + return tmp; +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return __arch_spin_trylock(lock) == 0; +} + +/* + * On a system with shared processors (that is, where a physical + * processor is multiplexed between several virtual processors), + * there is no point spinning on a lock if the holder of the lock + * isn't currently scheduled on a physical processor. Instead + * we detect this situation and ask the hypervisor to give the + * rest of our timeslice to the lock holder. + * + * So that we can tell which virtual processor is holding a lock, + * we put 0x80000000 | smp_processor_id() in the lock when it is + * held. Conveniently, we have a word in the paca that holds this + * value. + */ + +#if defined(CONFIG_PPC_SPLPAR) +/* We only yield to the hypervisor if we are in shared processor mode */ +void splpar_spin_yield(arch_spinlock_t *lock); +void splpar_rw_yield(arch_rwlock_t *lock); +#else /* SPLPAR */ +static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; +static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; +#endif + +static inline void spin_yield(arch_spinlock_t *lock) +{ + if (is_shared_processor()) + splpar_spin_yield(lock); + else + barrier(); +} + +static inline void rw_yield(arch_rwlock_t *lock) +{ + if (is_shared_processor()) + splpar_rw_yield(lock); + else + barrier(); +} + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + while (1) { + if (likely(__arch_spin_trylock(lock) == 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + } +} + +static inline +void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +{ + unsigned long flags_dis; + + while (1) { + if (likely(__arch_spin_trylock(lock) == 0)) + break; + local_save_flags(flags_dis); + local_irq_restore(flags); + do { + HMT_low(); + if (is_shared_processor()) + splpar_spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + local_irq_restore(flags_dis); + } +} +#define arch_spin_lock_flags arch_spin_lock_flags + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + __asm__ __volatile__("# arch_spin_unlock\n\t" + PPC_RELEASE_BARRIER: : :"memory"); + lock->slock = 0; +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! 
it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +#ifdef CONFIG_PPC64 +#define __DO_SIGN_EXTEND "extsw %0,%0\n" +#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ +#else +#define __DO_SIGN_EXTEND +#define WRLOCK_TOKEN (-1) +#endif + +/* + * This returns the old value in the lock + 1, + * so we got a read lock if the return value is > 0. + */ +static inline long __arch_read_trylock(arch_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%1,1) "\n" + __DO_SIGN_EXTEND +" addic. %0,%0,1\n\ + ble- 2f\n" +" stwcx. %0,0,%1\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" : "=&r" (tmp) + : "r" (&rw->lock) + : "cr0", "xer", "memory"); + + return tmp; +} + +/* + * This returns the old value in the lock, + * so we got the write lock if the return value is 0. + */ +static inline long __arch_write_trylock(arch_rwlock_t *rw) +{ + long tmp, token; + + token = WRLOCK_TOKEN; + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%2,1) "\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n" +" stwcx. %1,0,%2\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" : "=&r" (tmp) + : "r" (token), "r" (&rw->lock) + : "cr0", "memory"); + + return tmp; +} + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + while (1) { + if (likely(__arch_read_trylock(rw) > 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_rw_yield(rw); + } while (unlikely(rw->lock < 0)); + HMT_medium(); + } +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + while (1) { + if (likely(__arch_write_trylock(rw) == 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_rw_yield(rw); + } while (unlikely(rw->lock != 0)); + HMT_medium(); + } +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + return __arch_read_trylock(rw) > 0; +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + return __arch_write_trylock(rw) == 0; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( + "# read_unlock\n\t" + PPC_RELEASE_BARRIER +"1: lwarx %0,0,%1\n\ + addic %0,%0,-1\n" +" stwcx. 
%0,0,%1\n\ + bne- 1b" + : "=&r"(tmp) + : "r"(&rw->lock) + : "cr0", "xer", "memory"); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + __asm__ __volatile__("# write_unlock\n\t" + PPC_RELEASE_BARRIER: : :"memory"); + rw->lock = 0; +} + +#define arch_spin_relax(lock) spin_yield(lock) +#define arch_read_relax(lock) rw_yield(lock) +#define arch_write_relax(lock) rw_yield(lock) + +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + +#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h new file mode 100644 index 000000000000..0f3cdd8faa95 --- /dev/null +++ b/arch/powerpc/include/asm/simple_spinlock_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H +#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int slock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile signed int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H */ diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index c6f466f4c241..0bdd9c62eca0 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -4,8 +4,6 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/slice.h> -#elif defined(CONFIG_PPC_MMU_NOHASH_32) -#include <asm/nohash/32/slice.h> #endif #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 8dff086c0cab..4b30a0205c93 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -108,7 +108,7 @@ /* * i2c commands * - * To issue an i2c command, first is to send a parameter block to the + * To issue an i2c command, first is to send a parameter block to * the SMU. This is a command of type 0x9a with 9 bytes of header * eventually followed by data for a write: * diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index c89b32443cff..1e6fa371cc38 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,12 +17,6 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); -#ifdef CONFIG_PPC_BOOK3S_64 -extern int resize_hpt_for_hotplug(unsigned long new_mem_size); -#else -static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; } -#endif - #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); #else diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 860228e917dc..6ec72282888d 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -3,316 +3,16 @@ #define __ASM_SPINLOCK_H #ifdef __KERNEL__ -/* - * Simple spin lock operations. - * - * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM - * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM - * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM - * Rework to support virtual processors - * - * Type of int is used as a full 64b word is not necessary. 
- * - * (the type definitions are in asm/spinlock_types.h) - */ -#include <linux/jump_label.h> -#include <linux/irqflags.h> -#ifdef CONFIG_PPC64 -#include <asm/paca.h> -#include <asm/hvcall.h> -#endif -#include <asm/synch.h> -#include <asm/ppc-opcode.h> -#include <asm/asm-405.h> - -#ifdef CONFIG_PPC64 -/* use 0x800000yy when locked, where yy == CPU number */ -#ifdef __BIG_ENDIAN__ -#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) -#else -#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) -#endif +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include <asm/qspinlock.h> +#include <asm/qrwlock.h> #else -#define LOCK_TOKEN 1 +#include <asm/simple_spinlock.h> #endif -#ifdef CONFIG_PPC_PSERIES -DECLARE_STATIC_KEY_FALSE(shared_processor); - -#define vcpu_is_preempted vcpu_is_preempted -static inline bool vcpu_is_preempted(int cpu) -{ - if (!static_branch_unlikely(&shared_processor)) - return false; - return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); -} -#endif - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - return lock.slock == 0; -} - -static inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - smp_mb(); - return !arch_spin_value_unlocked(*lock); -} - -/* - * This returns the old value in the lock, so we succeeded - * in getting the lock if the return value is 0. - */ -static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned long tmp, token; - - token = LOCK_TOKEN; - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ - cmpwi 0,%0,0\n\ - bne- 2f\n\ - stwcx. %1,0,%2\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" - : "=&r" (tmp) - : "r" (token), "r" (&lock->slock) - : "cr0", "memory"); - - return tmp; -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - return __arch_spin_trylock(lock) == 0; -} - -/* - * On a system with shared processors (that is, where a physical - * processor is multiplexed between several virtual processors), - * there is no point spinning on a lock if the holder of the lock - * isn't currently scheduled on a physical processor. Instead - * we detect this situation and ask the hypervisor to give the - * rest of our timeslice to the lock holder. - * - * So that we can tell which virtual processor is holding a lock, - * we put 0x80000000 | smp_processor_id() in the lock when it is - * held. Conveniently, we have a word in the paca that holds this - * value. 
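A side note on the lock-word encoding described in the spinlock comments above (the same text is kept in the new simple_spinlock.h): because a held lock stores 0x80000000 | smp_processor_id(), the holder CPU can be recovered from the lock value alone, which is what lets the SPLPAR code direct a hypervisor yield at the right vCPU. A minimal illustrative decoder, assuming only that encoding and not taken from the patch:

/* Illustration only: recover the holding CPU from a locked lock word. */
static inline int lock_holder_cpu(unsigned int slock)
{
        if (slock == 0)
                return -1;                 /* not held */
        return slock & 0x7fffffff;         /* low bits carry the CPU number */
}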
- */ - -#if defined(CONFIG_PPC_SPLPAR) -/* We only yield to the hypervisor if we are in shared processor mode */ -void splpar_spin_yield(arch_spinlock_t *lock); -void splpar_rw_yield(arch_rwlock_t *lock); -#else /* SPLPAR */ -static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; -static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; -#endif - -static inline bool is_shared_processor(void) -{ -#ifdef CONFIG_PPC_SPLPAR - return static_branch_unlikely(&shared_processor); -#else - return false; -#endif -} - -static inline void spin_yield(arch_spinlock_t *lock) -{ - if (is_shared_processor()) - splpar_spin_yield(lock); - else - barrier(); -} - -static inline void rw_yield(arch_rwlock_t *lock) -{ - if (is_shared_processor()) - splpar_rw_yield(lock); - else - barrier(); -} - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - while (1) { - if (likely(__arch_spin_trylock(lock) == 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_spin_yield(lock); - } while (unlikely(lock->slock != 0)); - HMT_medium(); - } -} - -static inline -void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) -{ - unsigned long flags_dis; - - while (1) { - if (likely(__arch_spin_trylock(lock) == 0)) - break; - local_save_flags(flags_dis); - local_irq_restore(flags); - do { - HMT_low(); - if (is_shared_processor()) - splpar_spin_yield(lock); - } while (unlikely(lock->slock != 0)); - HMT_medium(); - local_irq_restore(flags_dis); - } -} -#define arch_spin_lock_flags arch_spin_lock_flags - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - __asm__ __volatile__("# arch_spin_unlock\n\t" - PPC_RELEASE_BARRIER: : :"memory"); - lock->slock = 0; -} - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - */ - -#ifdef CONFIG_PPC64 -#define __DO_SIGN_EXTEND "extsw %0,%0\n" -#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ -#else -#define __DO_SIGN_EXTEND -#define WRLOCK_TOKEN (-1) +#ifndef CONFIG_PARAVIRT_SPINLOCKS +static inline void pv_spinlocks_init(void) { } #endif -/* - * This returns the old value in the lock + 1, - * so we got a read lock if the return value is > 0. - */ -static inline long __arch_read_trylock(arch_rwlock_t *rw) -{ - long tmp; - - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%1,1) "\n" - __DO_SIGN_EXTEND -" addic. %0,%0,1\n\ - ble- 2f\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" : "=&r" (tmp) - : "r" (&rw->lock) - : "cr0", "xer", "memory"); - - return tmp; -} - -/* - * This returns the old value in the lock, - * so we got the write lock if the return value is 0. - */ -static inline long __arch_write_trylock(arch_rwlock_t *rw) -{ - long tmp, token; - - token = WRLOCK_TOKEN; - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ - cmpwi 0,%0,0\n\ - bne- 2f\n" - PPC405_ERR77(0,%1) -" stwcx. 
%1,0,%2\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" : "=&r" (tmp) - : "r" (token), "r" (&rw->lock) - : "cr0", "memory"); - - return tmp; -} - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - while (1) { - if (likely(__arch_read_trylock(rw) > 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_rw_yield(rw); - } while (unlikely(rw->lock < 0)); - HMT_medium(); - } -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - while (1) { - if (likely(__arch_write_trylock(rw) == 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_rw_yield(rw); - } while (unlikely(rw->lock != 0)); - HMT_medium(); - } -} - -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - return __arch_read_trylock(rw) > 0; -} - -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - return __arch_write_trylock(rw) == 0; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - long tmp; - - __asm__ __volatile__( - "# read_unlock\n\t" - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 1b" - : "=&r"(tmp) - : "r"(&rw->lock) - : "cr0", "xer", "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - __asm__ __volatile__("# write_unlock\n\t" - PPC_RELEASE_BARRIER: : :"memory"); - rw->lock = 0; -} - -#define arch_spin_relax(lock) spin_yield(lock) -#define arch_read_relax(lock) rw_yield(lock) -#define arch_write_relax(lock) rw_yield(lock) - -/* See include/linux/spinlock.h */ -#define smp_mb__after_spinlock() smp_mb() - #endif /* __KERNEL__ */ #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 87adaf13b7e8..c5d742f18021 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -6,16 +6,11 @@ # error "please don't include this file directly" #endif -typedef struct { - volatile unsigned int slock; -} arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } - -typedef struct { - volatile signed int lock; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { 0 } +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> +#else +#include <asm/simple_spinlock_types.h> +#endif #endif diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 769f055509c9..972ed0df154d 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -2,6 +2,7 @@ /* * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM */ +#include <asm/inst.h> struct pt_regs; @@ -15,9 +16,9 @@ struct pt_regs; * Note that IS_MTMSRD returns true for both an mtmsr (32-bit) * and an mtmsrd (64-bit). */ -#define IS_MTMSRD(instr) (((instr) & 0xfc0007be) == 0x7c000124) -#define IS_RFID(instr) (((instr) & 0xfc0007fe) == 0x4c000024) -#define IS_RFI(instr) (((instr) & 0xfc0007fe) == 0x4c000064) +#define IS_MTMSRD(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124) +#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000024) +#define IS_RFI(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000064) enum instruction_type { COMPUTE, /* arith/logical/CR op, etc. 
*/ @@ -39,6 +40,7 @@ enum instruction_type { CACHEOP, BARRIER, SYSCALL, + SYSCALL_VECTORED_0, MFMSR, MTMSR, RFI, @@ -48,6 +50,8 @@ enum instruction_type { #define INSTR_TYPE_MASK 0x1f +#define OP_IS_LOAD(type) ((LOAD <= (type) && (type) <= LOAD_VSX) || (type) == LARX) +#define OP_IS_STORE(type) ((STORE <= (type) && (type) <= STORE_VSX) || (type) == STCX) #define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX) /* Compute flags, ORed in with type */ @@ -89,14 +93,24 @@ enum instruction_type { #define VSX_LDLEFT 4 /* load VSX register from left */ #define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */ +/* Prefixed flag, ORed in with type */ +#define PREFIXED 0x800 + /* Size field in type word */ #define SIZE(n) ((n) << 12) #define GETSIZE(w) ((w) >> 12) #define GETTYPE(t) ((t) & INSTR_TYPE_MASK) +#define GETLENGTH(t) (((t) & PREFIXED) ? 8 : 4) #define MKOP(t, f, s) ((t) | (f) | SIZE(s)) +/* Prefix instruction operands */ +#define GET_PREFIX_RA(i) (((i) >> 16) & 0x1f) +#define GET_PREFIX_R(i) ((i) & (1ul << 20)) + +extern s32 patch__exec_instr; + struct instruction_op { int type; int reg; @@ -132,7 +146,7 @@ union vsx_reg { * otherwise. */ extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - unsigned int instr); + struct ppc_inst instr); /* * Emulate an instruction that can be executed just by updating @@ -149,7 +163,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); * 0 if it could not be emulated, or -1 for an instruction that * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.). */ -extern int emulate_step(struct pt_regs *regs, unsigned int instr); +extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr); /* * Emulate a load or store instruction by reading/writing the diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index b72692702f35..283552cd0e58 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -28,7 +28,7 @@ extern void * memcpy(void *,const void *,__kernel_size_t); extern void * memmove(void *,const void *,__kernel_size_t); extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); -extern void * memcpy_flushcache(void *,const void *,__kernel_size_t); +void memcpy_flushcache(void *dest, const void *src, size_t size); void *__memset(void *s, int c, __kernel_size_t count); void *__memcpy(void *to, const void *from, __kernel_size_t n); diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index b867b58b1093..fdab93428372 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -102,8 +102,6 @@ static inline void clear_task_ebb(struct task_struct *t) #endif } -extern int set_thread_uses_vas(void); - extern int set_thread_tidr(struct task_struct *t); #endif /* _ASM_POWERPC_SWITCH_TO_H */ diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 38d62acfdce7..fd1b518eed17 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -26,7 +26,10 @@ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) * This is important for seccomp so that compat tasks can set r0 = -1 * to reject the syscall. */ - return TRAP(regs) == 0xc00 ? 
regs->gpr[0] : -1; + if (trap_is_syscall(regs)) + return regs->gpr[0]; + else + return -1; } static inline void syscall_rollback(struct task_struct *task, diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 39ce95016a3a..cb326720a8a1 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -39,7 +39,6 @@ struct div_result { }; /* Accessor functions for the timebase (RTC on 601) registers. */ -/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */ #define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) #ifdef CONFIG_PPC64 @@ -51,24 +50,12 @@ struct div_result { static inline unsigned long get_tbl(void) { -#if defined(CONFIG_403GCX) - unsigned long tbl; - asm volatile("mfspr %0, 0x3dd" : "=r" (tbl)); - return tbl; -#else return mftbl(); -#endif } static inline unsigned int get_tbu(void) { -#ifdef CONFIG_403GCX - unsigned int tbu; - asm volatile("mfspr %0, 0x3dc" : "=r" (tbu)); - return tbu; -#else return mftbu(); -#endif } #endif /* !CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index d2d2c4bd8435..6047402b0a4d 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -17,7 +17,7 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_BOOK3S_601)) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) return 0; return mftb(); diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 7f3a8b902325..fbc6f3002f23 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -10,9 +10,8 @@ #ifdef __KERNEL__ #ifndef __powerpc64__ -#include <asm/pgtable.h> +#include <linux/pgtable.h> #endif -#include <asm/pgalloc.h> #ifndef __powerpc64__ #include <asm/page.h> #include <asm/mmu.h> diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 2db7ba789720..f0b6300e7dd3 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -43,7 +43,6 @@ extern void __init dump_numa_cpu_topology(void); extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); -extern int numa_update_cpu_topology(bool cpus_locked); static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) { @@ -78,11 +77,6 @@ static inline void sysfs_remove_device_from_node(struct device *dev, { } -static inline int numa_update_cpu_topology(bool cpus_locked) -{ - return 0; -} - static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) @@ -93,32 +87,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) -extern int start_topology_update(void); -extern int stop_topology_update(void); -extern int prrn_is_enabled(void); extern int find_and_online_cpu_nid(int cpu); -extern int timed_topology_update(int nsecs); #else -static inline int start_topology_update(void) -{ - return 0; -} -static inline int stop_topology_update(void) -{ - return 0; -} -static inline int prrn_is_enabled(void) -{ - return 0; -} static inline int find_and_online_cpu_nid(int cpu) { return 0; } -static inline int timed_topology_update(int nsecs) -{ - return 0; -} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ @@ -141,7 +115,6 @@ int get_physical_package_id(int cpu); #define topology_core_cpumask(cpu) 
(per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) -int dlpar_cpu_readd(int cpu); #endif #endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 0969285996cb..00699903f1ef 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) #ifdef __powerpc64__ @@ -93,18 +92,63 @@ static inline int __access_ok(unsigned long addr, unsigned long size, #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true) + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#define __put_user_goto(x, ptr, label) \ + __put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #define __get_user_allowed(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) -#define __put_user_allowed(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false) #define __get_user_inatomic(x, ptr) \ __get_user_nosleep((x), (ptr), sizeof(*(ptr))) #define __put_user_inatomic(x, ptr) \ __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#ifdef CONFIG_PPC64 + +#define ___get_user_instr(gu_op, dest, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int __prefix, __suffix; \ + __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + if (__gui_ret == 0) { \ + if ((__prefix >> 26) == OP_PREFIX) { \ + __gui_ret = gu_op(__suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(__prefix, \ + __suffix); \ + } else { \ + __gui_inst = ppc_inst(__prefix); \ + } \ + if (__gui_ret == 0) \ + (dest) = __gui_inst; \ + } \ + __gui_ret; \ +}) + +#define get_user_instr(x, ptr) \ + ___get_user_instr(get_user, x, ptr) + +#define __get_user_instr(x, ptr) \ + ___get_user_instr(__get_user, x, ptr) + +#define __get_user_instr_inatomic(x, ptr) \ + ___get_user_instr(__get_user_inatomic, x, ptr) + +#else /* !CONFIG_PPC64 */ +#define get_user_instr(x, ptr) \ + get_user((x).val, (u32 __user *)(ptr)) + +#define __get_user_instr(x, ptr) \ + __get_user_nocheck((x).val, (u32 __user *)(ptr), sizeof(u32), true) + +#define __get_user_instr_inatomic(x, ptr) \ + __get_user_nosleep((x).val, (u32 __user *)(ptr), sizeof(u32)) + +#endif /* CONFIG_PPC64 */ + extern long __put_user_bad(void); /* @@ -162,7 +206,7 @@ do { \ prevent_write_to_user(ptr, size); \ } while (0) -#define __put_user_nocheck(x, ptr, size, do_allow) \ +#define __put_user_nocheck(x, ptr, size) \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ @@ -172,10 +216,7 @@ do { \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ __chk_user_ptr(__pu_addr); \ - if (do_allow) \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ - else \ - __put_user_size_allowed(__pu_val, __pu_addr, __pu_size, __pu_err); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ \ __pu_err; \ }) @@ -208,6 +249,52 @@ do { \ }) +#define __put_user_asm_goto(x, addr, label, op) \ + asm volatile goto( \ + "1: " op "%U1%X1 %0,%1 # put_user\n" \ + EX_TABLE(1b, %l2) \ + : \ + : "r" (x), "m" (*addr) \ + : \ + : label) + +#ifdef 
__powerpc64__ +#define __put_user_asm2_goto(x, ptr, label) \ + __put_user_asm_goto(x, ptr, label, "std") +#else /* __powerpc64__ */ +#define __put_user_asm2_goto(x, addr, label) \ + asm volatile goto( \ + "1: stw%X1 %0, %1\n" \ + "2: stw%X1 %L0, %L1\n" \ + EX_TABLE(1b, %l2) \ + EX_TABLE(2b, %l2) \ + : \ + : "r" (x), "m" (*addr) \ + : \ + : label) +#endif /* __powerpc64__ */ + +#define __put_user_size_goto(x, ptr, size, label) \ +do { \ + switch (size) { \ + case 1: __put_user_asm_goto(x, ptr, label, "stb"); break; \ + case 2: __put_user_asm_goto(x, ptr, label, "sth"); break; \ + case 4: __put_user_asm_goto(x, ptr, label, "stw"); break; \ + case 8: __put_user_asm2_goto(x, ptr, label); break; \ + default: __put_user_bad(); \ + } \ +} while (0) + +#define __put_user_nocheck_goto(x, ptr, size, label) \ +do { \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (!is_kernel_addr((unsigned long)__pu_addr)) \ + might_fault(); \ + __chk_user_ptr(ptr); \ + __put_user_size_goto((x), __pu_addr, (size), label); \ +} while (0) + + extern long __get_user_bad(void); /* @@ -489,10 +576,51 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t #define user_access_save prevent_user_access_return #define user_access_restore restore_user_access +static __must_check inline bool +user_read_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr, len))) + return false; + allow_read_from_user(ptr, len); + return true; +} +#define user_read_access_begin user_read_access_begin +#define user_read_access_end prevent_current_read_from_user + +static __must_check inline bool +user_write_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr, len))) + return false; + allow_write_to_user((void __user *)ptr, len); + return true; +} +#define user_write_access_begin user_write_access_begin +#define user_write_access_end prevent_current_write_to_user + #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) -#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e) +#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e) + #define unsafe_copy_to_user(d, s, l, e) \ - unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e) +do { \ + u8 __user *_dst = (u8 __user *)(d); \ + const u8 *_src = (const u8 *)(s); \ + size_t _len = (l); \ + int _i; \ + \ + for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ + __put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\ + if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ + __put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ + _i += 4; \ + } \ + if (_len & 2) { \ + __put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ + _i += 2; \ + } \ + if (_len & 1) \ + __put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\ +} while (0) #endif /* _ARCH_POWERPC_UACCESS_H */ diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 2bbdf27d09b5..5bf65f5d44a9 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -11,10 +11,11 @@ #include <linux/notifier.h> #include <asm/probes.h> +#include <asm/inst.h> typedef ppc_opcode_t uprobe_opcode_t; -#define MAX_UINSN_BYTES 4 +#define MAX_UINSN_BYTES 8 #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) /* The following alias is needed for reference from arch-agnostic code */ @@ -23,8 +24,8 @@ typedef ppc_opcode_t uprobe_opcode_t; 
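Returning to the uaccess.h changes above: the new user_read_access_begin()/user_read_access_end() and user_write_access_begin()/user_write_access_end() helpers are meant to bracket the unsafe accessors so that access_ok() and the user-access (KUAP) window are handled once per block. A hedged usage sketch; the function name and the Efault label are invented for illustration:

static int read_u32_from_user(u32 __user *uptr, u32 *out)
{
        u32 val;

        if (!user_read_access_begin(uptr, sizeof(*uptr)))
                return -EFAULT;
        unsafe_get_user(val, uptr, Efault);     /* branches to Efault on a fault */
        user_read_access_end();
        *out = val;
        return 0;

Efault:
        user_read_access_end();
        return -EFAULT;
}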
struct arch_uprobe { union { - u32 insn; - u32 ixol; + struct ppc_inst insn; + struct ppc_inst ixol; }; }; diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index f93e6b0f5c84..e33f80b0ea81 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -86,7 +86,6 @@ struct vas_tx_win_attr { int wcreds_max; int lpid; int pidr; /* hardware PID (from SPRN_PID) */ - int pid; /* linux process id */ int pswid; int rsvd_txbuf_count; int tc_mode; @@ -163,4 +162,16 @@ int vas_copy_crb(void *crb, int offset); */ int vas_paste_crb(struct vas_window *win, int offset, bool re); +/* + * Register / unregister coprocessor type to VAS API which will be exported + * to user space. Applications can use this API to open / close a window + * which can be used to send / receive requests directly to the coprocessor. + * + * Only NX GZIP coprocessor type is supported now, but this API can be + * used for others in the future. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name); +void vas_unregister_coproc_api(void); + #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/include/asm/xilinx_intc.h b/arch/powerpc/include/asm/xilinx_intc.h deleted file mode 100644 index ca9aa162fb09..000000000000 --- a/arch/powerpc/include/asm/xilinx_intc.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Xilinx intc external definitions - * - * Copyright 2007 Secret Lab Technologies Ltd. - */ -#ifndef _ASM_POWERPC_XILINX_INTC_H -#define _ASM_POWERPC_XILINX_INTC_H - -#ifdef __KERNEL__ - -extern void __init xilinx_intc_init_tree(void); -extern unsigned int xintc_get_irq(void); - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_XILINX_INTC_H */ diff --git a/arch/powerpc/include/asm/xilinx_pci.h b/arch/powerpc/include/asm/xilinx_pci.h deleted file mode 100644 index 7a8275caf6af..000000000000 --- a/arch/powerpc/include/asm/xilinx_pci.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Xilinx pci external definitions - * - * Copyright 2009 Roderick Colenbrander - * Copyright 2009 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef INCLUDE_XILINX_PCI -#define INCLUDE_XILINX_PCI - -#ifdef CONFIG_XILINX_PCI -extern void __init xilinx_pci_init(void); -#else -static inline void __init xilinx_pci_init(void) { return; } -#endif - -#endif /* INCLUDE_XILINX_PCI */ diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 33aee7490cbb..8b211faa0e42 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -37,6 +37,14 @@ #define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ #define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ +/* + * Load-after-store ordering + * + * Adding this offset to the load address will enforce + * load-after-store ordering. This is required to use StoreEOI.
+ */ +#define XIVE_ESB_LD_ST_MO 0x40 /* Load-after-store ordering */ + #define XIVE_ESB_VAL_P 0x2 #define XIVE_ESB_VAL_Q 0x1 #define XIVE_ESB_INVALID 0xFF diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 93f982dbb3d4..309b4d65b74f 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -5,6 +5,8 @@ #ifndef _ASM_POWERPC_XIVE_H #define _ASM_POWERPC_XIVE_H +#include <asm/opal-api.h> + #define XIVE_INVALID_VP 0xffffffff #ifdef CONFIG_PPC_XIVE @@ -108,7 +110,6 @@ void xive_native_free_vp_block(u32 vp_base); int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data); void xive_cleanup_irq_data(struct xive_irq_data *xd); -u32 xive_native_alloc_irq(void); void xive_native_free_irq(u32 irq); int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); @@ -137,6 +138,12 @@ int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, u32 qindex); int xive_native_get_vp_state(u32 vp_id, u64 *out_state); bool xive_native_has_queue_state_support(void); +extern u32 xive_native_alloc_irq_on_chip(u32 chip_id); + +static inline u32 xive_native_alloc_irq(void) +{ + return xive_native_alloc_irq_on_chip(OPAL_XIVE_ANY_CHIP); +} #else @@ -148,7 +155,6 @@ static inline void xive_smp_probe(void) { } static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } static inline void xive_smp_setup_cpu(void) { } static inline void xive_smp_disable_cpu(void) { } -static inline void xive_kexec_teardown_cpu(int secondary) { } static inline void xive_shutdown(void) { } static inline void xive_flush_interrupt(void) { } diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 540592034740..731b97dc2d15 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -50,6 +50,8 @@ #define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */ #define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ #define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* TM w/out suspended state */ +#define PPC_FEATURE2_ARCH_3_1 0x00040000 /* ISA 3.1 */ +#define PPC_FEATURE2_MMA 0x00020000 /* Matrix Multiply Assist */ /* * IMPORTANT! diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 264e266a85bf..c3af3f324c5a 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) +/* POWER10 registers */ +#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) +#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) +#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h index be48c2215fa2..a809b1b44ddf 100644 --- a/arch/powerpc/include/uapi/asm/kvm_para.h +++ b/arch/powerpc/include/uapi/asm/kvm_para.h @@ -31,7 +31,7 @@ * Struct fields are always 32 or 64 bit aligned, depending on them being 32 * or 64 bit wide respectively. 
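The PPC_FEATURE2_ARCH_3_1 and PPC_FEATURE2_MMA bits added to uapi/asm/cputable.h above are reported to userspace through the AT_HWCAP2 auxiliary vector entry. A small probe program might look like the sketch below; the constants are repeated from the header rather than included:

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
        unsigned long hwcap2 = getauxval(AT_HWCAP2);

        if (hwcap2 & 0x00040000)        /* PPC_FEATURE2_ARCH_3_1 */
                printf("ISA 3.1 supported\n");
        if (hwcap2 & 0x00020000)        /* PPC_FEATURE2_MMA */
                printf("Matrix Multiply Assist supported\n");
        return 0;
}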
* - * See Documentation/virt/kvm/ppc-pv.txt + * See Documentation/virt/kvm/ppc-pv.rst */ struct kvm_vcpu_arch_shared { __u64 scratch1; diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h new file mode 100644 index 000000000000..50ef95e2f5b1 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl + * + * (C) Copyright IBM 2020 + * + * Author: Vaibhav Jain <vaibhav at linux.ibm.com> + */ + +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ + +#include <linux/types.h> +#include <linux/ndctl.h> + +/* + * PDSM Envelope: + * + * The ioctl ND_CMD_CALL exchanges data between user-space and kernel via + * an envelope which consists of 2 header sections and a payload section as + * illustrated below: + * +-----------------+---------------+---------------------------+ + * | 64-Bytes | 8-Bytes | Max 184-Bytes | + * +-----------------+---------------+---------------------------+ + * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | + * +-----------------+---------------+---------------------------+ + * | nd_family | | | + * | nd_size_out | cmd_status | | + * | nd_size_in | reserved | nd_pdsm_payload | + * | nd_command | payload --> | | + * | nd_fw_size | | | + * | nd_payload ---> | | | + * +---------------+-----------------+---------------------------+ + * + * ND Header: + * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' + * which is interpreted by libnvdimm before being passed on to papr_scm. Important + * member fields used are: + * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM + * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) + * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD + * 'nd_command' : (In) One of PAPR_PDSM_XXX + * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned + * + * PDSM Header: + * This is the papr-scm specific header that precedes the payload. This is defined + * as 'struct nd_pkg_pdsm'. The following fields are available in this header: + * + * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. + * 'reserved' : Not used, reserved for future and should be set to 0. + * 'payload' : A union of all the possible payload structs + * + * PDSM Payload: + * + * The layout of the PDSM Payload is defined by various structs shared between + * papr_scm and libndctl so that contents of payload can be interpreted. As such + * it's defined as a union of all possible payload structs as + * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' the + * appropriate member of the union is accessed. + */ + +/* Max payload size that we can handle */ +#define ND_PDSM_PAYLOAD_MAX_SIZE 184 + +/* Size of the PDSM header, i.e. the envelope without the payload */ +#define ND_PDSM_HDR_SIZE \ + (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) + +/* Various nvdimm health indicators */ +#define PAPR_PDSM_DIMM_HEALTHY 0 +#define PAPR_PDSM_DIMM_UNHEALTHY 1 +#define PAPR_PDSM_DIMM_CRITICAL 2 +#define PAPR_PDSM_DIMM_FATAL 3 + +/* struct nd_papr_pdsm_health.extension_flags field flags */ + +/* Indicate that the 'dimm_fuel_gauge' field is valid */ +#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 + +/* + * Struct exchanged between kernel & ndctl for PAPR_PDSM_HEALTH + * Various flags indicate the health status of the dimm. + * + * extension_flags : Any extension fields present in the struct. + * dimm_unarmed : Dimm not armed. So contents won't persist.
+ * dimm_bad_shutdown : Previous shutdown did not persist contents. + * dimm_bad_restore : Contents from previous shutdown weren't restored. + * dimm_scrubbed : Contents of the dimm have been scrubbed. + * dimm_locked : Contents of the dimm can't be modified until CEC reboot + * dimm_encrypted : Contents of the dimm are encrypted. + * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX + * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100 + */ +struct nd_papr_pdsm_health { + union { + struct { + __u32 extension_flags; + __u8 dimm_unarmed; + __u8 dimm_bad_shutdown; + __u8 dimm_bad_restore; + __u8 dimm_scrubbed; + __u8 dimm_locked; + __u8 dimm_encrypted; + __u16 dimm_health; + + /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ + __u16 dimm_fuel_gauge; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + +/* + * Methods to be embedded in an ND_CMD_CALL request. These are sent to the kernel + * via the 'nd_cmd_pkg.nd_command' member of the ioctl struct + */ +enum papr_pdsm { + PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_HEALTH, + PAPR_PDSM_MAX, +}; + +/* Maximal union that can hold all possible payload types */ +union nd_pdsm_payload { + struct nd_papr_pdsm_health health; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; +} __packed; + +/* + * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm + * The valid member of the union 'payload' is identified via 'nd_cmd_pkg.nd_command' + * in the nd_cmd_pkg header that should always precede this struct when sent to + * papr_scm via the CMD_CALL interface. + */ +struct nd_pkg_pdsm { + __s32 cmd_status; /* Out: Sub-cmd status returned back */ + __u16 reserved[2]; /* Ignored and to be set as '0' */ + union nd_pdsm_payload payload; +} __packed; + +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index f599064dd8dc..bdf5f10f8b9f 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -48,6 +48,24 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, PERF_REG_POWERPC_MMCRA, - PERF_REG_POWERPC_MAX, + /* Extended registers */ + PERF_REG_POWERPC_MMCR0, + PERF_REG_POWERPC_MMCR1, + PERF_REG_POWERPC_MMCR2, + PERF_REG_POWERPC_MMCR3, + PERF_REG_POWERPC_SIER2, + PERF_REG_POWERPC_SIER3, + /* Max regs without the extended regs */ + PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; + +#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) + +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ +#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ +#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) + +#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) +#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h new file mode 100644 index 000000000000..ebd4b2424785 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/vas-api.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright 2019 IBM Corp.
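To make the PDSM envelope documented in papr_pdsm.h above concrete, a userspace caller such as libndctl would assemble an ND_CMD_CALL package roughly as follows. This is a sketch based only on the header comment; ND_IOCTL_CALL, NVDIMM_FAMILY_PAPR_SCM and struct nd_cmd_pkg come from linux/ndctl.h, and the function name is invented:

#include <sys/ioctl.h>
#include <linux/ndctl.h>
#include <asm/papr_pdsm.h>

/* Sketch only: issue a PAPR_PDSM_HEALTH request through the envelope above. */
static int papr_pdsm_health_query(int dimm_fd)
{
        unsigned char buf[sizeof(struct nd_cmd_pkg) + sizeof(struct nd_pkg_pdsm)] = { 0 };
        struct nd_cmd_pkg *pkg = (struct nd_cmd_pkg *)buf;
        struct nd_pkg_pdsm *pdsm = (struct nd_pkg_pdsm *)pkg->nd_payload;

        pkg->nd_family = NVDIMM_FAMILY_PAPR_SCM;
        pkg->nd_command = PAPR_PDSM_HEALTH;
        pkg->nd_size_in = 0;
        pkg->nd_size_out = ND_PDSM_HDR_SIZE + sizeof(struct nd_papr_pdsm_health);

        if (ioctl(dimm_fd, ND_IOCTL_CALL, pkg) < 0 || pdsm->cmd_status != 0)
                return -1;
        return pdsm->payload.health.dimm_health;        /* one of PAPR_PDSM_DIMM_* */
}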
+ */ + +#ifndef _UAPI_MISC_VAS_H +#define _UAPI_MISC_VAS_H + +#include <linux/types.h> + +#include <asm/ioctl.h> + +#define VAS_MAGIC 'v' +#define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr) + +struct vas_tx_win_open_attr { + __u32 version; + __s16 vas_id; /* specific instance of vas or -1 for default */ + __u16 reserved1; + __u64 flags; /* Future use */ + __u64 reserved2[6]; +}; + +#endif /* _UAPI_MISC_VAS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 244542ae2a91..cbf41fb4ee89 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) +CFLAGS_prom_init.o += -fno-stack-protector CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING CFLAGS_prom_init.o += -ffreestanding @@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o + of_platform.o prom_parse.o firmware.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ - paca.o nvram_64.o firmware.o note.o \ - syscall_64.o + paca.o nvram_64.o note.o syscall_64.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o @@ -71,7 +70,7 @@ obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o -obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 92045ed64976..c7797eb958c7 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -24,6 +24,7 @@ #include <asm/disassemble.h> #include <asm/cpu_has_feature.h> #include <asm/sstep.h> +#include <asm/inst.h> struct aligninfo { unsigned char len; @@ -104,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = { * so we don't need the address swizzling. 
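The uapi/asm/vas-api.h interface above is driven from userspace with a plain open()/ioctl() pair. A hedged sketch follows; the device node path and the version value are assumptions rather than something this header defines:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/vas-api.h>

static int open_nx_gzip_window(void)
{
        struct vas_tx_win_open_attr attr = {
                .version = 1,           /* assumed interface version */
                .vas_id = -1,           /* let the kernel pick a VAS instance */
        };
        int fd = open("/dev/crypto/nx-gzip", O_RDWR);   /* device path assumed */

        if (fd < 0)
                return -1;
        if (ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0) {
                close(fd);
                return -1;
        }
        return fd;      /* the fd is typically mmap()ed next to obtain the paste address */
}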
*/ static int emulate_spe(struct pt_regs *regs, unsigned int reg, - unsigned int instr) + struct ppc_inst ppc_instr) { int ret; union { @@ -115,8 +116,9 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } data, temp; unsigned char __user *p, *addr; unsigned long *evr = ¤t->thread.evr[reg]; - unsigned int nb, flags; + unsigned int nb, flags, instr; + instr = ppc_inst_val(ppc_instr); instr = (instr >> 1) & 0x1f; /* DAR has the operand effective address */ @@ -176,11 +178,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __get_user_inatomic(temp.v[1], p++); ret |= __get_user_inatomic(temp.v[2], p++); ret |= __get_user_inatomic(temp.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __get_user_inatomic(temp.v[4], p++); ret |= __get_user_inatomic(temp.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __get_user_inatomic(temp.v[6], p++); ret |= __get_user_inatomic(temp.v[7], p++); @@ -261,11 +263,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __put_user_inatomic(data.v[1], p++); ret |= __put_user_inatomic(data.v[2], p++); ret |= __put_user_inatomic(data.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __put_user_inatomic(data.v[4], p++); ret |= __put_user_inatomic(data.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __put_user_inatomic(data.v[6], p++); ret |= __put_user_inatomic(data.v[7], p++); @@ -293,7 +295,7 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, int fix_alignment(struct pt_regs *regs) { - unsigned int instr; + struct ppc_inst instr; struct instruction_op op; int r, type; @@ -303,18 +305,18 @@ int fix_alignment(struct pt_regs *regs) */ CHECK_FULL_REGS(regs); - if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip))) + if (unlikely(__get_user_instr(instr, (void __user *)regs->nip))) return -EFAULT; if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { /* We don't handle PPC little-endian any more... */ if (cpu_has_feature(CPU_FTR_PPC_LE)) return -EIO; - instr = swab32(instr); + instr = ppc_inst_swab(instr); } #ifdef CONFIG_SPE - if ((instr >> 26) == 0x4) { - int reg = (instr >> 21) & 0x1f; + if (ppc_inst_primary_opcode(instr) == 0x4) { + int reg = (ppc_inst_val(instr) >> 21) & 0x1f; PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } @@ -331,7 +333,7 @@ int fix_alignment(struct pt_regs *regs) * when pasting to a co-processor. Furthermore, paste_last is the * synchronisation point for preceding copy/paste sequences. 
*/ - if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe)) + if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe)) return -EIO; r = analyse_instr(&op, regs, instr); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index fcf24a365fc0..8711c2164b45 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -30,7 +30,6 @@ #include <asm/io.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include <asm/cputable.h> #include <asm/thread_info.h> @@ -70,6 +69,10 @@ #include <asm/fixmap.h> #endif +#ifdef CONFIG_XMON +#include "../xmon/xmon_bpts.h" +#endif + #define STACK_PT_REGS_OFFSET(sym, val) \ DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) @@ -556,6 +559,8 @@ int main(void) OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); + OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); + OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); @@ -693,6 +698,9 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); + HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); + HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); + HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); @@ -795,5 +803,9 @@ int main(void) DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); #endif +#ifdef CONFIG_XMON + DEFINE(BPT_SIZE, BPT_SIZE); +#endif + return 0; } diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index f57712a55815..02300edc6989 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -9,13 +9,13 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/memblock.h> +#include <linux/pgtable.h> #include <asm/sections.h> #include <asm/prom.h> #include <asm/btext.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/udbg.h> diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 470336277c67..65ab9fcebd31 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -7,6 +7,8 @@ * Author: Nathan Lynch */ +#define pr_fmt(fmt) "cacheinfo: " fmt + #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/kernel.h> @@ -166,7 +168,7 @@ static void release_cache_debugcheck(struct cache *cache) list_for_each_entry(iter, &cache_list, list) WARN_ONCE(iter->next_local == cache, - "cache for %pOF(%s) refers to cache for %pOF(%s)\n", + "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n", iter->ofnode, cache_type_string(iter), cache->ofnode, @@ -178,7 +180,7 @@ static void release_cache(struct cache *cache) if (!cache) return; - pr_debug("freeing L%d %s cache for %pOF\n", cache->level, + pr_debug("freeing L%d %s cache for %pOFP\n", cache->level, cache_type_string(cache), cache->ofnode); release_cache_debugcheck(cache); @@ -193,7 +195,7 @@ static void cache_cpu_set(struct cache *cache, int cpu) while (next) { WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map), - "CPU %i already accounted in %pOF(%s)\n", + "CPU %i already accounted in %pOFP(%s)\n", cpu, next->ofnode, cache_type_string(next)); cpumask_set_cpu(cpu, &next->shared_cpu_map); 
@@ -352,7 +354,7 @@ static int cache_is_unified_d(const struct device_node *np) static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level) { - pr_debug("creating L%d ucache for %pOF\n", level, node); + pr_debug("creating L%d ucache for %pOFP\n", level, node); return new_cache(cache_is_unified_d(node), level, node); } @@ -362,7 +364,7 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, { struct cache *dcache, *icache; - pr_debug("creating L%d dcache and icache for %pOF\n", level, + pr_debug("creating L%d dcache and icache for %pOFP\n", level, node); dcache = new_cache(CACHE_TYPE_DATA, level, node); @@ -418,12 +420,27 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger) } smaller->next_local = bigger; + + /* + * The cache->next_local list sorts by level ascending: + * L1d -> L1i -> L2 -> L3 ... + */ + WARN_ONCE((smaller->level == 1 && bigger->level > 2) || + (smaller->level > 1 && bigger->level != smaller->level + 1), + "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n", + smaller->level, smaller->ofnode, bigger->level, bigger->ofnode); } static void do_subsidiary_caches_debugcheck(struct cache *cache) { - WARN_ON_ONCE(cache->level != 1); - WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu")); + WARN_ONCE(cache->level != 1, + "instantiating cache chain from L%d %s cache for " + "%pOFP instead of an L1\n", cache->level, + cache_type_string(cache), cache->ofnode); + WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"), + "instantiating cache chain from node %pOFP of type '%s' " + "instead of a cpu node\n", cache->ofnode, + of_node_get_device_type(cache->ofnode)); } static void do_subsidiary_caches(struct cache *cache) @@ -647,12 +664,13 @@ static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache * return &cache->shared_cpu_map; } -static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +static ssize_t +show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list) { struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int ret, cpu; + int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; @@ -664,16 +682,25 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att mask = &cache->shared_cpu_map; } - ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n", - cpumask_pr_args(mask)); - buf[ret++] = '\n'; - buf[ret] = '\0'; - return ret; + return cpumap_print_to_pagebuf(list, buf, mask); +} + +static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, false); +} + +static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, true); } static struct kobj_attribute cache_shared_cpu_map_attr = __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + /* Attributes which should always be created -- the kobject/sysfs core * does this automatically via kobj_type->default_attrs. This is the * minimum data required to uniquely identify a cache. 
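With the shared_cpu_list attribute added in the cacheinfo.c hunk above, each cache index directory exposes its sharing information both as a bitmask (shared_cpu_map) and as a CPU list. Reading it from userspace is straightforward; the path below names cpu0's first cache index purely as an example:

#include <stdio.h>

int main(void)
{
        char buf[256];
        FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/index0/shared_cpu_list", "r");

        if (f && fgets(buf, sizeof(buf), f))
                printf("index0 shared with CPUs: %s", buf);     /* e.g. "0-7" */
        if (f)
                fclose(f);
        return 0;
}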
@@ -682,6 +709,7 @@ static struct attribute *cache_index_default_attrs[] = { &cache_type_attr.attr, &cache_level_attr.attr, &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, NULL, }; @@ -733,13 +761,13 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) rc = attr->show(&dir->kobj, attr, buf); if (rc <= 0) { pr_debug("not creating %s attribute for " - "%pOF(%s) (rc = %zd)\n", + "%pOFP(%s) (rc = %zd)\n", attr->attr.name, cache->ofnode, cache_type, rc); continue; } if (sysfs_create_file(&dir->kobj, &attr->attr)) - pr_debug("could not create %s attribute for %pOF(%s)\n", + pr_debug("could not create %s attribute for %pOFP(%s)\n", attr->attr.name, cache->ofnode, cache_type); } @@ -855,7 +883,7 @@ static void cache_cpu_clear(struct cache *cache, int cpu) struct cache *next = cache->next_local; WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map), - "CPU %i not accounted in %pOF(%s)\n", + "CPU %i not accounted in %pOFP(%s)\n", cpu, cache->ofnode, cache_type_string(cache)); diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f6517f67265a..f8b5ff64b604 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -288,6 +288,7 @@ _GLOBAL(__init_fpu_registers) mtmsr r10 isync blr +_ASM_NOKPROBE_SYMBOL(__init_fpu_registers) /* Definitions for the table use to save CPU states */ @@ -483,4 +484,5 @@ _GLOBAL(__restore_cpu_setup) 1: mtcr r7 blr +_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index a460298c7ddb..704e8b9501ee 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -91,11 +91,18 @@ _GLOBAL(__restore_cpu_power8) mtlr r11 blr +_GLOBAL(__setup_cpu_power10) + mflr r11 + bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 + b 1f + _GLOBAL(__setup_cpu_power9) mflr r11 - bl __init_FSCR + bl __init_FSCR_power9 bl __init_PMU - bl __init_hvmode_206 +1: bl __init_hvmode_206 mtlr r11 beqlr li r0,0 @@ -116,11 +123,18 @@ _GLOBAL(__setup_cpu_power9) mtlr r11 blr +_GLOBAL(__restore_cpu_power10) + mflr r11 + bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 + b 1f + _GLOBAL(__restore_cpu_power9) mflr r11 - bl __init_FSCR + bl __init_FSCR_power9 bl __init_PMU - mfmsr r3 +1: mfmsr r3 rldicl. 
r0,r3,4,63 mtlr r11 beqlr @@ -182,9 +196,21 @@ __init_LPCR_ISA300: isync blr +__init_FSCR_power10: + mfspr r3, SPRN_FSCR + ori r3, r3, FSCR_PREFIX + mtspr SPRN_FSCR, r3 + // fall through + +__init_FSCR_power9: + mfspr r3, SPRN_FSCR + ori r3, r3, FSCR_SCV + mtspr SPRN_FSCR, r3 + // fall through + __init_FSCR: mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB + ori r3,r3,FSCR_TAR|FSCR_EBB mtspr SPRN_FSCR,r3 blr @@ -217,3 +243,10 @@ __init_PMU_ISA207: li r5,0 mtspr SPRN_MMCRS,r5 blr + +__init_PMU_ISA31: + li r5,0 + mtspr SPRN_MMCR3,r5 + LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) + mtspr SPRN_MMCRA,r5 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 13eba2eb46fe..2aa89c6b2896 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -70,9 +70,8 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); -extern long __machine_check_early_realmode_p7(struct pt_regs *regs); -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); +extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power10(void); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -118,7 +117,12 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ PPC_FEATURE2_ARCH_3_00 | \ PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN ) + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV) +#define COMMON_USER_POWER10 COMMON_USER_POWER9 +#define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \ + PPC_FEATURE2_ARCH_3_1 | \ + PPC_FEATURE2_MMA) #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) @@ -367,6 +371,22 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_power9, .platform = "power9", }, + { /* 3.1-compliant processor, i.e. 
Power10 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000006, + .cpu_name = "POWER10 (architected)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_type = PPC_OPROFILE_INVALID, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .platform = "power10", + }, { /* Power7 */ .pvr_mask = 0xffff0000, .pvr_value = 0x003f0000, @@ -519,6 +539,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power10", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power10", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, @@ -1232,69 +1271,6 @@ static struct cpu_spec __initdata cpu_specs[] = { }, #endif /* CONFIG_PPC_8xx */ #ifdef CONFIG_40x - { /* 403GC */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x00200200, - .cpu_name = "403GC", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 403GCX */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x00201400, - .cpu_name = "403GCX", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 403G ?? 
*/ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00200000, - .cpu_name = "403G ??", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 16, - .dcache_bsize = 16, - .machine_check = machine_check_4xx, - .platform = "ppc403", - }, - { /* 405GP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40110000, - .cpu_name = "405GP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* STB 03xxx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40130000, - .cpu_name = "STB03xxx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, { /* STB 04xxx */ .pvr_mask = 0xffff0000, .pvr_value = 0x41810000, @@ -1385,32 +1361,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, - { /* Xilinx Virtex-II Pro */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x20010000, - .cpu_name = "Virtex-II Pro", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* Xilinx Virtex-4 FX */ - .pvr_mask = 0xfffff000, - .pvr_value = 0x20011000, - .cpu_name = "Virtex-4 FX", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | - PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, { /* 405EP */ .pvr_mask = 0xffff0000, .pvr_value = 0x51210000, @@ -1800,19 +1750,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, - { /* 440 in Xilinx Virtex-5 FXT */ - .pvr_mask = 0xfffffff0, - .pvr_value = 0x7ff21910, - .cpu_name = "440 in Virtex-5 FXT", - .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_440x5, - .machine_check = machine_check_440A, - .platform = "ppc440", - }, { /* 460EX */ .pvr_mask = 0xffff0006, .pvr_value = 0x13020002, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 05745ddbd229..735e89337398 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -18,6 +18,7 @@ #include <asm/firmware.h> #include <linux/uaccess.h> #include <asm/rtas.h> +#include <asm/inst.h> #ifdef DEBUG #include <asm/udbg.h> @@ -34,7 +35,7 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { - unsigned int *p = (unsigned int *)addr; + struct ppc_inst *p = (struct ppc_inst *)addr; /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we @@ -44,8 +45,8 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. 
*/ - patch_instruction(p, PPC_INST_NOP); - patch_branch(++p, addr + PHYSICAL_START, 0); + patch_instruction(p, ppc_inst(PPC_INST_NOP)); + patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index cc14aa6c4a1b..cdc2dccb987d 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -16,7 +16,7 @@ bool dawr_force_enable; EXPORT_SYMBOL_GPL(dawr_force_enable); -int set_dawr(struct arch_hw_breakpoint *brk) +int set_dawr(int nr, struct arch_hw_breakpoint *brk) { unsigned long dawr, dawrx, mrd; @@ -37,17 +37,26 @@ int set_dawr(struct arch_hw_breakpoint *brk) dawrx |= (mrd & 0x3f) << (63 - 53); if (ppc_md.set_dawr) - return ppc_md.set_dawr(dawr, dawrx); - - mtspr(SPRN_DAWR, dawr); - mtspr(SPRN_DAWRX, dawrx); + return ppc_md.set_dawr(nr, dawr, dawrx); + + if (nr == 0) { + mtspr(SPRN_DAWR0, dawr); + mtspr(SPRN_DAWRX0, dawrx); + } else { + mtspr(SPRN_DAWR1, dawr); + mtspr(SPRN_DAWRX1, dawrx); + } return 0; } -static void set_dawr_cb(void *info) +static void disable_dawrs_cb(void *info) { - set_dawr(info); + struct arch_hw_breakpoint null_brk = {0}; + int i; + + for (i = 0; i < nr_wp_slots(); i++) + set_dawr(i, &null_brk); } static ssize_t dawr_write_file_bool(struct file *file, @@ -60,7 +69,7 @@ static ssize_t dawr_write_file_bool(struct file *file, /* Send error to user if they hypervisor won't allow us to write DAWR */ if (!dawr_force_enable && firmware_has_feature(FW_FEATURE_LPAR) && - set_dawr(&null_brk) != H_SUCCESS) + set_dawr(0, &null_brk) != H_SUCCESS) return -ENODEV; rc = debugfs_write_file_bool(file, user_buf, count, ppos); @@ -69,7 +78,7 @@ static ssize_t dawr_write_file_bool(struct file *file, /* If we are clearing, make sure all CPUs have the DAWR cleared */ if (!dawr_force_enable) - smp_call_function(set_dawr_cb, &null_brk, 0); + smp_call_function(disable_dawrs_cb, NULL, 0); return rc; } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f17ff1200eaa..52680cf07c9d 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -18,61 +18,6 @@ #ifdef CONFIG_SMP -/* - * Doorbells must only be used if CPU_FTR_DBELL is available. - * msgsnd is used in HV, and msgsndp is used in !HV. - * - * These should be used by platform code that is aware of restrictions. - * Other arch code should use ->cause_ipi. - * - * doorbell_global_ipi() sends a dbell to any target CPU. - * Must be used only by architectures that address msgsnd target - * by PIR/get_hard_smp_processor_id. - */ -void doorbell_global_ipi(int cpu) -{ - u32 tag = get_hard_smp_processor_id(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * doorbell_core_ipi() sends a dbell to a target CPU in the same core. - * Must be used only by architectures that address msgsnd target - * by TIR/cpu_thread_in_core. - */ -void doorbell_core_ipi(int cpu) -{ - u32 tag = cpu_thread_in_core(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * Attempt to cause a core doorbell if destination is on the same core. - * Returns 1 on success, 0 on failure. 
- */ -int doorbell_try_core_ipi(int cpu) -{ - int this_cpu = get_cpu(); - int ret = 0; - - if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { - doorbell_core_ipi(cpu); - ret = 1; - } - - put_cpu(); - - return ret; -} - void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e486d1d78de2..9053fc9d20c7 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -14,23 +14,6 @@ * Generic iommu implementation */ -/* - * The coherent mask may be smaller than the real mask, check if we can - * really use a direct window. - */ -static inline bool dma_iommu_alloc_bypass(struct device *dev) -{ - return dev->archdata.iommu_bypass && !iommu_fixed_is_weak && - dma_direct_supported(dev, dev->coherent_dma_mask); -} - -static inline bool dma_iommu_map_bypass(struct device *dev, - unsigned long attrs) -{ - return dev->archdata.iommu_bypass && - (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING)); -} - /* Allocates a contiguous real buffer and creates mappings over it. * Returns the virtual address of the buffer and sets dma_handle * to the dma address (mapping) of the first page. @@ -39,8 +22,6 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { - if (dma_iommu_alloc_bypass(dev)) - return dma_direct_alloc(dev, size, dma_handle, flag, attrs); return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, dev->coherent_dma_mask, flag, dev_to_node(dev)); @@ -50,11 +31,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - if (dma_iommu_alloc_bypass(dev)) - dma_direct_free(dev, size, vaddr, dma_handle, attrs); - else - iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, - dma_handle); + iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); } /* Creates TCEs for a user provided buffer. 
The user buffer must be @@ -67,9 +44,6 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction direction, unsigned long attrs) { - if (dma_iommu_map_bypass(dev, attrs)) - return dma_direct_map_page(dev, page, offset, size, direction, - attrs); return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, size, dma_get_mask(dev), direction, attrs); } @@ -79,11 +53,8 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, unsigned long attrs) { - if (!dma_iommu_map_bypass(dev, attrs)) - iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, - direction, attrs); - else - dma_direct_unmap_page(dev, dma_handle, size, direction, attrs); + iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, + attrs); } @@ -91,8 +62,6 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, unsigned long attrs) { - if (dma_iommu_map_bypass(dev, attrs)) - return dma_direct_map_sg(dev, sglist, nelems, direction, attrs); return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, dma_get_mask(dev), direction, attrs); } @@ -101,11 +70,8 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, unsigned long attrs) { - if (!dma_iommu_map_bypass(dev, attrs)) - ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, + ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); - else - dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs); } static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) @@ -113,8 +79,9 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) struct pci_dev *pdev = to_pci_dev(dev); struct pci_controller *phb = pci_bus_to_host(pdev->bus); - return phb->controller_ops.iommu_bypass_supported && - phb->controller_ops.iommu_bypass_supported(pdev, mask); + if (iommu_fixed_is_weak || !phb->controller_ops.iommu_bypass_supported) + return false; + return phb->controller_ops.iommu_bypass_supported(pdev, mask); } /* We support DMA to/from any memory page via the iommu */ @@ -123,7 +90,7 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) struct iommu_table *tbl = get_iommu_table_base(dev); if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { - dev->archdata.iommu_bypass = true; + dev->dma_ops_bypass = true; dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); return 1; } @@ -141,7 +108,7 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) } dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); - dev->archdata.iommu_bypass = false; + dev->dma_ops_bypass = false; return 1; } @@ -153,47 +120,13 @@ u64 dma_iommu_get_required_mask(struct device *dev) if (!tbl) return 0; - if (dev_is_pci(dev)) { - u64 bypass_mask = dma_direct_get_required_mask(dev); - - if (dma_iommu_bypass_supported(dev, bypass_mask)) - return bypass_mask; - } - - mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1); + mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) + + tbl->it_page_shift - 1); mask += mask - 1; return mask; } -static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - if (dma_iommu_alloc_bypass(dev)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); -} - -static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr, - size_t sz, enum 
dma_data_direction dir) -{ - if (dma_iommu_alloc_bypass(dev)) - dma_direct_sync_single_for_device(dev, addr, sz, dir); -} - -extern void dma_iommu_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - if (dma_iommu_alloc_bypass(dev)) - dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); -} - -extern void dma_iommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - if (dma_iommu_alloc_bypass(dev)) - dma_direct_sync_sg_for_device(dev, sgl, nents, dir); -} - const struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, @@ -203,10 +136,6 @@ const struct dma_map_ops dma_iommu_ops = { .map_page = dma_iommu_map_page, .unmap_page = dma_iommu_unmap_page, .get_required_mask = dma_iommu_get_required_mask, - .sync_single_for_cpu = dma_iommu_sync_for_cpu, - .sync_single_for_device = dma_iommu_sync_for_device, - .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu, - .sync_sg_for_device = dma_iommu_sync_sg_for_device, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, }; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 36bc0d5c4f3a..f204ad79b6b5 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -24,8 +24,8 @@ /* Device-tree visible constants follow */ -#define ISA_V2_07B 2070 #define ISA_V3_0B 3000 +#define ISA_V3_1 3100 #define USABLE_PR (1U << 0) #define USABLE_OS (1U << 1) @@ -64,9 +64,6 @@ struct dt_cpu_feature { * Set up the base CPU */ -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); - static int hv_mode; static struct { @@ -74,6 +71,7 @@ static struct { u64 lpcr_clear; u64 hfscr; u64 fscr; + u64 pcr; } system_registers; static void (*init_pmu_registers)(void); @@ -101,7 +99,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); - mtspr(SPRN_PCR, PCR_MASK); + mtspr(SPRN_PCR, system_registers.pcr); } mtspr(SPRN_FSCR, system_registers.fscr); @@ -335,6 +333,7 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) #ifdef CONFIG_PPC_RADIX_MMU cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; return 1; @@ -346,6 +345,14 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) { u64 lpcr; + /* + * Linux relies on FSCR[DSCR] being clear, so that we can take the + * facility unavailable interrupt and track the task's usage of DSCR. + * See facility_unavailable_exception(). + * Clear the bit here so that feat_enable() doesn't set it. 
+ */ + f->fscr_bit_nr = -1; + feat_enable(f); lpcr = mfspr(SPRN_LPCR); @@ -440,6 +447,39 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) return 1; } +static void init_pmu_power10(void) +{ + init_pmu_power9(); + + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); +} + +static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) +{ + hfscr_pmu_enable(); + + init_pmu_power10(); + init_pmu_registers = init_pmu_power10; + + cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT; + + cur_cpu_spec->num_pmcs = 6; + cur_cpu_spec->pmc_type = PPC_PMC_IBM; + cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; + + return 1; +} + +static int __init feat_enable_mce_power10(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power10"; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10; + + return 1; +} + static int __init feat_enable_tm(struct dt_cpu_feature *f) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -552,6 +592,18 @@ static int __init feat_enable_large_ci(struct dt_cpu_feature *f) return 1; } +static int __init feat_enable_mma(struct dt_cpu_feature *f) +{ + u64 pcr; + + feat_enable(f); + pcr = mfspr(SPRN_PCR); + pcr &= ~PCR_MMA_DIS; + mtspr(SPRN_PCR, pcr); + + return 1; +} + struct dt_cpu_feature_match { const char *name; int (*enable)(struct dt_cpu_feature *f); @@ -565,6 +617,7 @@ static struct dt_cpu_feature_match __initdata {"little-endian", feat_enable_le, CPU_FTR_REAL_LE}, {"smt", feat_enable_smt, 0}, {"interrupt-facilities", feat_enable, 0}, + {"system-call-vectored", feat_enable, 0}, {"timer-facilities", feat_enable, 0}, {"timer-facilities-v3", feat_enable, 0}, {"debug-facilities", feat_enable, 0}, @@ -616,7 +669,9 @@ static struct dt_cpu_feature_match __initdata {"group-start-register", feat_enable, 0}, {"pc-relative-addressing", feat_enable, 0}, {"machine-check-power9", feat_enable_mce_power9, 0}, + {"machine-check-power10", feat_enable_mce_power10, 0}, {"performance-monitor-power9", feat_enable_pmu_power9, 0}, + {"performance-monitor-power10", feat_enable_pmu_power10, 0}, {"event-based-branch-v3", feat_enable, 0}, {"random-number-generator", feat_enable, 0}, {"system-call-vectored", feat_disable, 0}, @@ -625,6 +680,9 @@ static struct dt_cpu_feature_match __initdata {"vector-binary128", feat_enable, 0}, {"vector-binary16", feat_enable, 0}, {"wait-v3", feat_enable, 0}, + {"prefix-instructions", feat_enable, 0}, + {"matrix-multiply-assist", feat_enable_mma, 0}, + {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1}, }; static bool __initdata using_dt_cpu_ftrs; @@ -650,10 +708,15 @@ static void __init cpufeatures_setup_start(u32 isa) { pr_info("setup for ISA %d\n", isa); - if (isa >= 3000) { + if (isa >= ISA_V3_0B) { cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00; } + + if (isa >= ISA_V3_1) { + cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31; + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1; + } } static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) @@ -747,12 +810,6 @@ static __init void cpufeatures_cpu_quirks(void) } update_tlbie_feature_flag(version); - /* - * PKEY was not in the initial base or feature node - * specification, but it should become optional in the next - * cpu feature version sequence. 
- */ - cur_cpu_spec->cpu_features |= CPU_FTR_PKEY; } static void __init cpufeatures_setup_finished(void) @@ -770,6 +827,7 @@ static void __init cpufeatures_setup_finished(void) system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); + system_registers.pcr = mfspr(SPRN_PCR); pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n", cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 7cdcb413bb44..94682382fc8c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -167,39 +167,33 @@ void eeh_show_enabled(void) */ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); u32 cfg; int cap, i; int n = 0, l = 0; char buffer[128]; - if (!pdn) { - pr_warn("EEH: Note: No error log for absent device.\n"); - return 0; - } - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); - eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); pr_warn("EEH: PCI device/vendor: %08x\n", cfg); - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ if (edev->mode & EEH_DEV_BRIDGE) { - eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); + eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); pr_warn("EEH: Bridge secondary status: %04x\n", cfg); - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); pr_warn("EEH: Bridge control: %04x\n", cfg); } @@ -207,11 +201,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) /* Dump out the PCI-X command and status regs */ cap = edev->pcix_cap; if (cap) { - eeh_ops->read_config(pdn, cap, 4, &cfg); + eeh_ops->read_config(edev, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); pr_warn("EEH: PCI-X cmd: %08x\n", cfg); - eeh_ops->read_config(pdn, cap+4, 4, &cfg); + eeh_ops->read_config(edev, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); pr_warn("EEH: PCI-X status: %08x\n", cfg); } @@ -223,7 +217,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -250,7 +244,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E AER capability register set follows:\n"); for (i=0; i<=13; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 
4*i, cfg); if ((i % 4) == 0) { @@ -726,7 +720,6 @@ static void eeh_disable_and_save_dev_state(struct eeh_dev *edev, static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -734,73 +727,14 @@ static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) return; /* Apply customization from firmware */ - if (pdn && eeh_ops->restore_config) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); /* The caller should restore state for the specified device */ if (pdev != dev) pci_restore_state(pdev); } -int eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout if possible */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; -} - /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct @@ -977,15 +911,13 @@ int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) */ void eeh_save_bars(struct eeh_dev *edev) { - struct pci_dn *pdn; int i; - pdn = eeh_dev_to_pdn(edev); - if (!pdn) + if (!edev) return; for (i = 0; i < 16; i++) - eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]); /* * For PCI bridges including root port, we need enable bus @@ -1096,7 +1028,7 @@ static int eeh_init(void) /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - eeh_dev_phb_init_dynamic(hose); + eeh_phb_pe_create(hose); eeh_addr_cache_init(); @@ -1106,6 +1038,37 @@ static int eeh_init(void) core_initcall_sync(eeh_init); +static int eeh_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + /* + * Note: It's not possible to perform EEH device addition (i.e. + * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on + * the device's resources, which have not yet been set up. 
+ */ + case BUS_NOTIFY_DEL_DEVICE: + eeh_remove_device(to_pci_dev(dev)); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block eeh_device_nb = { + .notifier_call = eeh_device_notifier, +}; + +static __init int eeh_set_bus_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &eeh_device_nb); + return 0; +} +arch_initcall(eeh_set_bus_notifier); + /** * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH @@ -1144,7 +1107,7 @@ void eeh_probe_device(struct pci_dev *dev) * FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ if (edev->pdev && edev->pdev != dev) { - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); @@ -1223,7 +1186,7 @@ void eeh_remove_device(struct pci_dev *dev) edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); else edev->mode |= EEH_DEV_DISCONNECTED; } diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c deleted file mode 100644 index 7370185c7a05..000000000000 --- a/arch/powerpc/kernel/eeh_dev.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The file intends to implement dynamic creation of EEH device, which will - * be bound with OF node and PCI device simutaneously. The EEH devices would - * be foundamental information for EEH core components to work proerly. Besides, - * We have to support multiple situations where dynamic creation of EEH device - * is required: - * - * 1) Before PCI emunation starts, we need create EEH devices according to the - * PCI sensitive OF nodes. - * 2) When PCI emunation is done, we need do the binding between PCI device and - * the associated EEH device. - * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device - * will be created while PCI sensitive OF node is detected from DR. - * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If - * PHB is newly inserted, we also need create EEH devices accordingly. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. - */ - -#include <linux/export.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/string.h> - -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - -/** - * eeh_dev_init - Create EEH device according to OF node - * @pdn: PCI device node - * - * It will create EEH device according to the given OF node. The function - * might be called by PCI emunation, DR, PHB hotplug. 
- */ -struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) -{ - struct eeh_dev *edev; - - /* Allocate EEH device */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) - return NULL; - - /* Associate EEH device with OF node */ - pdn->edev = edev; - edev->pdn = pdn; - edev->bdfn = (pdn->busno << 8) | pdn->devfn; - edev->controller = pdn->phb; - - return edev; -} - -/** - * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB - * @phb: PHB - * - * Scan the PHB OF node and its child association, then create the - * EEH devices accordingly - */ -void eeh_dev_phb_init_dynamic(struct pci_controller *phb) -{ - /* EEH PE for PHB */ - eeh_phb_pe_create(phb); -} diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7b048cee767c..3eff6a4888e7 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -214,7 +214,7 @@ static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata) pci_save_state(pdev); } -static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s) +static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s) { struct eeh_pe *pe; struct eeh_dev *edev, *tmp; @@ -425,8 +425,8 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev, pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV - if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) - eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); + if (eeh_ops->notify_resume) + eeh_ops->notify_resume(edev); #endif return PCI_ERS_RESULT_NONE; } @@ -477,7 +477,7 @@ static void *eeh_add_virt_device(struct eeh_dev *edev) } #ifdef CONFIG_PCI_IOV - pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index); + pci_iov_add_virtfn(edev->physfn, edev->vf_index); #endif return NULL; } @@ -521,9 +521,7 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata) if (edev->physfn) { #ifdef CONFIG_PCI_IOV - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); + pci_iov_remove_virtfn(edev->physfn, edev->vf_index); edev->pdev = NULL; #endif if (rmv_data) @@ -544,7 +542,7 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata) continue; edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); } return NULL; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 177852e39a25..d2aaaa73fdd5 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -319,56 +319,22 @@ struct eeh_pe *eeh_pe_get(struct pci_controller *phb, } /** - * eeh_pe_get_parent - Retrieve the parent PE + * eeh_pe_tree_insert - Add EEH device to parent PE * @edev: EEH device + * @new_pe_parent: PE to create additional PEs under * - * The whole PEs existing in the system are organized as hierarchy - * tree. The function is used to retrieve the parent PE according - * to the parent EEH device. - */ -static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) -{ - struct eeh_dev *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - /* - * It might have the case for the indirect parent - * EEH device already having associated PE, but - * the direct parent EEH device doesn't have yet. - */ - if (edev->physfn) - pdn = pci_get_pdn(edev->physfn); - else - pdn = pdn ? 
pdn->parent : NULL; - while (pdn) { - /* We're poking out of PCI territory */ - parent = pdn_to_eeh_dev(pdn); - if (!parent) - return NULL; - - if (parent->pe) - return parent->pe; - - pdn = pdn->parent; - } - - return NULL; -} - -/** - * eeh_add_to_parent_pe - Add EEH device to parent PE - * @edev: EEH device + * Add EEH device to the PE in edev->pe_config_addr. If a PE already + * exists with that address then @edev is added to that PE. Otherwise + * a new PE is created and inserted into the PE tree as a child of + * @new_pe_parent. * - * Add EEH device to the parent PE. If the parent PE already - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, - * we have to create new PE to hold the EEH device and the new - * PE will be linked to its parent PE as well. + * If @new_pe_parent is NULL then the new PE will be inserted under + * directly under the the PHB. */ -int eeh_add_to_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) { + struct pci_controller *hose = edev->controller; struct eeh_pe *pe, *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - int config_addr = (pdn->busno << 8) | (pdn->devfn); /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { @@ -382,7 +348,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. */ - pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr); + pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn); if (pe) { if (pe->type & EEH_PE_INVALID) { list_add_tail(&edev->entry, &pe->edevs); @@ -399,8 +365,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) parent = parent->parent; } - eeh_edev_dbg(edev, - "Added to device PE (parent: PE#%x)\n", + eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n", pe->parent->addr); } else { /* Mark the PE as type of PCI bus */ @@ -416,15 +381,15 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Create a new EEH PE */ if (edev->physfn) - pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF); + pe = eeh_pe_alloc(hose, EEH_PE_VF); else - pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE); + pe = eeh_pe_alloc(hose, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } pe->addr = edev->pe_config_addr; - pe->config_addr = config_addr; + pe->config_addr = edev->bdfn; /* * Put the new EEH PE into hierarchy tree. If the parent @@ -432,34 +397,35 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * to PHB directly. Otherwise, we have to associate the * PE with its parent. */ - parent = eeh_pe_get_parent(edev); - if (!parent) { - parent = eeh_phb_pe_get(pdn->phb); - if (!parent) { + if (!new_pe_parent) { + new_pe_parent = eeh_phb_pe_get(hose); + if (!new_pe_parent) { pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", - __func__, pdn->phb->global_number); + __func__, hose->global_number); edev->pe = NULL; kfree(pe); return -EEXIST; } } - pe->parent = parent; + + /* link new PE into the tree */ + pe->parent = new_pe_parent; + list_add_tail(&pe->child, &new_pe_parent->child_list); /* * Put the newly created PE into the child list and * link the EEH device accordingly. 
*/ - list_add_tail(&pe->child, &parent->child_list); list_add_tail(&edev->entry, &pe->edevs); edev->pe = pe; - eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n", - pe->parent->addr); + eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n", + new_pe_parent->addr); return 0; } /** - * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE + * eeh_pe_tree_remove - Remove one EEH device from the associated PE * @edev: EEH device * * The PE hierarchy tree might be changed when doing PCI hotplug. @@ -467,7 +433,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. */ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_remove(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; bool keep, recover; @@ -698,7 +664,6 @@ void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed) */ static void eeh_bridge_check_link(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int cap; uint32_t val; int timeout = 0; @@ -714,32 +679,32 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) /* Check slot status */ cap = edev->pcie_cap; - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val); return; } /* Check power status if we have the capability */ - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val); if (val & PCI_EXP_SLTCAP_PCP) { - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val); if (val & PCI_EXP_SLTCTL_PCC) { eeh_edev_dbg(edev, "In power-off state, power it on ...\n"); val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); val |= (0x0100 & PCI_EXP_SLTCTL_PIC); - eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val); msleep(2 * 1000); } } /* Enable link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val); val &= ~PCI_EXP_LNKCTL_LD; - eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val); if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val); msleep(1000); @@ -752,7 +717,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) msleep(20); timeout += 20; - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val); if (val & PCI_EXP_LNKSTA_DLLLA) break; } @@ -769,7 +734,6 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) static void eeh_restore_bridge_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; /* @@ -777,20 +741,20 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) * Bus numbers and windows: 0x18 - 0x30 */ for (i = 4; i < 13; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* Rom: 0x38 */ - eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]); + eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]); /* Cache line & Latency timer: 0xC 0xD */ 
- eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, - SAVED_BYTE(PCI_LATENCY_TIMER)); + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); /* Max latency, min grant, interrupt ping and line: 0x3C */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] | + eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); /* Check the PCIe link is ready */ @@ -799,28 +763,27 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) static void eeh_restore_device_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; u32 cmd; for (i = 4; i < 10; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* 12 == Expansion ROM Address */ - eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]); + eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]); - eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* * Restore PERR & SERR bits, some devices require it, * don't touch the other command bits */ - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd); if (edev->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; else @@ -829,7 +792,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) cmd |= PCI_COMMAND_SERR; else cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd); + eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd); } /** @@ -843,16 +806,14 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) */ static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - /* Do special restore for bridges */ if (edev->mode & EEH_DEV_BRIDGE) eeh_restore_bridge_bars(edev); else eeh_restore_device_bars(edev); - if (eeh_ops->restore_config && pdn) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); } /** diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 4fb0f1e1017a..429620da73ba 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -99,7 +99,7 @@ static ssize_t eeh_notify_resume_store(struct device *dev, if (!edev || !edev->pe || !eeh_ops->notify_resume) return -ENODEV; - if (eeh_ops->notify_resume(pci_get_pdn(pdev))) + if (eeh_ops->notify_resume(edev)) return -EIO; return count; diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8420abd4ea1c..f4d0af8e1136 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -28,7 +28,6 @@ #include <asm/unistd.h> #include <asm/ptrace.h> #include <asm/export.h> -#include <asm/asm-405.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> #include <asm/kup.h> @@ -37,6 
+36,12 @@ #include "head_32.h" /* + * powerpc relies on return from interrupt/syscall being context synchronising + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional + * synchronisation instructions. + */ + +/* * Align to 4k in order to ensure that all functions modyfing srr0/srr1 * fit into one page in order to not encounter a TLB miss between the * modification of srr0/srr1 and the associated rfi. @@ -51,6 +56,7 @@ mcheck_transfer_to_handler: mfspr r0,SPRN_DSRR1 stw r0,_DSRR1(r11) /* fall through */ +_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) .globl debug_transfer_to_handler debug_transfer_to_handler: @@ -59,6 +65,7 @@ debug_transfer_to_handler: mfspr r0,SPRN_CSRR1 stw r0,_CSRR1(r11) /* fall through */ +_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) .globl crit_transfer_to_handler crit_transfer_to_handler: @@ -94,6 +101,7 @@ crit_transfer_to_handler: rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ +_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif #ifdef CONFIG_40x @@ -115,6 +123,7 @@ crit_transfer_to_handler: rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ +_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif /* @@ -127,6 +136,7 @@ crit_transfer_to_handler: .globl transfer_to_handler_full transfer_to_handler_full: SAVE_NVGPRS(r11) +_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) /* fall through */ .globl transfer_to_handler @@ -227,6 +237,23 @@ transfer_to_handler_cont: SYNC RFI /* jump to handler, enable MMU */ +#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) +4: rlwinm r12,r12,0,~_TLF_NAPPING + stw r12,TI_LOCAL_FLAGS(r2) + b power_save_ppc32_restore + +7: rlwinm r12,r12,0,~_TLF_SLEEPING + stw r12,TI_LOCAL_FLAGS(r2) + lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ + rlwinm r9,r9,0,~MSR_EE + lwz r12,_LINK(r11) /* and return to address in LR */ + kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) + b fast_exception_return +#endif +_ASM_NOKPROBE_SYMBOL(transfer_to_handler) +_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) + #ifdef CONFIG_TRACE_IRQFLAGS 1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to * keep interrupts disabled at this point otherwise we might risk @@ -272,21 +299,6 @@ reenable_mmu: bctr /* jump to handler */ #endif /* CONFIG_TRACE_IRQFLAGS */ -#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) -4: rlwinm r12,r12,0,~_TLF_NAPPING - stw r12,TI_LOCAL_FLAGS(r2) - b power_save_ppc32_restore - -7: rlwinm r12,r12,0,~_TLF_SLEEPING - stw r12,TI_LOCAL_FLAGS(r2) - lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ - rlwinm r9,r9,0,~MSR_EE - lwz r12,_LINK(r11) /* and return to address in LR */ - kuap_restore r11, r2, r3, r4, r5 - lwz r2, GPR2(r11) - b fast_exception_return -#endif - #ifndef CONFIG_VMAP_STACK /* * On kernel stack overflow, load up an initial stack pointer @@ -313,6 +325,7 @@ stack_ovf: mtspr SPRN_SRR1,r10 SYNC RFI +_ASM_NOKPROBE_SYMBOL(stack_ovf) #endif #ifdef CONFIG_TRACE_IRQFLAGS @@ -455,6 +468,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r7,_NIP(r1) lwz r2,GPR2(r1) lwz r1,GPR1(r1) +syscall_exit_finish: #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif @@ -462,6 +476,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtspr SPRN_SRR1,r8 SYNC RFI +_ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x 2: li r7,0 iccci r0,r0 @@ -541,9 +556,6 @@ syscall_exit_work: addi r12,r2,TI_FLAGS 3: lwarx r8,0,r12 andc r8,r8,r11 -#ifdef CONFIG_IBM405_ERR77 - dcbt 0,r12 -#endif stwcx. 
r8,0,r12 bne- 3b @@ -596,6 +608,7 @@ ret_from_kernel_syscall: mtspr SPRN_SRR1, r10 SYNC RFI +_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) /* * The fork/clone functions need to copy the full register set into @@ -799,6 +812,7 @@ fast_exception_return: lwz r11,GPR11(r11) SYNC RFI +_ASM_NOKPROBE_SYMBOL(fast_exception_return) #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* check if the exception happened in a restartable section */ @@ -918,9 +932,6 @@ resume_kernel: addi r5,r2,TI_FLAGS 0: lwarx r8,0,r5 andc r8,r8,r11 -#ifdef CONFIG_IBM405_ERR77 - dcbt 0,r5 -#endif stwcx. r8,0,r5 bne- 0b 1: @@ -997,7 +1008,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) mtspr SPRN_XER,r10 mtctr r11 - PPC405_ERR77(0,r1) BEGIN_FTR_SECTION lwarx r11,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) @@ -1038,6 +1048,8 @@ exc_exit_restart: exc_exit_restart_end: SYNC RFI +_ASM_NOKPROBE_SYMBOL(exc_exit_restart) +_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) #else /* !(CONFIG_4xx || CONFIG_BOOKE) */ /* @@ -1059,16 +1071,15 @@ exc_exit_restart_end: exc_exit_restart: lwz r11,_NIP(r1) lwz r12,_MSR(r1) -exc_exit_start: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 REST_2GPRS(11, r1) lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - PPC405_ERR77_SYNC rfi b . /* prevent prefetch past rfi */ +_ASM_NOKPROBE_SYMBOL(exc_exit_restart) /* * Returning from a critical interrupt in user mode doesn't need @@ -1109,7 +1120,6 @@ exc_exit_restart_end: lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ mtctr r11; \ - PPC405_ERR77(0,r1); \ stwcx. r0,0,r1; /* to clear the reservation */ \ lwz r11,_LINK(r1); \ mtlr r11; \ @@ -1129,7 +1139,6 @@ exc_exit_restart_end: lwz r10,GPR10(r1); \ lwz r11,GPR11(r1); \ lwz r1,GPR1(r1); \ - PPC405_ERR77_SYNC; \ exc_lvl_rfi; \ b .; /* prevent prefetch past exc_lvl_rfi */ @@ -1182,6 +1191,7 @@ ret_from_crit_exc: mtspr SPRN_SRR0,r9; mtspr SPRN_SRR1,r10; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) +_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) #endif /* CONFIG_40x */ #ifdef CONFIG_BOOKE @@ -1193,6 +1203,7 @@ ret_from_crit_exc: RESTORE_xSRR(SRR0,SRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) +_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) .globl ret_from_debug_exc ret_from_debug_exc: @@ -1203,6 +1214,7 @@ ret_from_debug_exc: RESTORE_xSRR(CSRR0,CSRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI) +_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc) .globl ret_from_mcheck_exc ret_from_mcheck_exc: @@ -1214,6 +1226,7 @@ ret_from_mcheck_exc: RESTORE_xSRR(DSRR0,DSRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) +_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ /* @@ -1337,6 +1350,7 @@ nonrecoverable: bl unrecoverable_exception /* shouldn't return */ b 4b +_ASM_NOKPROBE_SYMBOL(nonrecoverable) .section .bss .align 2 @@ -1391,10 +1405,5 @@ _GLOBAL(enter_rtas) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 RFI /* return to caller */ - - .globl machine_check_in_rtas -machine_check_in_rtas: - twi 31,0,0 - /* XXX load up BATs and panic */ - +_ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b3c9f15089b6..733e40eba4eb 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -64,15 +64,177 @@ exception_marker: .section ".text" .align 7 +#ifdef CONFIG_PPC_BOOK3S +.macro system_call_vectored name trapnr + .globl system_call_vectored_\name +system_call_vectored_\name: 
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne .Ltabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + INTERRUPT_TO_KERNEL + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_LINK(r1) + std r11,_CTR(r1) + + li r11,\trapnr + std r11,_TRAP(r1) + std r12,_CCR(r1) + std r3,ORIG_GPR3(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + +BEGIN_FTR_SECTION + HMT_MEDIUM +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + /* + * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which + * would clobber syscall parameters. Also we always enter with IRQs + * enabled and nothing pending. system_call_exception() will call + * trace_hardirqs_off(). + * + * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The + * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED. + */ + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_vectored_\name\()_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,1 /* scv */ + bl syscall_exit_prepare + + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + cmpdi r3,0 + bne .Lsyscall_vectored_\name\()_restore_regs + + /* rfscv returns with LR->NIA and CTR->MSR */ + mtlr r4 + mtctr r5 + + /* Could zero these as per ABI, but we may consider a stricter ABI + * which preserves these if libc implementations can benefit, so + * restore them for now until further measurement is done. */ + ld r0,GPR0(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + /* Zero volatile regs that may contain sensitive kernel data */ + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtspr SPRN_XER,r0 + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFSCV_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_vectored_\name\()_restore_regs: + li r3,0 + mtmsrd r3,1 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + + ld r3,_CTR(r1) + ld r4,_LINK(r1) + ld r5,_XER(r1) + + REST_NVGPRS(r1) + ld r0,GPR0(r1) + mtcr r2 + mtctr r3 + mtlr r4 + mtspr SPRN_XER,r5 + REST_10GPRS(2, r1) + REST_2GPRS(12, r1) + ld r1,GPR1(r1) + RFI_TO_USER +.endm + +system_call_vectored common 0x3000 +/* + * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 + * which is tested by system_call_exception when r0 is -1 (as set by vector + * entry code). 
+ */ +system_call_vectored sigill 0x7ff0 + + +/* + * Entered via kernel return set up by kernel/sstep.c, must match entry regs + */ + .globl system_call_vectored_emulate +system_call_vectored_emulate: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + b system_call_vectored_common +#endif + + .balign IFETCH_ALIGN_BYTES .globl system_call_common system_call_common: +_ASM_NOKPROBE_SYMBOL(system_call_common) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -_ASM_NOKPROBE_SYMBOL(system_call_common) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -138,6 +300,7 @@ END_BTB_FLUSH_SECTION .Lsyscall_exit: addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 /* !scv */ bl syscall_exit_prepare ld r2,_CCR(r1) @@ -224,21 +387,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) b . /* prevent speculative execution */ #endif +#ifdef CONFIG_PPC_BOOK3S +_GLOBAL(ret_from_fork_scv) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_vectored_common_exit +#endif + _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) - li r3,0 + li r3,0 /* fork() return value */ b .Lsyscall_exit _GLOBAL(ret_from_kernel_thread) bl schedule_tail REST_NVGPRS(r1) - mtlr r14 + mtctr r14 mr r3,r15 #ifdef PPC64_ELF_ABI_v2 mr r12,r14 #endif - blrl + bctrl li r3,0 b .Lsyscall_exit @@ -259,10 +430,7 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); #define FLUSH_COUNT_CACHE \ 1: nop; \ - patch_site 1b, patch__call_flush_count_cache - - -#define BCCTR_FLUSH .long 0x4c400420 + patch_site 1b, patch__call_flush_branch_caches .macro nops number .rept \number @@ -271,8 +439,8 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); .endm .balign 32 -.global flush_count_cache -flush_count_cache: +.global flush_branch_caches +flush_branch_caches: /* Save LR into r9 */ mflr r9 @@ -294,7 +462,7 @@ flush_count_cache: li r9,0x7fff mtctr r9 - BCCTR_FLUSH + PPC_BCCTR_FLUSH 2: nop patch_site 2b patch__flush_count_cache_return @@ -303,7 +471,7 @@ flush_count_cache: .rept 278 .balign 32 - BCCTR_FLUSH + PPC_BCCTR_FLUSH nops 7 .endr @@ -357,7 +525,7 @@ _GLOBAL(_switch) * kernel/sched/core.c). * * Uncacheable stores in the case of involuntary preemption must - * be taken care of. The smp_mb__before_spin_lock() in __schedule() + * be taken care of. The smp_mb__after_spinlock() in __schedule() * is implemented as hwsync on powerpc, which orders MMIO too. So * long as there is an hwsync in the context switch path, it will * be executed on the source CPU after the task has performed @@ -479,11 +647,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) fast_interrupt_return: _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 - ld r4,_MSR(r1) - andi. r0,r4,MSR_PR + ld r5,_MSR(r1) + andi. r0,r5,MSR_PR bne .Lfast_user_interrupt_return - kuap_restore_amr r3 - andi. r0,r4,MSR_RI + kuap_restore_amr r3, r4 + andi. 
r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 9d32158ce36f..2ed14d4a47f5 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -11,6 +11,7 @@ #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/machdep.h> +#include <asm/inst.h> #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) extern void epapr_ev_idle(void); @@ -36,10 +37,10 @@ static int __init early_init_dt_scan_epapr(unsigned long node, return -1; for (i = 0; i < (len / 4); i++) { - u32 inst = be32_to_cpu(insts[i]); - patch_instruction(epapr_hypercall_start + i, inst); + struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); + patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction(epapr_ev_idle_start + i, inst); + patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst); #endif } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ebeebab74b56..f7d748b88705 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -270,7 +270,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endif - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 BEGIN_FTR_SECTION ld r10,IAREA+EX_PPR(r13) @@ -298,7 +298,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .if IKVM_SKIP 89: mtocrf 0x80,r9 - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) @@ -508,8 +508,24 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .macro __GEN_COMMON_BODY name .if IMASK + .if ! ISTACK + .error "No support for masked interrupt to use custom stack" + .endif + + /* If coming from user, skip soft-mask tests. */ + andi. r10,r12,MSR_PR + bne 2f + + /* Kernel code running below __end_interrupts is implicitly + * soft-masked */ + LOAD_HANDLER(r10, __end_interrupts) + cmpld r11,r10 + li r10,IMASK + blt- 1f + + /* Test the soft mask state against our interrupt's bit */ lbz r10,PACAIRQSOFTMASK(r13) - andi. r10,r10,IMASK +1: andi. r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE @@ -540,7 +556,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ - mr r10,r1 /* Save r1 */ +2: mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ beq- 100f ld r1,PACAKSAVE(r13) /* kernel stack to use */ @@ -740,6 +756,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * guarantee they will be delivered virtually. Some conditions (see the ISA) * cause exceptions to be delivered in real mode. * + * The scv instructions are a special case. They get a 0x3000 offset applied. + * scv exceptions have unique reentrancy properties, see below. + * * It's impossible to receive interrupts below 0x300 via AIL. * * KVM: None of the virtual exceptions are from the guest. 
Anything that @@ -749,8 +768,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors - * 0x1900 - 0x3fff : Real mode trampolines - * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors + * 0x1900 - 0x2fff : Real mode trampolines + * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors * 0x5900 - 0x6fff : Relon mode trampolines * 0x7000 - 0x7fff : FWNMI data area * 0x8000 - .... : Common interrupt handlers, remaining early @@ -761,8 +780,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * vectors there. */ OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900) -OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000) -OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900) +OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000) +OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900) OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000) #ifdef CONFIG_PPC_POWERNV @@ -798,6 +817,77 @@ USE_FIXED_SECTION(real_vectors) .globl __start_interrupts __start_interrupts: +/** + * Interrupt 0x3000 - System Call Vectored Interrupt (syscall). + * This is a synchronous interrupt invoked with the "scv" instruction. The + * system call does not alter the HV bit, so it is directed to the OS. + * + * Handling: + * scv instructions enter the kernel without changing EE, RI, ME, or HV. + * In particular, this means we can take a maskable interrupt at any point + * in the scv handler, which is unlike any other interrupt. This is solved + * by treating the instruction addresses below __end_interrupts as being + * soft-masked. + * + * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and + * ensure scv is never executed with relocation off, which means AIL-0 + * should never happen. + * + * Before leaving the below __end_interrupts text, at least of the following + * must be true: + * - MSR[PR]=1 (i.e., return to userspace) + * - MSR_EE|MSR_RI is set (no reentrant exceptions) + * - Standard kernel environment is set up (stack, paca, etc) + * + * Call convention: + * + * syscall register convention is in Documentation/powerpc/syscall64-abi.rst + */ +EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) + /* SCV 0 */ + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_tramp +#else + b system_call_vectored_common +#endif + nop + + /* SCV 1 - 127 */ + .rept 127 + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + li r0,-1 /* cause failure */ +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_sigill_tramp +#else + b system_call_vectored_sigill +#endif + .endr +EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) + +#ifdef CONFIG_RELOCATABLE +TRAMP_VIRT_BEGIN(system_call_vectored_tramp) + __LOAD_HANDLER(r10, system_call_vectored_common) + mtctr r10 + bctr + +TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) + __LOAD_HANDLER(r10, system_call_vectored_sigill) + mtctr r10 + bctr +#endif + + /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x100) @@ -939,13 +1029,13 @@ EXC_COMMON_BEGIN(system_reset_common) * the right thing. We do not want to reconcile because that goes * through irq tracing which we don't want in NMI. 
* - * Save PACAIRQHAPPENED to _DAR (otherwise unused), and set HARD_DIS + * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS * as we are running with MSR[EE]=0. */ li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) - std r10,_DAR(r1) + std r10,RESULT(r1) ori r10,r10,PACA_IRQ_HARD_DIS stb r10,PACAIRQHAPPENED(r13) @@ -966,12 +1056,12 @@ EXC_COMMON_BEGIN(system_reset_common) /* * Restore soft mask settings. */ - ld r10,_DAR(r1) + ld r10,RESULT(r1) stb r10,PACAIRQHAPPENED(r13) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r10 + kuap_restore_amr r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -1117,11 +1207,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 + /* + * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see + * system_reset_common) + */ + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + lbz r10,PACAIRQHAPPENED(r13) + std r10,RESULT(r1) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) + /* + * Restore soft mask settings. + */ + ld r10,RESULT(r1) + stb r10,PACAIRQHAPPENED(r13) + ld r10,SOFTE(r1) + stb r10,PACAIRQSOFTMASK(r13) + #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -1225,17 +1334,19 @@ EXC_COMMON_BEGIN(machine_check_idle_common) bl machine_check_queue_event /* - * We have not used any non-volatile GPRs here, and as a rule - * most exception code including machine check does not. - * Therefore PACA_NAPSTATELOST does not need to be set. Idle - * wakeup will restore volatile registers. + * GPR-loss wakeups are relatively straightforward, because the + * idle sleep code has saved all non-volatile registers on its + * own stack, and r1 in PACAR1. * - * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce. + * For no-loss wakeups the r1 and lr registers used by the + * early machine check handler have to be restored first. r2 is + * the kernel TOC, so no need to restore it. * * Then decrement MCE nesting after finishing with the stack. */ ld r3,_MSR(r1) ld r4,_LINK(r1) + ld r1,GPR1(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 @@ -1244,7 +1355,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common) mtlr r4 rlwinm r10,r3,47-31,30,31 cmpwi cr1,r10,2 - bltlr cr1 /* no state loss, return to idle caller */ + bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ b idle_return_gpr_loss #endif @@ -1266,6 +1377,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) andc r10,r10,r3 mtmsrd r10 + lhz r12,PACA_IN_MCE(r13) + subi r12,r12,1 + sth r12,PACA_IN_MCE(r13) + /* Invoke machine_check_exception to print MCE event and panic. */ addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception @@ -2526,7 +2641,7 @@ EXC_VIRT_NONE(0x5400, 0x100) INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=1 - IBRANCH_COMMON=0 + IBRANCH_TO_COMMON=0 IKVM_REAL=1 INT_DEFINE_END(denorm_exception) @@ -2740,7 +2855,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) - std r10,_DAR(r1) + std r10,RESULT(r1) ori r10,r10,PACA_IRQ_HARD_DIS stb r10,PACAIRQHAPPENED(r13) @@ -2754,12 +2869,12 @@ EXC_COMMON_BEGIN(soft_nmi_common) /* * Restore soft mask settings. 
*/ - ld r10,_DAR(r1) + ld r10,RESULT(r1) stb r10,PACAIRQHAPPENED(r13) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r10 + kuap_restore_amr r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -2813,7 +2928,8 @@ masked_interrupt: ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) - /* returns to kernel where r13 must be set up, so don't restore it */ + ld r13,PACA_EXGEN+EX_R13(r13) + /* May return to masked low address where r13 is not set up */ .if \hsrr HRFI_TO_KERNEL .else @@ -2921,6 +3037,47 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +TRAMP_REAL_BEGIN(rfscv_flush_fallback) + /* system call volatile */ + mr r7,r13 + GET_PACA(r13); + mr r8,r1 + ld r1,PACAKSAVE(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + li r9,0 + li r10,0 + li r11,0 + mr r1,r8 + mr r13,r7 + RFSCV + USE_TEXT_SECTION() MASKED_INTERRUPT MASKED_INTERRUPT hsrr=1 @@ -2972,6 +3129,10 @@ EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) USE_FIXED_SECTION(virt_trampolines) /* + * All code below __end_interrupts is treated as soft-masked. If + * any code runs here with MSR[EE]=1, it must then cope with pending + * soft interrupt being raised (i.e., by ensuring it is replayed). + * * The __end_interrupts marker must be past the out-of-line (OOL) * handlers, so that they are copied to real address 0x100 when running * a relocatable kernel. This ensures they can be reached from the short @@ -3047,10 +3208,18 @@ do_hash_page: ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r5,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ + + /* + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then + * don't call hash_page, just fail the fault. This is required to + * prevent re-entrancy problems in the hash code, namely perf + * interrupts hitting while something holds H_PAGE_BUSY, and taking a + * hash fault. See the comment in hash_preload(). + */ ld r11, PACA_THREAD_INFO(r13) - lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ - andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ - bne 77f /* then don't call hash_page now */ + lwz r0,TI_PREEMPT(r11) + andis. r0,r0,NMI_MASK@h + bne 77f /* * r3 contains the trap number diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 59e60a9a9f5c..10ebb4bf71ad 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -32,6 +32,14 @@ #include <asm/fadump-internal.h> #include <asm/setup.h> +/* + * The CPU who acquired the lock to trigger the fadump crash should + * wait for other CPUs to enter. + * + * The timeout is in milliseconds. 
+ */ +#define CRASH_TIMEOUT 500 + static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); @@ -39,9 +47,21 @@ static void __init fadump_reserve_crash_area(u64 base); struct kobject *fadump_kobj; #ifndef CONFIG_PRESERVE_FA_DUMP + +static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 }; -struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 }; + +struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; + +#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ +#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ + sizeof(struct fadump_memory_range)) +static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; +struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs, + RESERVED_RNGS_SZ, 0, + RESERVED_RNGS_CNT, true }; + +static void __init early_init_dt_scan_reserved_ranges(unsigned long node); #ifdef CONFIG_CMA static struct cma *fadump_cma; @@ -110,6 +130,11 @@ static int __init fadump_cma_init(void) { return 1; } int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) { + if (depth == 0) { + early_init_dt_scan_reserved_ranges(node); + return 0; + } + if (depth != 1) return 0; @@ -431,10 +456,72 @@ static int __init fadump_get_boot_mem_regions(void) return ret; } +/* + * Returns true, if the given range overlaps with reserved memory ranges + * starting at idx. Also, updates idx to index of overlapping memory range + * with the given memory range. + * False, otherwise. + */ +static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +{ + bool ret = false; + int i; + + for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) { + u64 rbase = reserved_mrange_info.mem_ranges[i].base; + u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size; + + if (end <= rbase) + break; + + if ((end > rbase) && (base < rend)) { + *idx = i; + ret = true; + break; + } + } + + return ret; +} + +/* + * Locate a suitable memory area to reserve memory for FADump. While at it, + * lookup reserved-ranges & avoid overlap with them, as they are used by F/W. 
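The search this implements can be pictured with a small standalone model (illustrative names and logic only; the real code below walks memblock free ranges, page-aligns the candidate base, and consults the sorted reserved-ranges array): keep advancing the candidate base past any reserved range it collides with until a gap of the requested size fits below the limit.

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t base, size; };	/* assumed sorted and non-overlapping */

/* Return the first base >= start where [base, base+size) avoids all
 * reserved ranges and stays below limit, or 0 if no such gap exists. */
static uint64_t find_gap(uint64_t base, uint64_t size, uint64_t limit,
			 const struct range *rsvd, int n)
{
	for (int i = 0; i < n && base + size <= limit; ) {
		uint64_t rs = rsvd[i].base, re = rs + rsvd[i].size;

		if (base + size <= rs)	/* fits before this reserved range */
			return base;
		if (base < re) {	/* collides: skip past the range */
			base = re;
			i++;
			continue;
		}
		i++;			/* reserved range lies entirely below base */
	}
	return (base + size <= limit) ? base : 0;
}

int main(void)
{
	struct range rsvd[] = { { 0x1000, 0x1000 }, { 0x4000, 0x2000 } };

	/* Prints 0x2000: the first 0x2000-byte gap avoiding both ranges. */
	printf("0x%llx\n", (unsigned long long)find_gap(0, 0x2000, 0x10000, rsvd, 2));
	return 0;
}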
+ */ +static u64 __init fadump_locate_reserve_mem(u64 base, u64 size) +{ + struct fadump_memory_range *mrngs; + phys_addr_t mstart, mend; + int idx = 0; + u64 i, ret = 0; + + mrngs = reserved_mrange_info.mem_ranges; + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + &mstart, &mend, NULL) { + pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n", + i, mstart, mend, base); + + if (mstart > base) + base = PAGE_ALIGN(mstart); + + while ((mend > base) && ((mend - base) >= size)) { + if (!overlaps_reserved_ranges(base, base+size, &idx)) { + ret = base; + goto out; + } + + base = mrngs[idx].base + mrngs[idx].size; + base = PAGE_ALIGN(base); + } + } + +out: + return ret; +} + int __init fadump_reserve_mem(void) { - u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE; - bool is_memblock_bottom_up = memblock_bottom_up(); + u64 base, size, mem_boundary, bootmem_min; int ret = 1; if (!fw_dump.fadump_enabled) @@ -455,9 +542,9 @@ int __init fadump_reserve_mem(void) PAGE_ALIGN(fadump_calculate_reserve_size()); #ifdef CONFIG_CMA if (!fw_dump.nocma) { - align = FADUMP_CMA_ALIGNMENT; fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, align); + ALIGN(fw_dump.boot_memory_size, + FADUMP_CMA_ALIGNMENT); } #endif @@ -525,13 +612,9 @@ int __init fadump_reserve_mem(void) * Reserve memory at an offset closer to bottom of the RAM to * minimize the impact of memory hot-remove operation. */ - memblock_set_bottom_up(true); - base = memblock_find_in_range(base, mem_boundary, size, align); - - /* Restore the previous allocation mode */ - memblock_set_bottom_up(is_memblock_bottom_up); + base = fadump_locate_reserve_mem(base, size); - if (!base) { + if (!base || (base + size > mem_boundary)) { pr_err("Failed to find memory chunk for reservation!\n"); goto error_out; } @@ -596,8 +679,11 @@ early_param("fadump_reserve_mem", early_fadump_reserve_mem); void crash_fadump(struct pt_regs *regs, const char *str) { + unsigned int msecs; struct fadump_crash_info_header *fdh = NULL; int old_cpu, this_cpu; + /* Do not include first CPU */ + unsigned int ncpus = num_online_cpus() - 1; if (!should_fadump_crash()) return; @@ -613,6 +699,8 @@ void crash_fadump(struct pt_regs *regs, const char *str) old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); if (old_cpu != -1) { + atomic_inc(&cpus_in_fadump); + /* * We can't loop here indefinitely. Wait as long as fadump * is in force. If we race with fadump un-registration this @@ -636,6 +724,16 @@ void crash_fadump(struct pt_regs *regs, const char *str) fdh->online_mask = *cpu_online_mask; + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. 
+ */ + if (TRAP(&(fdh->regs)) == 0x100) { + msecs = CRASH_TIMEOUT; + while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) + mdelay(1); + } + fw_dump.ops->fadump_trigger(fdh, str); } @@ -728,10 +826,14 @@ void fadump_free_cpu_notes_buf(void) static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info) { + if (mrange_info->is_static) { + mrange_info->mem_range_cnt = 0; + return; + } + kfree(mrange_info->mem_ranges); - mrange_info->mem_ranges = NULL; - mrange_info->mem_ranges_sz = 0; - mrange_info->max_mem_ranges = 0; + memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0, + (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ)); } /* @@ -788,6 +890,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) { int ret; + if (mrange_info->is_static) { + pr_err("Reached array size limit for %s memory ranges\n", + mrange_info->name); + return -ENOSPC; + } + ret = fadump_alloc_mem_ranges(mrange_info); if (ret) return ret; @@ -1204,20 +1312,19 @@ static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info) * Scan reserved-ranges to consider them while reserving/releasing * memory for FADump. */ -static inline int fadump_scan_reserved_mem_ranges(void) +static void __init early_init_dt_scan_reserved_ranges(unsigned long node) { - struct device_node *root; const __be32 *prop; int len, ret = -1; unsigned long i; - root = of_find_node_by_path("/"); - if (!root) - return ret; + /* reserved-ranges already scanned */ + if (reserved_mrange_info.mem_range_cnt != 0) + return; - prop = of_get_property(root, "reserved-ranges", &len); + prop = of_get_flat_dt_prop(node, "reserved-ranges", &len); if (!prop) - return ret; + return; /* * Each reserved range is an (address,size) pair, 2 cells each, @@ -1239,7 +1346,8 @@ static inline int fadump_scan_reserved_mem_ranges(void) } } - return ret; + /* Compact reserved ranges */ + sort_and_merge_mem_ranges(&reserved_mrange_info); } /* @@ -1253,32 +1361,21 @@ static void fadump_release_memory(u64 begin, u64 end) u64 ra_start, ra_end, tstart; int i, ret; - fadump_scan_reserved_mem_ranges(); - ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; /* - * Add reserved dump area to reserved ranges list - * and exclude all these ranges while releasing memory. + * If reserved ranges array limit is hit, overwrite the last reserved + * memory range with reserved dump area to ensure it is excluded from + * the memory being released (reused for next FADump registration). */ - ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); - if (ret != 0) { - /* - * Not enough memory to setup reserved ranges but the system is - * running shortage of memory. So, release all the memory except - * Reserved dump area (reused for next fadump registration). - */ - if (begin < ra_end && end > ra_start) { - if (begin < ra_start) - fadump_release_reserved_area(begin, ra_start); - if (end > ra_end) - fadump_release_reserved_area(ra_end, end); - } else - fadump_release_reserved_area(begin, end); + if (reserved_mrange_info.mem_range_cnt == + reserved_mrange_info.max_mem_ranges) + reserved_mrange_info.mem_range_cnt--; + ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); + if (ret != 0) return; - } /* Get the reserved ranges list in order first. 
*/ sort_and_merge_mem_ranges(&reserved_mrange_info); diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index cc4a5e3f51f1..fe48d319d490 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -11,8 +11,27 @@ #include <linux/export.h> #include <linux/cache.h> +#include <linux/of.h> #include <asm/firmware.h> +#ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; EXPORT_SYMBOL_GPL(powerpc_firmware_features); +#endif + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} +#endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 3235a8da6af7..4ae39db70044 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -12,7 +12,6 @@ #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/cache.h> #include <asm/thread_info.h> @@ -108,9 +107,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif - /* Don't care if r4 overflows, this is desired behaviour */ - lbz r4,THREAD_LOAD_FP(r5) - addi r4,r4,1 + li r4,1 stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) @@ -119,6 +116,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* restore registers and return */ /* we haven't used ctr or xer or lr */ blr +_ASM_NOKPROBE_SYMBOL(load_up_fpu) /* * save_fpu(tsk) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 97c887950c3c..f3ab94d73936 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -17,10 +17,10 @@ */ #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/cache.h> #include <asm/thread_info.h> @@ -297,7 +297,7 @@ MachineCheck: cmpwi cr1, r4, 0 #endif beq cr1, machine_check_tramp - b machine_check_in_rtas + twi 31, 0, 0 #else b machine_check_tramp #endif @@ -474,7 +474,7 @@ InstructionTLBMiss: /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR @@ -484,7 +484,7 @@ InstructionTLBMiss: li r1,_PAGE_PRESENT | _PAGE_EXEC #endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif @@ -541,7 +541,7 @@ DataLoadTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -549,7 +549,7 @@ DataLoadTLBMiss: #else li r1, _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -621,7 +621,7 @@ DataStoreTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis 
r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -629,7 +629,7 @@ DataStoreTLBMiss: #else li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -673,6 +673,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define altivec_assist_exception unknown_exception #endif +#ifndef CONFIG_TAU_INT +#define TAUException unknown_exception +#endif + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 2cec543c38f0..5b282d9965a5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -26,17 +26,16 @@ */ #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/export.h> -#include <asm/asm-405.h> #include "head_32.h" @@ -176,135 +175,16 @@ _ENTRY(saved_ksp_limit) * 0x0300 - Data Storage Exception * This happens for just a few reasons. U0 set (but we don't do that), * or zone protection fault (user violation, write to protected page). - * If this is just an update of modified status, we do that quickly - * and exit. Otherwise, we call heavywight functions to do the work. + * The other Data TLB exceptions bail out to this point + * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif - - /* First, check if it was a zone fault (which means a user - * tried to access a kernel or read-protected page - always - * a SEGV). All other faults here must be stores, so no - * need to check ESR_DST as well. */ - mfspr r10, SPRN_ESR - andis. r10, r10, ESR_DIZ@h - bne 2f - - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */ - beq 2f /* Bail if no table */ - - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - - andi. r9, r11, _PAGE_RW /* Is it writeable? */ - beq 2f /* Bail if not */ - - /* Update 'changed'. 
- */ - ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE - stw r11, 0(r12) /* Update Linux page table */ - - /* Most of the Linux PTE is ready to load into the TLB LO. - * We set ZSEL, where only the LS-bit determines user access. - * We set execute, because we don't have the granularity to - * properly set this at the page level (Linux problem). - * If shared is set, we cause a zero PID->TID load. - * Many of these bits are software only. Bits we don't set - * here we (properly should) assume have the appropriate value. - */ - li r12, 0x0ce2 - andc r11, r11, r12 /* Make sure 20, 21 are zero */ - - /* find the TLB index that caused the fault. It has to be here. - */ - tlbsx r9, 0, r10 - - tlbwe r11, r9, TLB_DATA /* Load TLB LO */ - - /* Done...restore registers and get out of here. - */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 -#endif - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - PPC405_ERR77_SYNC - rfi /* Should sync shadow TLBs */ - b . /* prevent prefetch past rfi */ - -2: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 -#endif - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 - b DataAccess + EXCEPTION_PROLOG + mfspr r5, SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5, _ESR(r11) + mfspr r4, SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + stw r4, _DEAR(r11) + EXC_XFER_LITE(0x300, handle_page_fault) /* * 0x0400 - Instruction Storage Exception @@ -372,21 +252,11 @@ _ENTRY(saved_ksp_limit) START_EXCEPTION(0x1100, DTLBMiss) mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif + mfcr r12 + mfspr r9, SPRN_PID + mtspr SPRN_SPRG_SCRATCH5, r9 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -409,28 +279,34 @@ _ENTRY(saved_ksp_limit) 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r12, 0(r11) /* Get L1 entry */ - andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + lwz r11, 0(r11) /* Get L1 entry */ + andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - andi. r9, r11, _PAGE_PRESENT - beq 5f + rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r11) /* Get Linux PTE */ +#ifdef CONFIG_SWAP + li r9, _PAGE_PRESENT | _PAGE_ACCESSED +#else + li r9, _PAGE_PRESENT +#endif + andc. 
r9, r9, r11 /* Check permission */ + bne 5f - ori r11, r11, _PAGE_ACCESSED - stw r11, 0(r12) + rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ + and r9, r9, r11 /* hwwrite = dirty & rw */ + rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. */ - li r12, 0x00c0 - rlwimi r10, r12, 0, 20, 31 + li r9, 0x00c0 + rlwimi r10, r9, 0, 20, 31 b finish_tlb_load 2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r12, 2, 22, 24 + rlwinm. r9, r11, 2, 22, 24 beq 5f /* Create TLB tag. This is the faulting address, plus a static @@ -438,7 +314,6 @@ _ENTRY(saved_ksp_limit) */ ori r9, r9, 0x40 rlwimi r10, r9, 0, 20, 31 - mr r11, r12 b finish_tlb_load @@ -446,24 +321,14 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - b DataAccess + b DataStorage /* 0x1200 - Instruction TLB Miss Exception * Nearly the same as above, except we get our information from different @@ -472,21 +337,11 @@ _ENTRY(saved_ksp_limit) START_EXCEPTION(0x1200, ITLBMiss) mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_SCRATCH1, r11 -#ifdef CONFIG_403GCX - stw r12, 0(r0) - stw r9, 4(r0) - mfcr r11 - mfspr r12, SPRN_PID - stw r11, 8(r0) - stw r12, 12(r0) -#else mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r11 - mfspr r12, SPRN_PID - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH5, r12 -#endif + mfcr r12 + mfspr r9, SPRN_PID + mtspr SPRN_SPRG_SCRATCH5, r9 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -509,28 +364,34 @@ _ENTRY(saved_ksp_limit) 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r12, 0(r11) /* Get L1 entry */ - andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + lwz r11, 0(r11) /* Get L1 entry */ + andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r12) /* Get Linux PTE */ - andi. r9, r11, _PAGE_PRESENT - beq 5f + rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r11) /* Get Linux PTE */ +#ifdef CONFIG_SWAP + li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#else + li r9, _PAGE_PRESENT | _PAGE_EXEC +#endif + andc. r9, r9, r11 /* Check permission */ + bne 5f - ori r11, r11, _PAGE_ACCESSED - stw r11, 0(r12) + rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ + and r9, r9, r11 /* hwwrite = dirty & rw */ + rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. */ - li r12, 0x00c0 - rlwimi r10, r12, 0, 20, 31 + li r9, 0x00c0 + rlwimi r10, r9, 0, 20, 31 b finish_tlb_load 2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r12, 2, 22, 24 + rlwinm. r9, r11, 2, 22, 24 beq 5f /* Create TLB tag. 
This is the faulting address, plus a static @@ -538,7 +399,6 @@ _ENTRY(saved_ksp_limit) */ ori r9, r9, 0x40 rlwimi r10, r9, 0, 20, 31 - mr r11, r12 b finish_tlb_load @@ -546,21 +406,11 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 b InstructionAccess @@ -569,13 +419,7 @@ _ENTRY(saved_ksp_limit) EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) -#ifdef CONFIG_IBM405_ERR51 - /* 405GP errata 51 */ - START_EXCEPTION(0x1700, Trap_17) - b DTLBMiss -#else EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) -#endif EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) @@ -636,7 +480,6 @@ _ENTRY(saved_ksp_limit) lwz r12,GPR12(r11) lwz r10,crit_r10@l(0) lwz r11,crit_r11@l(0) - PPC405_ERR77_SYNC rfci b . @@ -669,18 +512,6 @@ WDTException: (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) -/* - * The other Data TLB exceptions bail out to this point - * if they can't resolve the lightweight TLB fault. - */ -DataAccess: - EXCEPTION_PROLOG - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ - stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ - stw r4, _DEAR(r11) - EXC_XFER_LITE(0x300, handle_page_fault) - /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. * However, for the 4xx-series processors these are neither defined nor @@ -692,7 +523,7 @@ DataAccess: * miss get to this point to load the TLB. * r10 - TLB_TAG value * r11 - Linux PTE - * r12, r9 - available to use + * r9 - available to use * PID - loaded with proper value when we get here * Upon exit, we reload everything and RFI. * Actually, it will fit now, but oh well.....a common place @@ -701,45 +532,32 @@ DataAccess: tlb_4xx_index: .long 0 finish_tlb_load: - /* load the next available TLB index. - */ - lwz r9, tlb_4xx_index@l(0) - addi r9, r9, 1 - andi. r9, r9, (PPC40X_TLB_SIZE-1) - stw r9, tlb_4xx_index@l(0) - -6: /* * Clear out the software-only bits in the PTE to generate the * TLB_DATA value. These are the bottom 2 bits of the RPM, the * top 3 bits of the zone field, and M. */ - li r12, 0x0ce2 - andc r11, r11, r12 + li r9, 0x0ce2 + andc r11, r11, r9 + + /* load the next available TLB index. */ + lwz r9, tlb_4xx_index@l(0) + addi r9, r9, 1 + andi. r9, r9, PPC40X_TLB_SIZE - 1 + stw r9, tlb_4xx_index@l(0) tlbwe r11, r9, TLB_DATA /* Load TLB LO */ tlbwe r10, r9, TLB_TAG /* Load TLB HI */ /* Done...restore registers and get out of here. 
*/ -#ifdef CONFIG_403GCX - lwz r12, 12(r0) - lwz r11, 8(r0) - mtspr SPRN_PID, r12 - mtcr r11 - lwz r9, 4(r0) - lwz r12, 0(r0) -#else - mfspr r12, SPRN_SPRG_SCRATCH5 - mfspr r11, SPRN_SPRG_SCRATCH6 - mtspr SPRN_PID, r12 - mtcr r11 + mfspr r9, SPRN_SPRG_SCRATCH5 + mtspr SPRN_PID, r9 + mtcr r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 -#endif mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - PPC405_ERR77_SYNC rfi /* Should sync shadow TLBs */ b . /* prevent prefetch past rfi */ @@ -802,7 +620,7 @@ start_here: ori r6, r6, swapper_pg_dir@l lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 51dd01a27314..8e36718f3167 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -25,10 +25,10 @@ */ #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/ppc_asm.h> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ddfbd02140d9..0e05a9a47a4b 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -947,15 +947,8 @@ start_here_multiplatform: std r0,0(r4) #endif - /* The following gets the stack set up with the regs */ - /* pointing to the real addr of the kernel stack. This is */ - /* all done to support the C function call below which sets */ - /* up the htab. This is done because we have relocated the */ - /* kernel but are still running in real mode. */ - - LOAD_REG_ADDR(r3,init_thread_union) - /* set up a stack pointer */ + LOAD_REG_ADDR(r3,init_thread_union) LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 073a651787df..9f359d3fba74 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -16,11 +16,12 @@ #include <linux/init.h> #include <linux/magic.h> +#include <linux/pgtable.h> +#include <linux/sizes.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> #include <asm/cache.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/ppc_asm.h> @@ -31,10 +32,15 @@ #include "head_32.h" +.macro compare_to_kernel_boundary scratch, addr #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ -#define SIMPLE_KERNEL_ADDRESS 1 + not. 
\scratch, \addr +#else + rlwinm \scratch, \addr, 16, 0xfff8 + cmpli cr0, \scratch, PAGE_OFFSET@h #endif +.endm /* * We need an ITLB miss handler for kernel addresses if: @@ -196,7 +202,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 #endif @@ -206,44 +212,31 @@ InstructionTLBMiss: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10) mtspr SPRN_MD_EPN, r10 - /* Only modules will cause ITLB Misses as we always - * pin the first 8MB of kernel memory */ #ifdef ITLB_MISS_KERNEL mfcr r11 -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) - cmpi cr0, r10, 0 /* Address >= 0x80000000 */ -#else - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_TEXT - /* It is assumed that kernel code fits into the first 32M */ -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__itlbmiss_linmem_top -#endif -#endif + compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ #ifdef ITLB_MISS_KERNEL -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) - bge+ 3f -#else blt+ 3f -#endif -#ifndef CONFIG_PIN_TLB_TEXT - blt cr7, ITLBMissLinear -#endif rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: + mtcr r11 #endif +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ + mtspr SPRN_MD_TWC, r11 +#else lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ - mtspr SPRN_MD_TWC, r10 +#endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ -#ifdef ITLB_MISS_KERNEL - mtcr r11 +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K + mtspr SPRN_MI_TWC, r11 #endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT @@ -263,7 +256,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 #endif rfi @@ -281,33 +274,6 @@ InstructionTLBMiss: rfi #endif -#ifndef CONFIG_PIN_TLB_TEXT -ITLBMissLinear: - mtcr r11 -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23 - patch_site 0f, patch__itlbmiss_linmem_top8 - - mfspr r10, SPRN_SRR0 -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MI_PS8MEG | MI_SVALID - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#endif - mtspr SPRN_MI_TWC, r11 - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 - rfi - patch_site 0b, patch__itlbmiss_exit_2 -#endif - . = 0x1200 DataStoreTLBMiss: mtspr SPRN_DAR, r10 @@ -318,21 +284,9 @@ DataStoreTLBMiss: * kernel page tables. 
*/ mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_IMMR - cmpli cr6, r10, VIRT_IMMR_BASE@h -#endif -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__dtlbmiss_linmem_top - + compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ blt+ 3f -#ifndef CONFIG_PIN_TLB_IMMR -0: beq- cr6, DTLBMissIMMR - patch_site 0b, patch__dtlbmiss_immr_jmp -#endif - blt cr7, DTLBMissLinear rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: @@ -350,6 +304,7 @@ DataStoreTLBMiss: * above. */ rlwimi r11, r10, 0, _PAGE_GUARDED + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MD_TWC, r11 /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. @@ -383,61 +338,16 @@ DataStoreTLBMiss: rfi patch_site 0b, patch__dtlbmiss_exit_1 -DTLBMissIMMR: - mtcr r11 - /* Set 512k byte guarded page and mark it valid */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID - mtspr SPRN_MD_TWC, r10 - mfspr r10, SPRN_IMMR /* Get current IMMR */ - rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT | _PAGE_NO_CACHE - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR +#ifdef CONFIG_PERF_EVENTS + patch_site 0f, patch__dtlbmiss_perf +0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + addi r10, r10, 1 + stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + mfspr r10, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r11, SPRN_M_TW rfi - patch_site 0b, patch__dtlbmiss_exit_2 - -DTLBMissLinear: - mtcr r11 - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23 - patch_site 0f, patch__dtlbmiss_romem_top8 - -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 0, 0xff800000 - neg r10, r11 - or r11, r11, r10 - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MD_PS8MEG | MD_SVALID #endif - mtspr SPRN_MD_TWC, r11 -#ifdef CONFIG_STRICT_KERNEL_RWX - patch_site 0f, patch__dtlbmiss_romem_top - -0: subis r11, r10, 0 - rlwimi r10, r11, 11, _PAGE_RO -#endif - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi - patch_site 0b, patch__dtlbmiss_exit_3 /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -485,18 +395,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) -/* Called from DataStoreTLBMiss when perf TLB misses events are activated */ -#ifdef CONFIG_PERF_EVENTS - patch_site 0f, patch__dtlbmiss_perf -0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - addi r10, r10, 1 - stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi -#endif - stack_overflow: vmap_stack_overflow_exception @@ -563,14 +461,9 @@ FixupDAR:/* Entry point for dcbx workaround. 
*/ cmpli cr1, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f - rlwinm r11, r10, 16, 0xfff8 - -0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h - patch_site 0b, patch__fixupdar_linmem_top /* create physical page address from effective address */ tophys(r11, r10) - blt- cr7, 201f mfspr r11, SPRN_M_TWB /* Get level 1 table */ rlwinm r11, r11, 0, 20, 31 oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -581,7 +474,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ bt 28,200f /* bit 28 = Large page (8M) */ - bt 29,202f /* bit 29 = Large page (8M or 512K) */ /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 201: lwz r11,0(r11) @@ -608,11 +500,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 b 201b -202: - /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -747,6 +634,31 @@ start_here: rfi /* Load up the kernel context */ 2: +#ifdef CONFIG_PIN_TLB_IMMR + lis r0, MD_TWAM@h + oris r0, r0, 0x1f00 + mtspr SPRN_MD_CTR, r0 + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + tlbie r0 + mtspr SPRN_MD_EPN, r0 + LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED) + mtspr SPRN_MD_TWC, r0 + mfspr r0, SPRN_IMMR + rlwinm r0, r0, 0, 0xfff80000 + ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_RPN, r0 + lis r0, (MD_TWAM | MD_RSV4I)@h + mtspr SPRN_MD_CTR, r0 +#endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif +#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) + lis r0, MD_TWAM@h + mtspr SPRN_MD_CTR, r0 +#endif tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ @@ -779,17 +691,10 @@ start_here: initial_mmu: li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ - lis r10, MD_RESETVAL@h -#ifndef CONFIG_8xx_COPYBACK - oris r10, r10, MD_WTDEF@h -#endif + lis r10, MD_TWAM@h mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ tlbia /* Invalidate all TLB entries */ -#ifdef CONFIG_PIN_TLB_DATA - oris r10, r10, MD_RSV4I@h - mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ -#endif lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l @@ -798,55 +703,32 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Map a 512k page for the IMMR to get the processor - * internal registers (among other things). - */ -#ifdef CONFIG_PIN_TLB_IMMR - oris r10, r10, MD_RSV4I@h - ori r10, r10, 0x1c00 - mtspr SPRN_MD_CTR, r10 - - mfspr r9, 638 /* Get current IMMR */ - andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */ - - lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */ - ori r8, r8, MD_EVALID /* Mark it valid */ - mtspr SPRN_MD_EPN, r8 - li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r8 - mr r8, r9 /* Create paddr for TLB */ - ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ - mtspr SPRN_MD_RPN, r8 -#endif - - /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. 
*/ -#ifdef CONFIG_PIN_TLB_TEXT + /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */ lis r8, MI_RSV4I@h ori r8, r8, 0x1c00 -#endif + oris r12, r10, MD_RSV4I@h + ori r12, r12, 0x1c00 li r9, 4 /* up to 4 pages of 8M */ mtctr r9 lis r9, KERNELBASE@h /* Create vaddr for TLB */ li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ li r11, MI_BOOTINIT /* Create RPN for address 0 */ - lis r12, _einittext@h - ori r12, r12, _einittext@l 1: -#ifdef CONFIG_PIN_TLB_TEXT mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ addi r8, r8, 0x100 -#endif - ori r0, r9, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r0 mtspr SPRN_MI_TWC, r10 mtspr SPRN_MI_RPN, r11 /* Store TLB entry */ + mtspr SPRN_MD_CTR, r12 + addi r12, r12, 0x100 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r10 + mtspr SPRN_MD_RPN, r11 addis r9, r9, 0x80 addis r11, r11, 0x80 - cmpl cr0, r9, r12 - bdnzf gt, 1b + bdnz 1b /* Since the cache is enabled according to the information we * just loaded into the TLB, invalidate and enable the caches here. @@ -857,17 +739,7 @@ initial_mmu: mtspr SPRN_DC_CST, r8 lis r8, IDC_ENABLE@h mtspr SPRN_IC_CST, r8 -#ifdef CONFIG_8xx_COPYBACK mtspr SPRN_DC_CST, r8 -#else - /* For a debug option, I left this here to easily enable - * the write through cache mode - */ - lis r8, DC_SFWT@h - mtspr SPRN_DC_CST, r8 - lis r8, IDC_ENABLE@h - mtspr SPRN_DC_CST, r8 -#endif /* Disable debug mode entry on breakpoints */ mfspr r8, SPRN_DER #ifdef CONFIG_PERF_EVENTS @@ -878,6 +750,108 @@ initial_mmu: mtspr SPRN_DER, r8 blr +#ifdef CONFIG_PIN_TLB +_GLOBAL(mmu_pin_tlb) + lis r9, (1f - PAGE_OFFSET)@h + ori r9, r9, (1f - PAGE_OFFSET)@l + mfmsr r10 + mflr r11 + li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) + rlwinm r0, r10, 0, ~MSR_RI + rlwinm r0, r0, 0, ~MSR_EE + mtmsr r0 + isync + .align 4 + mtspr SPRN_SRR0, r9 + mtspr SPRN_SRR1, r12 + rfi +1: + li r5, 0 + lis r6, MD_TWAM@h + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MD_CTR, r6 + tlbia + +#ifdef CONFIG_PIN_TLB_TEXT + LOAD_REG_IMMEDIATE(r5, 28 << 8) + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + li r0, 4 + mtctr r0 + +2: ori r0, r6, MI_EVALID + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MI_EPN, r0 + mtspr SPRN_MI_TWC, r7 + mtspr SPRN_MI_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + lis r0, MI_RSV4I@h + mtspr SPRN_MI_CTR, r0 +#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) +#ifdef CONFIG_PIN_TLB_DATA + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) +#ifdef CONFIG_PIN_TLB_IMMR + li r0, 3 +#else + li r0, 4 +#endif + mtctr r0 + cmpwi r4, 0 + beq 4f + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + +4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r3 + bdnzt lt, 2b +#endif +#ifdef CONFIG_PIN_TLB_IMMR + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) + mfspr r8, 
SPRN_IMMR + rlwinm r8, r8, 0, 0xfff80000 + ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 +#endif +#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) + lis r0, (MD_RSV4I | MD_TWAM)@h + mtspr SPRN_MI_CTR, r0 +#endif + mtspr SPRN_SRR1, r10 + mtspr SPRN_SRR0, r11 + rfi +#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bd2e5ed8dd50..18f87bf9e32b 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -534,7 +534,7 @@ struct exception_regs { }; /* ensure this structure is always sized to a multiple of the stack alignment */ -#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16) +#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16) #endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 840af004041e..586a6ac501e9 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -28,10 +28,10 @@ #include <linux/init.h> #include <linux/threads.h> +#include <linux/pgtable.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/ppc_asm.h> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 72f461bd70fb..1f4a1efa0074 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -24,13 +24,14 @@ #include <asm/debug.h> #include <asm/debugfs.h> #include <asm/hvcall.h> +#include <asm/inst.h> #include <linux/uaccess.h> /* * Stores the breakpoints currently in use on each breakpoint address * register for every cpu */ -static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]); /* * Returns total number of data or instruction breakpoints available. @@ -38,10 +39,21 @@ static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); int hw_breakpoint_slots(int type) { if (type == TYPE_DATA) - return HBP_NUM; + return nr_wp_slots(); return 0; /* no instruction breakpoints available */ } +static bool single_step_pending(void) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (current->thread.last_hit_ubp[i]) + return true; + } + return false; +} + /* * Install a perf counter breakpoint. * @@ -54,16 +66,26 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct perf_event **slot; + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + slot = this_cpu_ptr(&bp_per_reg[i]); + if (!*slot) { + *slot = bp; + break; + } + } - *slot = bp; + if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot")) + return -EBUSY; /* * Do not install DABR values if the instruction must be single-stepped. * If so, DABR will be populated in single_step_dabr_instruction(). 
*/ - if (current->thread.last_hit_ubp != bp) - __set_breakpoint(info); + if (!single_step_pending()) + __set_breakpoint(i, info); return 0; } @@ -79,15 +101,248 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct arch_hw_breakpoint null_brk = {0}; + struct perf_event **slot; + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + slot = this_cpu_ptr(&bp_per_reg[i]); + if (*slot == bp) { + *slot = NULL; + break; + } + } - if (*slot != bp) { - WARN_ONCE(1, "Can't find the breakpoint"); + if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot")) return; + + __set_breakpoint(i, &null_brk); +} + +static bool is_ptrace_bp(struct perf_event *bp) +{ + return bp->overflow_handler == ptrace_triggered; +} + +struct breakpoint { + struct list_head list; + struct perf_event *bp; + bool ptrace_bp; +}; + +static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); +static LIST_HEAD(task_bps); + +static struct breakpoint *alloc_breakpoint(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + tmp->bp = bp; + tmp->ptrace_bp = is_ptrace_bp(bp); + return tmp; +} + +static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) +{ + __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; + + bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); + bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE); + bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); + bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE); + + return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); +} + +static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) +{ + return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp; +} + +static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) +{ + return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); +} + +static int task_bps_add(struct perf_event *bp) +{ + struct breakpoint *tmp; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + list_add(&tmp->list, &task_bps); + return 0; +} + +static void task_bps_remove(struct perf_event *bp) +{ + struct list_head *pos, *q; + + list_for_each_safe(pos, q, &task_bps) { + struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); + + if (tmp->bp == bp) { + list_del(&tmp->list); + kfree(tmp); + break; + } } +} - *slot = NULL; - hw_breakpoint_disable(); +/* + * If any task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. + */ +static bool all_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (!can_co_exist(tmp, bp)) + return true; + } + return false; +} + +/* + * If same task has breakpoint from alternate infrastructure, + * return true. Otherwise return false. 
+ */ +static bool same_task_bps_check(struct perf_event *bp) +{ + struct breakpoint *tmp; + + list_for_each_entry(tmp, &task_bps, list) { + if (tmp->bp->hw.target == bp->hw.target && + !can_co_exist(tmp, bp)) + return true; + } + return false; +} + +static int cpu_bps_add(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + struct breakpoint *tmp; + int i = 0; + + tmp = alloc_breakpoint(bp); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) { + cpu_bp[i] = tmp; + break; + } + } + return 0; +} + +static void cpu_bps_remove(struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i = 0; + + cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (!cpu_bp[i]) + continue; + + if (cpu_bp[i]->bp == bp) { + kfree(cpu_bp[i]); + cpu_bp[i] = NULL; + break; + } + } +} + +static bool cpu_bps_check(int cpu, struct perf_event *bp) +{ + struct breakpoint **cpu_bp; + int i; + + cpu_bp = per_cpu_ptr(cpu_bps, cpu); + for (i = 0; i < nr_wp_slots(); i++) { + if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) + return true; + } + return false; +} + +static bool all_cpu_bps_check(struct perf_event *bp) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (cpu_bps_check(cpu, bp)) + return true; + } + return false; +} + +/* + * We don't use any locks to serialize accesses to cpu_bps or task_bps + * because are already inside nr_bp_mutex. + */ +int arch_reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + /* ptrace breakpoint */ + if (is_ptrace_bp(bp)) { + if (all_cpu_bps_check(bp)) + return -ENOSPC; + + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } + + /* perf breakpoint */ + if (is_kernel_addr(bp->attr.bp_addr)) + return 0; + + if (bp->hw.target && bp->cpu == -1) { + if (same_task_bps_check(bp)) + return -ENOSPC; + + return task_bps_add(bp); + } else if (!bp->hw.target && bp->cpu != -1) { + if (all_task_bps_check(bp)) + return -ENOSPC; + + return cpu_bps_add(bp); + } + + if (same_task_bps_check(bp)) + return -ENOSPC; + + ret = cpu_bps_add(bp); + if (ret) + return ret; + ret = task_bps_add(bp); + if (ret) + cpu_bps_remove(bp); + + return ret; +} + +void arch_release_bp_slot(struct perf_event *bp) +{ + if (!is_kernel_addr(bp->attr.bp_addr)) { + if (bp->hw.target) + task_bps_remove(bp); + if (bp->cpu != -1) + cpu_bps_remove(bp); + } } /* @@ -102,8 +357,14 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp) * restoration variables to prevent dangling pointers. * FIXME, this should not be using bp->ctx at all! Sayeth peterz. */ - if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) - bp->ctx->task->thread.last_hit_ubp = NULL; + if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) { + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (bp->ctx->task->thread.last_hit_ubp[i] == bp) + bp->ctx->task->thread.last_hit_ubp[i] = NULL; + } + } } /* @@ -140,10 +401,10 @@ int arch_bp_generic_fields(int type, int *gen_bp_type) * <---8 bytes---> * * In this case, we should configure hw as: - * start_addr = address & ~HW_BREAKPOINT_ALIGN + * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1) * len = 16 bytes * - * @start_addr and @end_addr are inclusive. + * @start_addr is inclusive but @end_addr is exclusive. 
*/ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) { @@ -151,14 +412,15 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) u16 hw_len; unsigned long start_addr, end_addr; - start_addr = hw->address & ~HW_BREAKPOINT_ALIGN; - end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN; - hw_len = end_addr - start_addr + 1; + start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE); + end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE); + hw_len = end_addr - start_addr; if (dawr_enabled()) { max_len = DAWR_MAX_LEN; - /* DAWR region can't cross 512 bytes boundary */ - if ((start_addr >> 9) != (end_addr >> 9)) + /* DAWR region can't cross 512 bytes boundary on p10 predecessors */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512))) return -EINVAL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { /* 8xx can setup a range without limitation */ @@ -215,90 +477,238 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { struct arch_hw_breakpoint *info; + int i; - if (likely(!tsk->thread.last_hit_ubp)) - return; + for (i = 0; i < nr_wp_slots(); i++) { + if (unlikely(tsk->thread.last_hit_ubp[i])) + goto reset; + } + return; - info = counter_arch_bp(tsk->thread.last_hit_ubp); +reset: regs->msr &= ~MSR_SE; - __set_breakpoint(info); - tsk->thread.last_hit_ubp = NULL; + for (i = 0; i < nr_wp_slots(); i++) { + info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); + __set_breakpoint(i, info); + tsk->thread.last_hit_ubp[i] = NULL; + } } -static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info) +static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info) { return ((info->address <= dar) && (dar - info->address < info->len)); } -static bool -dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info) +static bool ea_user_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) { - return ((dar <= info->address + info->len - 1) && - (dar + size - 1 >= info->address)); + return ((ea < info->address + info->len) && + (ea + size > info->address)); } -/* - * Handle debug exception notifications. - */ -static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp, - struct arch_hw_breakpoint *info) +static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) { - unsigned int instr = 0; - int ret, type, size; - struct instruction_op op; - unsigned long addr = info->address; + unsigned long hw_start_addr, hw_end_addr; + + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); - if (__get_user_inatomic(instr, (unsigned int *)regs->nip)) - goto fail; + return ((hw_start_addr <= dar) && (hw_end_addr > dar)); +} - ret = analyse_instr(&op, regs, instr); - type = GETTYPE(op.type); - size = GETSIZE(op.type); +static bool ea_hw_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) +{ + unsigned long hw_start_addr, hw_end_addr; - if (!ret && (type == LARX || type == STCX)) { - printk_ratelimited("Breakpoint hit on instruction that can't be emulated." 
- " Breakpoint at 0x%lx will be disabled.\n", addr); - goto disable; - } + hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + + return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); +} + +/* + * If hw has multiple DAWR registers, we also need to check all + * dawrx constraint bits to confirm this is _really_ a valid event. + * If type is UNKNOWN, but privilege level matches, consider it as + * a positive match. + */ +static bool check_dawrx_constraints(struct pt_regs *regs, int type, + struct arch_hw_breakpoint *info) +{ + if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) + return false; /* - * If it's extraneous event, we still need to emulate/single- - * step the instruction, but we don't generate an event. + * The Cache Management instructions other than dcbz never + * cause a match. i.e. if type is CACHEOP, the instruction + * is dcbz, and dcbz is treated as Store. */ - if (size && !dar_range_overlaps(regs->dar, size, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) + return false; - /* Do not emulate user-space instructions, instead single-step them */ - if (user_mode(regs)) { - current->thread.last_hit_ubp = bp; - regs->msr |= MSR_SE; + if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) return false; - } - if (!emulate_step(regs, instr)) - goto fail; + if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER)) + return false; return true; +} + +/* + * Return true if the event is valid wrt dawr configuration, + * including extraneous exception. Otherwise return false. + */ +static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info) +{ + bool in_user_range = dar_in_user_range(regs->dar, info); + bool dawrx_constraints; -fail: /* - * We've failed in reliably handling the hw-breakpoint. Unregister - * it and throw a warning message to let the user know about it. + * 8xx supports only one breakpoint and thus we can + * unconditionally return true. */ - WARN(1, "Unable to handle hardware breakpoint. 
Breakpoint at " - "0x%lx will be disabled.", addr); + if (IS_ENABLED(CONFIG_PPC_8xx)) { + if (!in_user_range) + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } -disable: - perf_event_disable_inatomic(bp); + if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return true; + } + + dawrx_constraints = check_dawrx_constraints(regs, type, info); + + if (type == UNKNOWN) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return dawrx_constraints; + } + + if (ea_user_range_overlaps(ea, size, info)) + return dawrx_constraints; + + if (ea_hw_range_overlaps(ea, size, info)) { + if (dawrx_constraints) { + info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + return true; + } + } return false; } +static int cache_op_size(void) +{ +#ifdef __powerpc64__ + return ppc64_caches.l1d.block_size; +#else + return L1_CACHE_BYTES; +#endif +} + +static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, + int *type, int *size, unsigned long *ea) +{ + struct instruction_op op; + + if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + return; + + analyse_instr(&op, regs, *instr); + *type = GETTYPE(op.type); + *ea = op.ea; +#ifdef __powerpc64__ + if (!(regs->msr & MSR_64BIT)) + *ea &= 0xffffffffUL; +#endif + + *size = GETSIZE(op.type); + if (*type == CACHEOP) { + *size = cache_op_size(); + *ea &= ~(*size - 1); + } +} + +static bool is_larx_stcx_instr(int type) +{ + return type == LARX || type == STCX; +} + +/* + * We've failed in reliably handling the hw-breakpoint. Unregister + * it and throw a warning message to let the user know about it. + */ +static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info) +{ + WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.", + info->address); + perf_event_disable_inatomic(bp); +} + +static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info) +{ + printk_ratelimited("Breakpoint hit on instruction that can't be emulated. 
Breakpoint at 0x%lx will be disabled.\n", + info->address); + perf_event_disable_inatomic(bp); +} + +static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, + struct arch_hw_breakpoint **info, int *hit, + struct ppc_inst instr) +{ + int i; + int stepped; + + /* Do not emulate user-space instructions, instead single-step them */ + if (user_mode(regs)) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + current->thread.last_hit_ubp[i] = bp[i]; + info[i] = NULL; + } + regs->msr |= MSR_SE; + return false; + } + + stepped = emulate_step(regs, instr); + if (!stepped) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + handler_error(bp[i], info[i]); + info[i] = NULL; + } + return false; + } + return true; +} + int hw_breakpoint_handler(struct die_args *args) { + bool err = false; int rc = NOTIFY_STOP; - struct perf_event *bp; + struct perf_event *bp[HBP_NUM_MAX] = { NULL }; struct pt_regs *regs = args->regs; - struct arch_hw_breakpoint *info; + struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL }; + int i; + int hit[HBP_NUM_MAX] = {0}; + int nr_hit = 0; + bool ptrace_bp = false; + struct ppc_inst instr = ppc_inst(0); + int type = 0; + int size = 0; + unsigned long ea; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -311,12 +721,40 @@ int hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __this_cpu_read(bp_per_reg); - if (!bp) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + get_instr_detail(regs, &instr, &type, &size, &ea); + + for (i = 0; i < nr_wp_slots(); i++) { + bp[i] = __this_cpu_read(bp_per_reg[i]); + if (!bp[i]) + continue; + + info[i] = counter_arch_bp(bp[i]); + info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; + + if (check_constraints(regs, instr, ea, type, size, info[i])) { + if (!IS_ENABLED(CONFIG_PPC_8xx) && + ppc_inst_equal(instr, ppc_inst(0))) { + handler_error(bp[i], info[i]); + info[i] = NULL; + err = 1; + continue; + } + + if (is_ptrace_bp(bp[i])) + ptrace_bp = true; + hit[i] = 1; + nr_hit++; + } + } + + if (err) + goto reset; + + if (!nr_hit) { rc = NOTIFY_DONE; goto out; } - info = counter_arch_bp(bp); /* * Return early after invoking user-callback function without restoring @@ -324,29 +762,50 @@ int hw_breakpoint_handler(struct die_args *args) * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal * generated in do_dabr(). 
*/ - if (bp->overflow_handler == ptrace_triggered) { - perf_bp_event(bp, regs); + if (ptrace_bp) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + perf_bp_event(bp[i], regs); + info[i] = NULL; + } rc = NOTIFY_DONE; - goto out; + goto reset; } - info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (IS_ENABLED(CONFIG_PPC_8xx)) { - if (!dar_within_range(regs->dar, info)) - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - } else { - if (!stepping_handler(regs, bp, info)) - goto out; + if (!IS_ENABLED(CONFIG_PPC_8xx)) { + if (is_larx_stcx_instr(type)) { + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + larx_stcx_err(bp[i], info[i]); + info[i] = NULL; + } + goto reset; + } + + if (!stepping_handler(regs, bp, info, hit, instr)) + goto reset; } /* * As a policy, the callback is invoked in a 'trigger-after-execute' * fashion */ - if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) - perf_bp_event(bp, regs); + for (i = 0; i < nr_wp_slots(); i++) { + if (!hit[i]) + continue; + if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + perf_bp_event(bp[i], regs); + } + +reset: + for (i = 0; i < nr_wp_slots(); i++) { + if (!info[i]) + continue; + __set_breakpoint(i, info[i]); + } - __set_breakpoint(info); out: rcu_read_unlock(); return rc; @@ -361,26 +820,43 @@ static int single_step_dabr_instruction(struct die_args *args) struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; struct arch_hw_breakpoint *info; + int i; + bool found = false; - bp = current->thread.last_hit_ubp; /* * Check if we are single-stepping as a result of a * previous HW Breakpoint exception */ - if (!bp) - return NOTIFY_DONE; + for (i = 0; i < nr_wp_slots(); i++) { + bp = current->thread.last_hit_ubp[i]; + + if (!bp) + continue; + + found = true; + info = counter_arch_bp(bp); + + /* + * We shall invoke the user-defined callback function in the + * single stepping handler to confirm to 'trigger-after-execute' + * semantics + */ + if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + perf_bp_event(bp, regs); + current->thread.last_hit_ubp[i] = NULL; + } - info = counter_arch_bp(bp); + if (!found) + return NOTIFY_DONE; - /* - * We shall invoke the user-defined callback function in the single - * stepping handler to confirm to 'trigger-after-execute' semantics - */ - if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) - perf_bp_event(bp, regs); + for (i = 0; i < nr_wp_slots(); i++) { + bp = __this_cpu_read(bp_per_reg[i]); + if (!bp) + continue; - __set_breakpoint(info); - current->thread.last_hit_ubp = NULL; + info = counter_arch_bp(bp); + __set_breakpoint(i, info); + } /* * If the process was being single-stepped by ptrace, let the @@ -419,10 +895,13 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify); */ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { + int i; struct thread_struct *t = &tsk->thread; - unregister_hw_breakpoint(t->ptrace_bps[0]); - t->ptrace_bps[0] = NULL; + for (i = 0; i < nr_wp_slots(); i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } } void hw_breakpoint_pmu_read(struct perf_event *bp) diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 433d97bea1f3..69df840f7253 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -187,6 +187,7 @@ BEGIN_FTR_SECTION mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) b transfer_to_handler_cont +_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) .data diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 
308f499e146c..72c85b6f3898 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -90,3 +90,4 @@ _GLOBAL(power_save_ppc32_restore) #endif b transfer_to_handler_cont +_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 0276bc8c8969..51bbaae94ccc 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -10,10 +10,10 @@ #include <linux/kernel.h> #include <linux/sched/mm.h> /* for init_mm */ +#include <linux/pgtable.h> #include <asm/io.h> #include <asm/machdep.h> -#include <asm/pgtable.h> #include <asm/ppc-pci.h> #include <asm/io-workarounds.h> #include <asm/pte-walk.h> diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 5ac84efc6ede..9fe4fb3b08aa 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -15,23 +15,23 @@ * Here comes the ppc64 implementation of the IOMAP * interfaces. */ -unsigned int ioread8(void __iomem *addr) +unsigned int ioread8(const void __iomem *addr) { return readb(addr); } -unsigned int ioread16(void __iomem *addr) +unsigned int ioread16(const void __iomem *addr) { return readw(addr); } -unsigned int ioread16be(void __iomem *addr) +unsigned int ioread16be(const void __iomem *addr) { return readw_be(addr); } -unsigned int ioread32(void __iomem *addr) +unsigned int ioread32(const void __iomem *addr) { return readl(addr); } -unsigned int ioread32be(void __iomem *addr) +unsigned int ioread32be(const void __iomem *addr) { return readl_be(addr); } @@ -41,27 +41,27 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); #ifdef __powerpc64__ -u64 ioread64(void __iomem *addr) +u64 ioread64(const void __iomem *addr) { return readq(addr); } -u64 ioread64_lo_hi(void __iomem *addr) +u64 ioread64_lo_hi(const void __iomem *addr) { return readq(addr); } -u64 ioread64_hi_lo(void __iomem *addr) +u64 ioread64_hi_lo(const void __iomem *addr) { return readq(addr); } -u64 ioread64be(void __iomem *addr) +u64 ioread64be(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_lo_hi(void __iomem *addr) +u64 ioread64be_lo_hi(const void __iomem *addr) { return readq_be(addr); } -u64 ioread64be_hi_lo(void __iomem *addr) +u64 ioread64be_hi_lo(const void __iomem *addr) { return readq_be(addr); } @@ -139,15 +139,15 @@ EXPORT_SYMBOL(iowrite64be_hi_lo); * FIXME! We could make these do EEH handling if we really * wanted. Not clear if we do. 
*/ -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) { readsb(addr, dst, count); } -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) { readsw(addr, dst, count); } -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) { readsl(addr, dst, count); } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1f1169856dc8..bf21ebd36190 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -51,10 +51,10 @@ #include <linux/of.h> #include <linux/of_irq.h> #include <linux/vmalloc.h> +#include <linux/pgtable.h> #include <linux/uaccess.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/irq.h> #include <asm/cache.h> #include <asm/prom.h> @@ -621,13 +621,14 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); seq_printf(p, " Machine check exceptions\n"); +#ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { seq_printf(p, "%*s: ", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat, j).hmi_exceptions); + seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } +#endif seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) @@ -665,7 +666,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; sum += per_cpu(irq_stat, cpu).timer_irqs_others; - sum += per_cpu(irq_stat, cpu).hmi_exceptions; +#ifdef CONFIG_PPC_BOOK3S_64 + sum += paca_ptrs[cpu]->hmi_irqs; +#endif sum += per_cpu(irq_stat, cpu).sreset_irqs; #ifdef CONFIG_PPC_WATCHDOG sum += per_cpu(irq_stat, cpu).soft_nmi_irqs; @@ -748,9 +751,8 @@ void do_IRQ(struct pt_regs *regs) static void *__init alloc_vm_stack(void) { - return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, - VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL, - 0, NUMA_NO_NODE, (void*)_RET_IP_); + return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP, + NUMA_NO_NODE, (void *)_RET_IP_); } static void __init vmap_irqstack_init(void) diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 773671b512df..2257d24e6a26 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/notifier.h> +#include <linux/vmalloc.h> #include <asm/processor.h> #include <asm/io.h> @@ -38,6 +39,22 @@ EXPORT_SYMBOL_GPL(isa_bridge_pcidev); #define ISA_SPACE_MASK 0x1 #define ISA_SPACE_IO 0x1 +static void remap_isa_base(phys_addr_t pa, unsigned long size) +{ + WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK); + WARN_ON_ONCE(pa & ~PAGE_MASK); + WARN_ON_ONCE(size & ~PAGE_MASK); + + if (slab_is_available()) { + if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa, + pgprot_noncached(PAGE_KERNEL))) + unmap_kernel_range(ISA_IO_BASE, size); + } else { + early_ioremap_range(ISA_IO_BASE, pa, size, + pgprot_noncached(PAGE_KERNEL)); + } +} + static void pci_process_ISA_OF_ranges(struct device_node *isa_node, unsigned long phb_io_base_phys) { @@ -105,15 +122,13 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node, if (size > 0x10000) size = 0x10000; - __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, 
- size, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(phb_io_base_phys, size); return; inval_range: printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " "mapping 64k\n"); - __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, - 0x10000, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(phb_io_base_phys, 0x10000); } @@ -248,8 +263,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np) * and map it */ isa_io_base = ISA_IO_BASE; - __ioremap_at(pbase, (void *)ISA_IO_BASE, - size, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(pbase, size); pr_debug("ISA: Non-PCI bridge is %pOF\n", np); } @@ -297,7 +311,7 @@ static void isa_bridge_remove(void) isa_bridge_pcidev = NULL; /* Unmap the ISA area */ - __iounmap_at((void *)ISA_IO_BASE, 0x10000); + unmap_kernel_range(ISA_IO_BASE, 0x10000); } /** diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index ca37702bde97..144858027fa3 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -6,14 +6,15 @@ #include <linux/kernel.h> #include <linux/jump_label.h> #include <asm/code-patching.h> +#include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 *addr = (u32 *)(unsigned long)entry->code; + struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code; if (type == JUMP_LABEL_JMP) patch_branch(addr, entry->target, 0); else - patch_instruction(addr, PPC_INST_NOP); + patch_instruction(addr, ppc_inst(PPC_INST_NOP)); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7dd55eb1259d..409080208a6c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -26,6 +26,7 @@ #include <asm/debug.h> #include <asm/code-patching.h> #include <linux/slab.h> +#include <asm/inst.h> /* * This table contains the mapping between PowerPC hardware trap types, and @@ -418,13 +419,13 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr; - unsigned int *addr = (unsigned int *)bpt->bpt_addr; + struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = probe_kernel_address(addr, instr); + err = get_kernel_nofault(instr, (unsigned *) addr); if (err) return err; - err = patch_instruction(addr, BREAK_INSTR); + err = patch_instruction(addr, ppc_inst(BREAK_INSTR)); if (err) return -EFAULT; @@ -437,9 +438,9 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr = *(unsigned int *)bpt->saved_instr; - unsigned int *addr = (unsigned int *)bpt->bpt_addr; + struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = patch_instruction(addr, instr); + err = patch_instruction(addr, ppc_inst(instr)); if (err) return -EFAULT; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 81efb605113e..6ab9b4d037c3 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -23,6 +23,7 @@ #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> +#include <asm/inst.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -105,7 +106,9 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; - kprobe_opcode_t insn = *p->addr; + struct kprobe *prev; + struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); + struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned 
address\n"); @@ -113,6 +116,17 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; + } else if (ppc_inst_prefixed(prefix)) { + printk("Cannot register a kprobe on the second word of prefixed instruction\n"); + ret = -EINVAL; + } + preempt_disable(); + prev = get_kprobe(p->addr - 1); + preempt_enable_no_resched(); + if (prev && + ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) { + printk("Cannot register a kprobe on the second word of prefixed instruction\n"); + ret = -EINVAL; } /* insn must be on a special executable page on ppc64. This is @@ -124,11 +138,8 @@ int arch_prepare_kprobe(struct kprobe *p) } if (!ret) { - memcpy(p->ainsn.insn, p->addr, - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; - flush_icache_range((unsigned long)p->ainsn.insn, - (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); + patch_instruction((struct ppc_inst *)p->ainsn.insn, insn); + p->opcode = ppc_inst_val(insn); } p->ainsn.boostable = 0; @@ -138,13 +149,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, BREAKPOINT_INSTRUCTION); + patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction(p->addr, p->opcode); + patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode)); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -216,7 +227,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - unsigned int insn = *p->ainsn.insn; + struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); @@ -233,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... */ - printk("Can't step on instruction %x\n", insn); + printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); BUG(); } else { /* @@ -276,14 +287,18 @@ int kprobe_handler(struct pt_regs *regs) p = get_kprobe(addr); if (!p) { - if (*addr != BREAKPOINT_INSTRUCTION) { + unsigned int instr; + + if (get_kernel_nofault(instr, addr)) + goto no_kprobe; + + if (instr != BREAKPOINT_INSTRUCTION) { /* * PowerPC has multiple variants of the "trap" * instruction. 
If the current instruction is a * trap variant, it could belong to someone else */ - kprobe_opcode_t cur_insn = *addr; - if (is_trap(cur_insn)) + if (is_trap(instr)) goto no_kprobe; /* * The breakpoint instruction was removed right @@ -464,14 +479,16 @@ NOKPROBE_SYMBOL(trampoline_probe_handler); */ int kprobe_post_handler(struct pt_regs *regs) { + int len; struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); if (!cur || user_mode(regs)) return 0; + len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn)); /* make sure we got here for instruction we have a kprobe on */ - if (((unsigned long)cur->ainsn.insn + 4) != regs->nip) + if (((unsigned long)cur->ainsn.insn + len) != regs->nip) return 0; if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { @@ -480,7 +497,7 @@ int kprobe_post_handler(struct pt_regs *regs) } /* Adjust nip to after the single-stepped instruction */ - regs->nip = (unsigned long)cur->addr + 4; + regs->nip = (unsigned long)cur->addr + len; regs->msr |= kcb->kprobe_saved_msr; /*Restore back the original saved kprobes variables and continue. */ diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2020d255585f..5f07aa5e9851 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -455,5 +455,6 @@ _GLOBAL(__inval_enable_L1) sync blr +_ASM_NOKPROBE_SYMBOL(__inval_enable_L1) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 8077b5fb18a7..ada59f6c4298 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -16,6 +16,7 @@ #include <linux/export.h> #include <linux/irq_work.h> #include <linux/extable.h> +#include <linux/ftrace.h> #include <asm/machdep.h> #include <asm/mce.h> @@ -48,6 +49,20 @@ static struct irq_work mce_ue_event_irq_work = { DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); + +int mce_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_register_notifier); + +int mce_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_unregister_notifier); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -277,6 +292,7 @@ static void machine_process_ue_event(struct work_struct *work) while (__this_cpu_read(mce_ue_count) > 0) { index = __this_cpu_read(mce_ue_count) - 1; evt = this_cpu_ptr(&mce_ue_event_queue[index]); + blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* * This should probably queued elsewhere, but @@ -369,6 +385,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_user_types[] = { "Indeterminate", "tlbie(l) invalid", + "scv invalid", }; static const char *mc_ra_types[] = { "Indeterminate", @@ -571,9 +588,16 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); * * regs->nip and regs->msr contains srr0 and ssr1. 
*/ -long machine_check_early(struct pt_regs *regs) +long notrace machine_check_early(struct pt_regs *regs) { long handled = 0; + bool nested = in_nmi(); + u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); + + this_cpu_set_ftrace_enabled(0); + + if (!nested) + nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -582,6 +606,12 @@ long machine_check_early(struct pt_regs *regs) */ if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); + + if (!nested) + nmi_exit(); + + this_cpu_set_ftrace_enabled(ftrace_enabled); + return handled; } @@ -697,7 +727,7 @@ long hmi_exception_realmode(struct pt_regs *regs) { int ret; - __this_cpu_inc(irq_stat.hmi_exceptions); + local_paca->hmi_irqs++; ret = hmi_handle_debugtrig(regs); if (ret >= 0) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 067b094bfeff..b7e173754a2e 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -12,14 +12,15 @@ #include <linux/types.h> #include <linux/ptrace.h> #include <linux/extable.h> +#include <linux/pgtable.h> #include <asm/mmu.h> #include <asm/mce.h> #include <asm/machdep.h> -#include <asm/pgtable.h> #include <asm/pte-walk.h> #include <asm/sstep.h> #include <asm/exception-64s.h> #include <asm/extable.h> +#include <asm/inst.h> /* * Convert an address related to an mm to a PFN. NOTE: we are in real @@ -27,7 +28,7 @@ */ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) { - pte_t *ptep; + pte_t *ptep, pte; unsigned int shift; unsigned long pfn, flags; struct mm_struct *mm; @@ -39,19 +40,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) local_irq_save(flags); ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift); + if (!ptep) { + pfn = ULONG_MAX; + goto out; + } + pte = READ_ONCE(*ptep); - if (!ptep || pte_special(*ptep)) { + if (!pte_present(pte) || pte_special(pte)) { pfn = ULONG_MAX; goto out; } if (shift <= PAGE_SHIFT) - pfn = pte_pfn(*ptep); + pfn = pte_pfn(pte); else { unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; - pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask))); + pfn = pte_pfn(__pte(pte_val(pte) | (addr & rpnmask))); } - out: local_irq_restore(flags); return pfn; @@ -238,6 +243,45 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, 0, 0, 0, 0, 0, 0 } }; +static const struct mce_ierror_table mce_p10_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, 
MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008080000, true, + MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; + struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ @@ -356,6 +400,46 @@ static const struct mce_derror_table mce_p9_derror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, false, 0, 0, 0, 0, 0 } }; +static const struct mce_derror_table mce_p10_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; + static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) { @@ -365,7 +449,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, * in real-mode is tricky and can lead to recursive * faults */ - int instr; + struct ppc_inst instr; unsigned long pfn, instr_addr; struct instruction_op op; struct pt_regs tmp = *regs; @@ -373,7 +457,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, pfn = addr_to_pfn(regs, regs->nip); if (pfn != ULONG_MAX) { instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); - instr = *(unsigned int 
*)(instr_addr); + instr = ppc_inst_read((struct ppc_inst *)instr_addr); if (!analyse_instr(&op, &tmp, instr)) { pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; @@ -652,3 +736,8 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } + +long __machine_check_early_realmode_p10(struct pt_regs *regs) +{ + return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); +} diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 65f9f731c229..5be96feccb55 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -36,6 +36,8 @@ _GLOBAL(add_reloc_offset) add r3,r3,r5 mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(reloc_offset) +_ASM_NOKPROBE_SYMBOL(add_reloc_offset) .align 3 2: PPC_LONG 1b diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index d80212be8698..b24f866fef81 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -246,6 +246,7 @@ _GLOBAL(real_readb) sync isync blr +_ASM_NOKPROBE_SYMBOL(real_readb) /* * Do an IO access in real mode @@ -263,6 +264,7 @@ _GLOBAL(real_writeb) sync isync blr +_ASM_NOKPROBE_SYMBOL(real_writeb) #endif /* CONFIG_40x */ @@ -274,17 +276,8 @@ _GLOBAL(real_writeb) #ifndef CONFIG_PPC_8xx _GLOBAL(flush_instruction_cache) #if defined(CONFIG_4xx) -#ifdef CONFIG_403GCX - li r3, 512 - mtctr r3 - lis r4, KERNELBASE@h -1: iccci 0, r4 - addi r4, r4, 16 - bdnz 1b -#else lis r3, KERNELBASE@h iccci 0,r3 -#endif #elif defined(CONFIG_FSL_BOOKE) #ifdef CONFIG_E200 mfspr r3,SPRN_L1CSR0 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1864605eca29..7bb46ad98207 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -413,20 +413,6 @@ _GLOBAL(kexec_sequence) li r0,0 std r0,16(r1) -BEGIN_FTR_SECTION - /* - * This is the best time to turn AMR/IAMR off. - * key 0 is used in radix for supervisor<->user - * protection, but on hash key 0 is reserved - * ideally we want to enter with a clean state. - * NOTE, we rely on r0 being 0 from above. - */ - mtspr SPRN_IAMR,r0 -BEGIN_FTR_SECTION_NESTED(42) - mtspr SPRN_AMOR,r0 -END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* save regs for local vars on new stack. * yes, we won't go back, but ... 
*/ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index df649acb5631..a211b0253cdb 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -86,3 +86,14 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } + +#ifdef MODULES_VADDR +void *module_alloc(unsigned long size) +{ + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index d7134c614c16..c27b8687b82a 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -67,21 +67,6 @@ static int relacmp(const void *_x, const void *_y) return 0; } -static void relaswap(void *_x, void *_y, int size) -{ - uint32_t *x, *y, tmp; - int i; - - y = (uint32_t *)_x; - x = (uint32_t *)_y; - - for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) { - tmp = x[i]; - x[i] = y[i]; - y[i] = tmp; - } -} - /* Get the potential trampolines size required of the init and non-init sections */ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, @@ -118,7 +103,7 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, */ sort((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf32_Rela), - sizeof(Elf32_Rela), relacmp, relaswap); + sizeof(Elf32_Rela), relacmp, NULL); ret += count_relocs((void *)hdr + sechdrs[i].sh_offset, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 007606a48fd9..ae2b188365b1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -20,6 +20,7 @@ #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> +#include <asm/inst.h> /* FIXME: We don't do .init separately. To do this, we'd need to have a separate r2 value in the init and core section, and stub between @@ -144,42 +145,6 @@ static u32 ppc64_stub_insns[] = { PPC_INST_BCTR, }; -#ifdef CONFIG_DYNAMIC_FTRACE -int module_trampoline_target(struct module *mod, unsigned long addr, - unsigned long *target) -{ - struct ppc64_stub_entry *stub; - func_desc_t funcdata; - u32 magic; - - if (!within_module_core(addr, mod)) { - pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - stub = (struct ppc64_stub_entry *)addr; - - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { - pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - if (magic != STUB_MAGIC) { - pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { - pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); - return -EFAULT; - } - - *target = stub_func_addr(funcdata); - - return 0; -} -#endif - /* Count how many different 24-bit relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) @@ -226,21 +191,6 @@ static int relacmp(const void *_x, const void *_y) return 0; } -static void relaswap(void *_x, void *_y, int size) -{ - uint64_t *x, *y, tmp; - int i; - - y = (uint64_t *)_x; - x = (uint64_t *)_y; - - for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) { - tmp = x[i]; - x[i] = y[i]; - y[i] = tmp; - } -} - /* Get size of potential trampolines required. 
*/ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs) @@ -264,7 +214,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, */ sort((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size / sizeof(Elf64_Rela), - sizeof(Elf64_Rela), relacmp, relaswap); + sizeof(Elf64_Rela), relacmp, NULL); relocs += count_relocs((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size @@ -384,6 +334,92 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return 0; } +#ifdef CONFIG_MPROFILE_KERNEL + +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +/* + * ld r12,PACATOC(r13) + * addis r12,r12,<high> + * addi r12,r12,<low> + * mtctr r12 + * bctr + */ +static u32 stub_insns[] = { + PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, + PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_MTCTR | __PPC_RS(R12), + PPC_INST_BCTR, +}; + +/* + * For mprofile-kernel we use a special stub for ftrace_caller() because we + * can't rely on r2 containing this module's TOC when we enter the stub. + * + * That can happen if the function calling us didn't need to use the toc. In + * that case it won't have setup r2, and the r2 value will be either the + * kernel's toc, or possibly another modules toc. + * + * To deal with that this stub uses the kernel toc, which is always accessible + * via the paca (in r13). The target (ftrace_caller()) is responsible for + * saving and restoring the toc before returning. + */ +static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, + unsigned long addr, + struct module *me) +{ + long reladdr; + + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + + /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ + entry->funcdata = func_desc(addr); + entry->magic = STUB_MAGIC; + + return 1; +} + +static bool is_mprofile_ftrace_call(const char *name) +{ + if (!strcmp("_mcount", name)) + return true; +#ifdef CONFIG_DYNAMIC_FTRACE + if (!strcmp("ftrace_caller", name)) + return true; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!strcmp("ftrace_regs_caller", name)) + return true; +#endif +#endif + + return false; +} +#else +static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, + unsigned long addr, + struct module *me) +{ + return 0; +} + +static bool is_mprofile_ftrace_call(const char *name) +{ + return false; +} +#endif + /* * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the * value maximum span in an instruction which uses a signed offset). Round down @@ -399,10 +435,14 @@ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) static inline int create_stub(const Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, unsigned long addr, - struct module *me) + struct module *me, + const char *name) { long reladdr; + if (is_mprofile_ftrace_call(name)) + return create_ftrace_stub(entry, addr, me); + memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); /* Stub uses address relative to r2. */ @@ -426,7 +466,8 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, stub to set up the TOC ptr (r2) for the function. 
*/ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, unsigned long addr, - struct module *me) + struct module *me, + const char *name) { struct ppc64_stub_entry *stubs; unsigned int i, num_stubs; @@ -443,62 +484,19 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } - if (!create_stub(sechdrs, &stubs[i], addr, me)) + if (!create_stub(sechdrs, &stubs[i], addr, me, name)) return 0; return (unsigned long)&stubs[i]; } -#ifdef CONFIG_MPROFILE_KERNEL -static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) -{ - if (strcmp("_mcount", name)) - return false; - - /* - * Check if this is one of the -mprofile-kernel sequences. - */ - if (instruction[-1] == PPC_INST_STD_LR && - instruction[-2] == PPC_INST_MFLR) - return true; - - if (instruction[-1] == PPC_INST_MFLR) - return true; - - return false; -} - -/* - * In case of _mcount calls, do not save the current callee's TOC (in r2) into - * the original caller's stack frame. If we did we would clobber the saved TOC - * value of the original caller. - */ -static void squash_toc_save_inst(const char *name, unsigned long addr) -{ - struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; - - /* Only for calls to _mcount */ - if (strcmp("_mcount", name) != 0) - return; - - stub->jump[2] = PPC_INST_NOP; -} -#else -static void squash_toc_save_inst(const char *name, unsigned long addr) { } - -static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) -{ - return false; -} -#endif - /* We expect a noop next: if it is, replace it with instruction to restore r2. */ static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; - if (is_mprofile_mcount_callsite(name, prev_insn)) + if (is_mprofile_ftrace_call(name)) return 1; /* @@ -506,7 +504,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) * "link" branches and they don't return, so they don't need the r2 * restore afterwards. */ - if (!instr_is_relative_link_branch(*prev_insn)) + if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 1; if (*instruction != PPC_INST_NOP) { @@ -636,14 +634,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_LIVEPATCH) { /* External: go via stub */ - value = stub_for_addr(sechdrs, value, me); + value = stub_for_addr(sechdrs, value, me, + strtab + sym->st_name); if (!value) return -ENOENT; if (!restore_r2(strtab + sym->st_name, (u32 *)location + 1, me)) return -ENOEXEC; - - squash_toc_save_inst(strtab + sym->st_name, value); } else value += local_entry_offset(sym); @@ -745,89 +742,53 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE - -#ifdef CONFIG_MPROFILE_KERNEL - -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -/* - * For mprofile-kernel we use a special stub for ftrace_caller() because we - * can't rely on r2 containing this module's TOC when we enter the stub. - * - * That can happen if the function calling us didn't need to use the toc. In - * that case it won't have setup r2, and the r2 value will be either the - * kernel's toc, or possibly another modules toc. - * - * To deal with that this stub uses the kernel toc, which is always accessible - * via the paca (in r13). The target (ftrace_caller()) is responsible for - * saving and restoring the toc before returning. 
- */ -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, - struct module *me, unsigned long addr) +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) { - struct ppc64_stub_entry *entry; - unsigned int i, num_stubs; - /* - * ld r12,PACATOC(r13) - * addis r12,r12,<high> - * addi r12,r12,<low> - * mtctr r12 - * bctr - */ - static u32 stub_insns[] = { - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, - PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, - }; - long reladdr; + struct ppc64_stub_entry *stub; + func_desc_t funcdata; + u32 magic; - num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry); + if (!within_module_core(addr, mod)) { + pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* Find the next available stub entry */ - entry = (void *)sechdrs[me->arch.stubs_section].sh_addr; - for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++); + stub = (struct ppc64_stub_entry *)addr; - if (i >= num_stubs) { - pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name); - return 0; + if (copy_from_kernel_nofault(&magic, &stub->magic, + sizeof(magic))) { + pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; } - memcpy(entry->jump, stub_insns, sizeof(stub_insns)); - - /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = addr - kernel_toc_addr(); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of %ps out of range of kernel_toc.\n", - me->name, (void *)addr); - return 0; + if (magic != STUB_MAGIC) { + pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; } - entry->jump[1] |= PPC_HA(reladdr); - entry->jump[2] |= PPC_LO(reladdr); + if (copy_from_kernel_nofault(&funcdata, &stub->funcdata, + sizeof(funcdata))) { + pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ - entry->funcdata = func_desc(addr); - entry->magic = STUB_MAGIC; + *target = stub_func_addr(funcdata); - return (unsigned long)entry; -} -#else -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, - struct module *me, unsigned long addr) -{ - return stub_for_addr(sechdrs, addr, me); + return 0; } -#endif int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { - mod->arch.tramp = create_ftrace_stub(sechdrs, mod, - (unsigned long)ftrace_caller); + mod->arch.tramp = stub_for_addr(sechdrs, + (unsigned long)ftrace_caller, + mod, + "ftrace_caller"); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - mod->arch.tramp_regs = create_ftrace_stub(sechdrs, mod, - (unsigned long)ftrace_regs_caller); + mod->arch.tramp_regs = stub_for_addr(sechdrs, + (unsigned long)ftrace_regs_caller, + mod, + "ftrace_regs_caller"); if (!mod->arch.tramp_regs) return -ENOENT; #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index fb4f61096613..532f22637783 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -655,9 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, int rc = -1; switch (reason) { - case KMSG_DUMP_RESTART: - case KMSG_DUMP_HALT: - case KMSG_DUMP_POWEROFF: + case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. 
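 * (KMSG_DUMP_SHUTDOWN is the single reason that now stands in for the old
 * RESTART/HALT/POWEROFF cases, so clean shutdowns still skip the NVRAM
 * capture and only the oops/panic paths below fall through to it.)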
*/ return; case KMSG_DUMP_OOPS: @@ -854,8 +852,8 @@ loff_t __init nvram_create_partition(const char *name, int sig, BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16); /* Convert sizes from bytes to blocks */ - req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; - min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + req_size = ALIGN(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + min_size = ALIGN(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; /* If no minimum size specified, make it the same as the * requested size diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 71a3f97dc988..f89376ff633e 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -62,8 +62,8 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); - /* Create EEH PEs for the PHB */ - eeh_dev_phb_init_dynamic(phb); + /* Create EEH PE for the PHB */ + eeh_phb_pe_create(phb); /* Scan the bus */ pcibios_scan_phb(phb); diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 024f7aad1952..69bfe96884e2 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -16,6 +16,7 @@ #include <asm/code-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> +#include <asm/inst.h> #define TMPL_CALL_HDLR_IDX \ (optprobe_template_call_handler - optprobe_template_entry) @@ -99,8 +100,9 @@ static unsigned long can_optimize(struct kprobe *p) * Ensure that the instruction is not a conditional branch, * and that can be emulated. */ - if (!is_conditional_branch(*p->ainsn.insn) && - analyse_instr(&op, ®s, *p->ainsn.insn) == 1) { + if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) && + analyse_instr(&op, ®s, + ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) { emulate_update_regs(®s, &op); nip = regs.nip; } @@ -147,50 +149,57 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) { /* addis r4,0,(insn)@h */ - patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(4) | - ((val >> 16) & 0xffff)); + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(4) | + ((val >> 16) & 0xffff))); addr++; /* ori r4,r4,(insn)@l */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(4) | - ___PPC_RS(4) | (val & 0xffff)); + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(4) | + ___PPC_RS(4) | (val & 0xffff))); } /* * Generate instructions to load provided immediate 64-bit value - * to register 'r3' and patch these instructions at 'addr'. + * to register 'reg' and patch these instructions at 'addr'. 
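 *
 * For example, loading val = 0x123456789abcdef0 into r4 expands to:
 *
 *   lis    r4,0x1234       ->  r4 = 0x0000000012340000
 *   ori    r4,r4,0x5678    ->  r4 = 0x0000000012345678
 *   rldicr r4,r4,32,31     ->  r4 = 0x1234567800000000
 *   oris   r4,r4,0x9abc    ->  r4 = 0x123456789abc0000
 *   ori    r4,r4,0xdef0    ->  r4 = 0x123456789abcdef0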
*/ -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - /* lis r3,(op)@highest */ - patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(3) | - ((val >> 48) & 0xffff)); + /* lis reg,(op)@highest */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | + ((val >> 48) & 0xffff))); addr++; - /* ori r3,r3,(op)@higher */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 32) & 0xffff)); + /* ori reg,reg,(op)@higher */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 32) & 0xffff))); addr++; - /* rldicr r3,r3,32,31 */ - patch_instruction(addr, PPC_INST_RLDICR | ___PPC_RA(3) | - ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31)); + /* rldicr reg,reg,32,31 */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | + ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); addr++; - /* oris r3,r3,(op)@h */ - patch_instruction(addr, PPC_INST_ORIS | ___PPC_RA(3) | - ___PPC_RS(3) | ((val >> 16) & 0xffff)); + /* oris reg,reg,(op)@h */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | + ___PPC_RS(reg) | ((val >> 16) & 0xffff))); addr++; - /* ori r3,r3,(op)@l */ - patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | - ___PPC_RS(3) | (val & 0xffff)); + /* ori reg,reg,(op)@l */ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | + ___PPC_RS(reg) | (val & 0xffff))); } int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { - kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step; - kprobe_opcode_t *op_callback_addr, *emulate_step_addr; + struct ppc_inst branch_op_callback, branch_emulate_step, temp; + kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; long b_offset; unsigned long nip, size; int rc, i; @@ -230,7 +239,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); pr_devel("Copying template to %p, size %lu\n", buff, size); for (i = 0; i < size; i++) { - rc = patch_instruction(buff + i, *(optprobe_template_entry + i)); + rc = patch_instruction((struct ppc_inst *)(buff + i), + ppc_inst(*(optprobe_template_entry + i))); if (rc < 0) goto error; } @@ -239,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. 
branch to optimized_callback() and emulate_step() @@ -251,29 +261,34 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; } - branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX, - (unsigned long)op_callback_addr, - BRANCH_SET_LINK); + rc = create_branch(&branch_op_callback, + (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), + (unsigned long)op_callback_addr, + BRANCH_SET_LINK); - branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX, - (unsigned long)emulate_step_addr, - BRANCH_SET_LINK); + rc |= create_branch(&branch_emulate_step, + (struct ppc_inst *)(buff + TMPL_EMULATE_IDX), + (unsigned long)emulate_step_addr, + BRANCH_SET_LINK); - if (!branch_op_callback || !branch_emulate_step) + if (rc) goto error; - patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); - patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); + patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), + branch_op_callback); + patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX), + branch_emulate_step); /* * 3. load instruction to be emulated into relevant register, and */ - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline */ - patch_branch(buff + TMPL_RET_IDX, (unsigned long)nip, 0); + patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0); flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); @@ -305,6 +320,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) void arch_optimize_kprobes(struct list_head *oplist) { + struct ppc_inst instr; struct optimized_kprobe *op; struct optimized_kprobe *tmp; @@ -315,9 +331,10 @@ void arch_optimize_kprobes(struct list_head *oplist) */ memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); - patch_instruction(op->kp.addr, - create_branch((unsigned int *)op->kp.addr, - (unsigned long)op->optinsn.insn, 0)); + create_branch(&instr, + (struct ppc_inst *)op->kp.addr, + (unsigned long)op->optinsn.insn, 0); + patch_instruction((struct ppc_inst *)op->kp.addr, instr); list_del_init(&op->list); } } diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index cf383520843f..ff8ba4d3824d 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -94,6 +94,9 @@ optprobe_template_insn: /* 2, Pass instruction to be emulated in r4 */ nop nop + nop + nop + nop .global optprobe_template_call_emulate optprobe_template_call_emulate: diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 3f91ccaa9c74..0ad15768d762 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -8,14 +8,15 @@ #include <linux/memblock.h> #include <linux/sched/task.h> #include <linux/numa.h> +#include <linux/pgtable.h> #include <asm/lppaca.h> #include <asm/paca.h> #include <asm/sections.h> -#include <asm/pgtable.h> #include <asm/kexec.h> #include <asm/svm.h> #include <asm/ultravisor.h> +#include <asm/rtas.h> #include "setup.h" @@ -56,8 +57,8 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, #define LPPACA_SIZE 0x400 -static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, - unsigned long limit, int cpu) +static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit, + int 
cpu) { size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE); static unsigned long shared_lppaca_size; @@ -67,6 +68,13 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, if (!shared_lppaca) { memblock_set_bottom_up(true); + /* + * See Documentation/powerpc/ultravisor.rst for more details. + * + * UV/HV data sharing is in PAGE_SIZE granularity. In order to + * minimize the number of pages shared, align the allocation to + * PAGE_SIZE. + */ shared_lppaca = memblock_alloc_try_nid(shared_lppaca_total_size, PAGE_SIZE, MEMBLOCK_LOW_LIMIT, @@ -86,7 +94,7 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, * This is very early in boot, so no harm done if the kernel crashes at * this point. */ - BUG_ON(shared_lppaca_size >= shared_lppaca_total_size); + BUG_ON(shared_lppaca_size > shared_lppaca_total_size); return ptr; } @@ -121,7 +129,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) return NULL; if (is_secure_guest()) - lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu); + lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu); else lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu); @@ -164,6 +172,30 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) #endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_PSERIES +/** + * new_rtas_args() - Allocates rtas args + * @cpu: CPU number + * @limit: Memory limit for this allocation + * + * Allocates a struct rtas_args and return it's pointer, + * if not in Hypervisor mode + * + * Return: Pointer to allocated rtas_args + * NULL if CPU in Hypervisor Mode + */ +static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) +{ + limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX); + + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + return NULL; + + return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES, + limit, cpu); +} +#endif /* CONFIG_PPC_PSERIES */ + /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the * hypervisor and Linux. 
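 * The rtas_args buffer allocated by new_rtas_args() above is clamped to
 * RTAS_INSTANTIATE_MAX because RTAS runs in 32-bit real mode and can only
 * address buffers placed low in memory; on bare-metal (CPU_FTR_HVMODE)
 * systems the buffer is not needed, so NULL is stored instead.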
@@ -202,6 +234,10 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int /* For now -- if we have threads this will be adjusted later */ new_paca->tcd_ptr = &new_paca->tcd; #endif + +#ifdef CONFIG_PPC_PSERIES + new_paca->rtas_args_reentrant = NULL; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ @@ -274,6 +310,9 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_BOOK3S_64 paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif +#ifdef CONFIG_PPC_PSERIES + paca->rtas_args_reentrant = new_rtas_args(cpu, limit); +#endif paca_struct_size += sizeof(struct paca_struct); } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index bf83f76563a3..2fc12198ec07 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev) struct pci_controller *phb = pci_bus_to_host(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); - eeh_remove_device(dev); - if (phb->controller_ops.release_device) phb->controller_ops.release_device(dev); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index f83d1f69b1dd..9312e6eda7ff 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -100,7 +100,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) pci_name(bus->self)); #ifdef CONFIG_PPC_BOOK3S_64 - __flush_hash_table_range(&init_mm, res->start + _IO_BASE, + __flush_hash_table_range(res->start + _IO_BASE, res->end + _IO_BASE + 1); #endif return 0; @@ -109,29 +109,53 @@ int pcibios_unmap_io_space(struct pci_bus *bus) /* Get the host bridge */ hose = pci_bus_to_host(bus); - /* Check if we have IOs allocated */ - if (hose->io_base_alloc == NULL) - return 0; - pr_debug("IO unmapping for PHB %pOF\n", hose->dn); pr_debug(" alloc=0x%p\n", hose->io_base_alloc); - /* This is a PHB, we fully unmap the IO area */ - vunmap(hose->io_base_alloc); - + iounmap(hose->io_base_alloc); return 0; } EXPORT_SYMBOL_GPL(pcibios_unmap_io_space); -static int pcibios_map_phb_io_space(struct pci_controller *hose) +void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size) { struct vm_struct *area; + unsigned long addr; + + WARN_ON_ONCE(paddr & ~PAGE_MASK); + WARN_ON_ONCE(size & ~PAGE_MASK); + + /* + * Let's allocate some IO space for that guy. We don't pass VM_IOREMAP + * because we don't care about alignment tricks that the core does in + * that case. Maybe we should due to stupid card with incomplete + * address decoding but I'd rather not deal with those outside of the + * reserved 64K legacy region. 
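 *
 * The flow below: reserve a virtual range inside [PHB_IO_BASE, PHB_IO_END)
 * with __get_vm_area_caller(), then map it non-cached with
 * ioremap_page_range(); if that mapping fails, the partially installed page
 * tables are torn down again with unmap_kernel_range(). Both paddr and size
 * are expected to be page-aligned, hence the WARN_ON_ONCE() checks above.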
+ */ + area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END, + __builtin_return_address(0)); + if (!area) + return NULL; + + addr = (unsigned long)area->addr; + if (ioremap_page_range(addr, addr + size, paddr, + pgprot_noncached(PAGE_KERNEL))) { + unmap_kernel_range(addr, size); + return NULL; + } + + return (void __iomem *)addr; +} +EXPORT_SYMBOL_GPL(ioremap_phb); + +static int pcibios_map_phb_io_space(struct pci_controller *hose) +{ unsigned long phys_page; unsigned long size_page; unsigned long io_virt_offset; - phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); - size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE); + phys_page = ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); + size_page = ALIGN(hose->pci_io_size, PAGE_SIZE); /* Make sure IO area address is clear */ hose->io_base_alloc = NULL; @@ -146,12 +170,11 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) * with incomplete address decoding but I'd rather not deal with * those outside of the reserved 64K legacy region. */ - area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END); - if (area == NULL) + hose->io_base_alloc = ioremap_phb(phys_page, size_page); + if (!hose->io_base_alloc) return -ENOMEM; - hose->io_base_alloc = area->addr; - hose->io_base_virt = (void __iomem *)(area->addr + - hose->io_base_phys - phys_page); + hose->io_base_virt = hose->io_base_alloc + + hose->io_base_phys - phys_page; pr_debug("IO mapping for PHB %pOF\n", hose->dn); pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n", @@ -159,11 +182,6 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) pr_debug(" size=0x%016llx (alloc=0x%016lx)\n", hose->pci_io_size, size_page); - /* Establish the mapping */ - if (__ioremap_at(phys_page, area->addr, size_page, - pgprot_noncached(PAGE_KERNEL)) == NULL) - return -ENOMEM; - /* Fixup hose IO resource */ io_virt_offset = pcibios_io_space_offset(hose); hose->io_resource.start += io_virt_offset; diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e654df55969..e99b7c547d7e 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -124,9 +124,28 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_EEH +static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) +{ + struct eeh_dev *edev; + + /* Allocate EEH device */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return NULL; + + /* Associate EEH device with OF node */ + pdn->edev = edev; + edev->pdn = pdn; + edev->bdfn = (pdn->busno << 8) | pdn->devfn; + edev->controller = pdn->phb; + + return edev; +} +#endif /* CONFIG_EEH */ + #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, - int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -143,7 +162,6 @@ static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, pdn->parent = parent; pdn->busno = busno; pdn->devfn = devfn; - pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); @@ -174,7 +192,7 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { struct eeh_dev *edev __maybe_unused; - pdn = add_one_sriov_vf_pdn(parent, i, + pdn = add_one_sriov_vf_pdn(parent, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { @@ -187,7 +205,10 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) /* Create the EEH device for the VF */ edev = eeh_dev_init(pdn); BUG_ON(!edev); + + /* FIXME: these should probably be 
populated by the EEH probe */ edev->physfn = pdev; + edev->vf_index = i; #endif /* CONFIG_EEH */ } return pci_get_pdn(pdev); @@ -242,7 +263,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) * have a configured PE. */ if (edev->pe) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); pdn->edev = NULL; kfree(edev); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9c21288f8645..73a57043ee66 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -41,7 +41,6 @@ #include <linux/pkeys.h> #include <linux/seq_buf.h> -#include <asm/pgtable.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/mmu.h> @@ -472,49 +471,58 @@ EXPORT_SYMBOL(giveup_all); #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_FPU -static int restore_fp(struct task_struct *tsk) +static bool should_restore_fp(void) { - if (tsk->thread.load_fp) { - load_fp_state(¤t->thread.fp_state); + if (current->thread.load_fp) { current->thread.load_fp++; - return 1; + return true; } - return 0; + return false; +} + +static void do_restore_fp(void) +{ + load_fp_state(¤t->thread.fp_state); } #else -static int restore_fp(struct task_struct *tsk) { return 0; } +static bool should_restore_fp(void) { return false; } +static void do_restore_fp(void) { } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC -#define loadvec(thr) ((thr).load_vec) -static int restore_altivec(struct task_struct *tsk) +static bool should_restore_altivec(void) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { - load_vr_state(&tsk->thread.vr_state); - tsk->thread.used_vr = 1; - tsk->thread.load_vec++; - - return 1; + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) { + current->thread.load_vec++; + return true; } - return 0; + return false; +} + +static void do_restore_altivec(void) +{ + load_vr_state(¤t->thread.vr_state); + current->thread.used_vr = 1; } #else -#define loadvec(thr) 0 -static inline int restore_altivec(struct task_struct *tsk) { return 0; } +static bool should_restore_altivec(void) { return false; } +static void do_restore_altivec(void) { } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -static int restore_vsx(struct task_struct *tsk) +static bool should_restore_vsx(void) { - if (cpu_has_feature(CPU_FTR_VSX)) { - tsk->thread.used_vsr = 1; - return 1; - } - - return 0; + if (cpu_has_feature(CPU_FTR_VSX)) + return true; + return false; +} +static void do_restore_vsx(void) +{ + current->thread.used_vsr = 1; } #else -static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static bool should_restore_vsx(void) { return false; } +static void do_restore_vsx(void) { } #endif /* CONFIG_VSX */ /* @@ -530,32 +538,48 @@ static inline int restore_vsx(struct task_struct *tsk) { return 0; } void notrace restore_math(struct pt_regs *regs) { unsigned long msr; - - if (!MSR_TM_ACTIVE(regs->msr) && - !current->thread.load_fp && !loadvec(current->thread)) - return; + unsigned long new_msr = 0; msr = regs->msr; - msr_check_and_set(msr_all_available); /* - * Only reload if the bit is not set in the user MSR, the bit BEING set - * indicates that the registers are hot + * new_msr tracks the facilities that are to be restored. Only reload + * if the bit is not set in the user MSR (if it is set, the registers + * are live for the user thread). 
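 *
 * VSX is only added to new_msr when FP and VEC will both end up live
 * (either already set in the user MSR or about to be restored), since VSX
 * instructions operate on the combined FP/VMX register file. When FP is
 * restored, the thread's fpexc_mode bits are folded into the MSR as well,
 * which also covers the VSX case.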
*/ - if ((!(msr & MSR_FP)) && restore_fp(current)) - msr |= MSR_FP | current->thread.fpexc_mode; + if ((!(msr & MSR_FP)) && should_restore_fp()) + new_msr |= MSR_FP; - if ((!(msr & MSR_VEC)) && restore_altivec(current)) - msr |= MSR_VEC; + if ((!(msr & MSR_VEC)) && should_restore_altivec()) + new_msr |= MSR_VEC; - if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && - restore_vsx(current)) { - msr |= MSR_VSX; + if ((!(msr & MSR_VSX)) && should_restore_vsx()) { + if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) + new_msr |= MSR_VSX; } - msr_check_and_clear(msr_all_available); + if (new_msr) { + unsigned long fpexc_mode = 0; + + msr_check_and_set(new_msr); - regs->msr = msr; + if (new_msr & MSR_FP) { + do_restore_fp(); + + // This also covers VSX, because VSX implies FP + fpexc_mode = current->thread.fpexc_mode; + } + + if (new_msr & MSR_VEC) + do_restore_altivec(); + + if (new_msr & MSR_VSX) + do_restore_vsx(); + + msr_check_and_clear(new_msr); + + regs->msr |= new_msr | fpexc_mode; + } } #endif @@ -629,15 +653,12 @@ void do_break (struct pt_regs *regs, unsigned long address, if (debugger_break_match(regs)) return; - /* Clear the breakpoint */ - hw_breakpoint_disable(); - /* Deliver the signal to userspace */ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk); +static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]); #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* @@ -711,21 +732,49 @@ void switch_booke_debug_regs(struct debug_reg *new_debug) EXPORT_SYMBOL_GPL(switch_booke_debug_regs); #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ #ifndef CONFIG_HAVE_HW_BREAKPOINT -static void set_breakpoint(struct arch_hw_breakpoint *brk) +static void set_breakpoint(int i, struct arch_hw_breakpoint *brk) { preempt_disable(); - __set_breakpoint(brk); + __set_breakpoint(i, brk); preempt_enable(); } static void set_debug_reg_defaults(struct thread_struct *thread) { - thread->hw_brk.address = 0; - thread->hw_brk.type = 0; - thread->hw_brk.len = 0; - thread->hw_brk.hw_len = 0; - if (ppc_breakpoint_available()) - set_breakpoint(&thread->hw_brk); + int i; + struct arch_hw_breakpoint null_brk = {0}; + + for (i = 0; i < nr_wp_slots(); i++) { + thread->hw_brk[i] = null_brk; + if (ppc_breakpoint_available()) + set_breakpoint(i, &thread->hw_brk[i]); + } +} + +static inline bool hw_brk_match(struct arch_hw_breakpoint *a, + struct arch_hw_breakpoint *b) +{ + if (a->address != b->address) + return false; + if (a->type != b->type) + return false; + if (a->len != b->len) + return false; + /* no need to check hw_len. 
it's calculated from address and len */ + return true; +} + +static void switch_hw_breakpoint(struct task_struct *new) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (likely(hw_brk_match(this_cpu_ptr(¤t_brk[i]), + &new->thread.hw_brk[i]))) + continue; + + __set_breakpoint(i, &new->thread.hw_brk[i]); + } } #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -772,12 +821,12 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW | LCTRL1_CRWF_RW; unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN; - unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN; - unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN; + unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE); + unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE); if (start_addr == 0) lctrl2 |= LCTRL2_LW0LA_F; - else if (end_addr == ~0U) + else if (end_addr == 0) lctrl2 |= LCTRL2_LW0LA_E; else lctrl2 |= LCTRL2_LW0LA_EandF; @@ -793,20 +842,20 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO; mtspr(SPRN_CMPE, start_addr - 1); - mtspr(SPRN_CMPF, end_addr + 1); + mtspr(SPRN_CMPF, end_addr); mtspr(SPRN_LCTRL1, lctrl1); mtspr(SPRN_LCTRL2, lctrl2); return 0; } -void __set_breakpoint(struct arch_hw_breakpoint *brk) +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); + memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); if (dawr_enabled()) // Power8 or later - set_dawr(brk); + set_dawr(nr, brk); else if (IS_ENABLED(CONFIG_PPC_8xx)) set_breakpoint_8xx(brk); else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) @@ -829,19 +878,6 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); -static inline bool hw_brk_match(struct arch_hw_breakpoint *a, - struct arch_hw_breakpoint *b) -{ - if (a->address != b->address) - return false; - if (a->type != b->type) - return false; - if (a->len != b->len) - return false; - /* no need to check hw_len. it's calculated from address and len */ - return true; -} - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) @@ -1174,8 +1210,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) - __set_breakpoint(&new->thread.hw_brk); + switch_hw_breakpoint(new); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -1228,7 +1263,8 @@ struct task_struct *__switch_to(struct task_struct *prev, * mappings, we must issue a cp_abort to clear any state and * prevent snooping, corruption or a covert channel. */ - if (current->thread.used_vas) + if (current->mm && + atomic_read(¤t->mm->context.vas_windows)) asm volatile(PPC_CP_ABORT); } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -1241,29 +1277,31 @@ struct task_struct *__switch_to(struct task_struct *prev, static void show_instructions(struct pt_regs *regs) { int i; + unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); printk("Instruction dump:"); + /* + * If we were executing with the MMU off for instructions, adjust pc + * rather than printing XXXXXXXX. 
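 *
 * phys_to_virt() gives the linear-map alias of the real-mode address, so
 * get_kernel_nofault() below can still read the instructions; nip is
 * adjusted the same way so the <...> marker still lines up with the
 * current instruction.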
+ */ + if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) { + pc = (unsigned long)phys_to_virt(pc); + nip = (unsigned long)phys_to_virt(regs->nip); + } + for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; if (!(i % 8)) pr_cont("\n"); -#if !defined(CONFIG_BOOKE) - /* If executing with the IMMU off, adjust pc rather - * than print XXXXXXXX. - */ - if (!(regs->msr & MSR_IR)) - pc = (unsigned long)phys_to_virt(pc); -#endif - if (!__kernel_text_address(pc) || - probe_kernel_address((const void *)pc, instr)) { + get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { - if (regs->nip == pc) + if (nip == pc) pr_cont("<%08x> ", instr); else pr_cont("%08x ", instr); @@ -1294,7 +1332,8 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { + if (copy_from_user_nofault(&instr, (void __user *)pc, + sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } @@ -1412,7 +1451,7 @@ void show_regs(struct pt_regs * regs) print_msr_bits(regs->msr); pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); - if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) + if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -1444,7 +1483,7 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif - show_stack(current, (unsigned long *) regs->gpr[1]); + show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if (!user_mode(regs)) show_instructions(regs); } @@ -1467,27 +1506,6 @@ void arch_setup_new_exec(void) } #endif -int set_thread_uses_vas(void) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -EINVAL; - - current->thread.used_vas = 1; - - /* - * Even a process that has no foreign real address mapping can use - * an unpaired COPY instruction (to no real effect). Issue CP_ABORT - * to clear any pending COPY and prevent a covert channel. - * - * __switch_to() will issue CP_ABORT on future context switches. 
- */ - asm volatile(PPC_CP_ABORT); - -#endif /* CONFIG_PPC_BOOK3S_64 */ - return 0; -} - #ifdef CONFIG_PPC64 /** * Assign a TIDR (thread ID) for task @t and set it in the thread @@ -1600,16 +1618,20 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) /* * Copy architecture-specific thread state */ -int copy_thread_tls(unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); + extern void ret_from_fork_scv(void); extern void ret_from_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; struct thread_info *ti = task_thread_info(p); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int i; +#endif klp_init_thread_info(p); @@ -1639,7 +1661,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, if (usp) childregs->gpr[1] = usp; p->thread.regs = childregs; - childregs->gpr[3] = 0; /* Result from fork() */ + /* 64s sets this in ret_from_fork */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + childregs->gpr[3] = 0; /* Result from fork() */ if (clone_flags & CLONE_SETTLS) { if (!is_32bit_task()) childregs->gpr[13] = tls; @@ -1647,7 +1671,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, childregs->gpr[2] = tls; } - f = ret_from_fork; + if (trap_is_scv(regs)) + f = ret_from_fork_scv; + else + f = ret_from_fork; } childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); sp -= STACK_FRAME_OVERHEAD; @@ -1669,7 +1696,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, p->thread.ksp_limit = (unsigned long)end_of_stack(p); #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT - p->thread.ptrace_bps[0] = NULL; + for (i = 0; i < nr_wp_slots(); i++) + p->thread.ptrace_bps[i] = NULL; #endif p->thread.fp_save_area = NULL; @@ -1740,7 +1768,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * FULL_REGS(regs) return true. This is necessary to allow * ptrace to examine the thread immediately after exec. 
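 *
 * SET_FULL_REGS() below replaces the open-coded trap-bit manipulation;
 * presumably it clears the "partial regs" marker kept in the low bit of
 * regs->trap where that applies, and is a no-op on configurations that
 * always save the full register set.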
*/ - regs->trap &= ~1UL; + SET_FULL_REGS(regs); #ifdef CONFIG_PPC32 regs->mq = 0; @@ -2068,7 +2096,8 @@ unsigned long get_wchan(struct task_struct *p) static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *tsk, unsigned long *stack) +void show_stack(struct task_struct *tsk, unsigned long *stack, + const char *loglvl) { unsigned long sp, ip, lr, newsp; int count = 0; @@ -2093,7 +2122,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) } lr = 0; - printk("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); do { if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) break; @@ -2102,7 +2131,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; if (!firstframe || ip != lr) { - printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); + printk("%s["REG"] ["REG"] %pS", + loglvl, sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER ret_addr = ftrace_graph_ret_addr(current, &ftrace_idx, ip, stack); @@ -2124,8 +2154,9 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); lr = regs->link; - printk("--- interrupt: %lx at %pS\n LR = %pS\n", - regs->trap, (void *)regs->nip, (void *)lr); + printk("%s--- interrupt: %lx at %pS\n LR = %pS\n", + loglvl, regs->trap, + (void *)regs->nip, (void *)lr); firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6620f37abe73..d8a2fb87ba0c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -30,6 +30,7 @@ #include <linux/of_fdt.h> #include <linux/libfdt.h> #include <linux/cpu.h> +#include <linux/pgtable.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -41,7 +42,6 @@ #include <asm/smp.h> #include <asm/mmu.h> #include <asm/paca.h> -#include <asm/pgtable.h> #include <asm/powernv.h> #include <asm/iommu.h> #include <asm/btext.h> @@ -96,8 +96,8 @@ static inline int overlaps_initrd(unsigned long start, unsigned long size) if (!initrd_start) return 0; - return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) && - start <= _ALIGN_UP(initrd_end, PAGE_SIZE); + return (start + size) > ALIGN_DOWN(initrd_start, PAGE_SIZE) && + start <= ALIGN(initrd_end, PAGE_SIZE); #else return 0; #endif @@ -163,7 +163,7 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, #ifdef CONFIG_PPC_RADIX_MMU - { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, @@ -175,6 +175,8 @@ static struct ibm_pa_feature { */ { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, + + { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -468,8 +470,9 @@ static bool validate_mem_limit(u64 base, u64 *size) * This contains a list of memory blocks along with NUMA affinity * information. 
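 *
 * The callback now follows the common walk_drmem_lmbs() contract: it takes
 * an opaque data pointer and returns an int, with 0 meaning "keep walking".
 * A minimal sketch of a hypothetical callback under that contract:
 *
 *   static int __init count_lmbs(struct drmem_lmb *lmb, const __be32 **usm,
 *                                void *data)
 *   {
 *           *(unsigned int *)data += 1;
 *           return 0;
 *   }
 *
 *   unsigned int n = 0;
 *   walk_drmem_lmbs_early(node, &n, count_lmbs);
 *
 * This version also marks DRCONF_MEM_HOTREMOVABLE ranges as hotplug memory
 * in memblock, so the hotplug-capable ranges are visible to the movable_node
 * handling later in boot.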
*/ -static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, - const __be32 **usm) +static int __init early_init_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { u64 base, size; int is_kexec_kdump = 0, rngs; @@ -484,7 +487,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ if ((lmb->flags & DRCONF_MEM_RESERVED) || !(lmb->flags & DRCONF_MEM_ASSIGNED)) - return; + return 0; if (*usm) is_kexec_kdump = 1; @@ -499,7 +502,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ rngs = dt_mem_next_cell(dt_root_size_cells, usm); if (!rngs) /* there are no (base, size) duple */ - return; + return 0; } do { @@ -515,10 +518,17 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, size = 0x80000000ul - base; } + if (!validate_mem_limit(base, &size)) + continue; + DBG("Adding: %llx -> %llx\n", base, size); - if (validate_mem_limit(base, &size)) - memblock_add(base, size); + memblock_add(base, size); + + if (lmb->flags & DRCONF_MEM_HOTREMOVABLE) + memblock_mark_hotplug(base, size); } while (--rngs); + + return 0; } #endif /* CONFIG_PPC_PSERIES */ @@ -529,7 +539,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, #ifdef CONFIG_PPC_PSERIES if (depth == 1 && strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { - walk_drmem_lmbs_early(node, early_init_drmem_lmb); + walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); return 0; } #endif @@ -623,9 +633,9 @@ static void __init early_reserve_mem(void) #ifdef CONFIG_BLK_DEV_INITRD /* Then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) { - memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), - _ALIGN_UP(initrd_end, PAGE_SIZE) - - _ALIGN_DOWN(initrd_start, PAGE_SIZE)); + memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), + ALIGN(initrd_end, PAGE_SIZE) - + ALIGN_DOWN(initrd_start, PAGE_SIZE)); } #endif /* CONFIG_BLK_DEV_INITRD */ @@ -685,6 +695,23 @@ static void __init tm_init(void) static void tm_init(void) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC64 +static void __init save_fscr_to_task(void) +{ + /* + * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we + * have configured via the device tree features or via __init_FSCR(). + * That value will then be propagated to pid 1 (init) and all future + * processes. 
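 *
 * (FSCR is the Facility Status and Control Register; it gates per-thread
 * access to facilities such as DSCR, TAR and EBB and is context-switched
 * via thread.fscr, which is why the boot-time value is captured into
 * init_task here.)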
+ */ + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + init_task.thread.fscr = mfspr(SPRN_FSCR); +} +#else +static inline void save_fscr_to_task(void) {}; +#endif + + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -773,6 +800,8 @@ void __init early_init_devtree(void *params) BUG(); } + save_fscr_to_task(); + #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are * NCPUS-1 non-boot CPUs :-) @@ -791,6 +820,11 @@ void __init early_init_devtree(void *params) /* Now try to figure out if we are running on LPAR and so on */ pseries_probe_fw_features(); + /* + * Initialize pkey features and default AMR/IAMR values + */ + pkey_early_init_devtree(); + #ifdef CONFIG_PPC_PS3 /* Identify PS3 firmware */ if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 806be751c336..ae7ec9903191 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -26,6 +26,7 @@ #include <linux/delay.h> #include <linux/initrd.h> #include <linux/bitops.h> +#include <linux/pgtable.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/page.h> @@ -34,7 +35,6 @@ #include <asm/io.h> #include <asm/smp.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/iommu.h> #include <asm/btext.h> #include <asm/sections.h> @@ -169,6 +169,7 @@ static unsigned long __prombss prom_tce_alloc_end; #ifdef CONFIG_PPC_PSERIES static bool __prombss prom_radix_disable; +static bool __prombss prom_radix_gtse_disable; static bool __prombss prom_xive_disable; #endif @@ -823,6 +824,12 @@ static void __init early_cmdline_parse(void) if (prom_radix_disable) prom_debug("Radix disabled from cmdline\n"); + opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on"); + if (opt) { + prom_radix_gtse_disable = true; + prom_debug("Radix GTSE disabled from cmdline\n"); + } + opt = prom_strstr(prom_cmd_line, "xive=off"); if (opt) { prom_xive_disable = true; @@ -920,7 +927,7 @@ struct option_vector6 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[12]; + struct { u32 mask, val; } pvrs[14]; u8 num_vectors; @@ -974,6 +981,14 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .val = cpu_to_be32(0x004e0000), }, { + .mask = cpu_to_be32(0xffff0000), /* POWER10 */ + .val = cpu_to_be32(0x00800000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */ + .val = cpu_to_be32(0x0f000006), + }, + { .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */ .val = cpu_to_be32(0x0f000005), }, @@ -1002,7 +1017,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .byte1 = 0, .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, - .arch_versions3 = OV1_PPC_3_00, + .arch_versions3 = OV1_PPC_3_00 | OV1_PPC_3_1, }, .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), @@ -1277,10 +1292,8 @@ static void __init prom_parse_platform_support(u8 index, u8 val, prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); break; case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ - if (val & OV5_FEAT(OV5_RADIX_GTSE)) { - prom_debug("Radix - GTSE supported\n"); - support->radix_gtse = true; - } + if (val & OV5_FEAT(OV5_RADIX_GTSE)) + support->radix_gtse = !prom_radix_gtse_disable; break; case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT), @@ 
-1328,12 +1341,15 @@ static void __init prom_check_platform_support(void) } } - if (supported.radix_mmu && supported.radix_gtse && - IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { - /* Radix preferred - but we require GTSE for now */ - prom_debug("Asking for radix with GTSE\n"); + if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { + /* Radix preferred - Check if GTSE is also supported */ + prom_debug("Asking for radix\n"); ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); - ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + if (supported.radix_gtse) + ibm_architecture_vec.vec5.radix_ext = + OV5_FEAT(OV5_RADIX_GTSE); + else + prom_debug("Radix GTSE isn't supported\n"); } else if (supported.hash_mmu) { /* Default to hash mmu (if we can) */ prom_debug("Asking for hash\n"); @@ -1449,18 +1465,18 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) unsigned long addr = 0; if (align) - base = _ALIGN_UP(base, align); + base = ALIGN(base, align); prom_debug("%s(%lx, %lx)\n", __func__, size, align); if (ram_top == 0) prom_panic("alloc_up() called with mem not initialized\n"); if (align) - base = _ALIGN_UP(alloc_bottom, align); + base = ALIGN(alloc_bottom, align); else base = alloc_bottom; for(; (base + size) <= alloc_top; - base = _ALIGN_UP(base + 0x100000, align)) { + base = ALIGN(base + 0x100000, align)) { prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) @@ -1500,7 +1516,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, if (highmem) { /* Carve out storage for the TCE table. */ - addr = _ALIGN_DOWN(alloc_top_high - size, align); + addr = ALIGN_DOWN(alloc_top_high - size, align); if (addr <= alloc_bottom) return 0; /* Will we bump into the RMO ? 
If yes, check out that we @@ -1518,9 +1534,9 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, goto bail; } - base = _ALIGN_DOWN(alloc_top - size, align); + base = ALIGN_DOWN(alloc_top - size, align); for (; base > alloc_bottom; - base = _ALIGN_DOWN(base - 0x100000, align)) { + base = ALIGN_DOWN(base - 0x100000, align)) { prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) @@ -1586,8 +1602,8 @@ static void __init reserve_mem(u64 base, u64 size) * have our terminator with "size" set to 0 since we are * dumb and just copy this entire array to the boot params */ - base = _ALIGN_DOWN(base, PAGE_SIZE); - top = _ALIGN_UP(top, PAGE_SIZE); + base = ALIGN_DOWN(base, PAGE_SIZE); + top = ALIGN(top, PAGE_SIZE); size = top - base; if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) @@ -2426,7 +2442,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, { void *ret; - *mem_start = _ALIGN(*mem_start, align); + *mem_start = ALIGN(*mem_start, align); while ((*mem_start + needed) > *mem_end) { unsigned long room, chunk; @@ -2562,7 +2578,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, *lp++ = *p; } *lp = 0; - *mem_start = _ALIGN((unsigned long)lp + 1, 4); + *mem_start = ALIGN((unsigned long)lp + 1, 4); } /* get it again for debugging */ @@ -2608,7 +2624,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* push property content */ valp = make_room(mem_start, mem_end, l, 4); call_prom("getprop", 4, 1, node, pname, valp, l); - *mem_start = _ALIGN(*mem_start, 4); + *mem_start = ALIGN(*mem_start, 4); if (!prom_strcmp(pname, "phandle")) has_phandle = 1; @@ -2667,7 +2683,7 @@ static void __init flatten_device_tree(void) prom_panic ("couldn't get device tree root\n"); /* Build header and make room for mem rsv map */ - mem_start = _ALIGN(mem_start, 4); + mem_start = ALIGN(mem_start, 4); hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); dt_header_start = (unsigned long)hdr; @@ -3254,7 +3270,7 @@ static int enter_secure_mode(unsigned long kbase, unsigned long fdt) /* * Call the Ultravisor to transfer us to secure memory if we have an ESM blob. 
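 *
 * (Both variants are now tagged __init, matching their only caller
 * prom_init(), so the code is discarded with the rest of init memory once
 * boot completes.)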
*/ -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { int ret; @@ -3284,7 +3300,7 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt) } } #else -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { } #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c index dd8b75dfbd06..0d9bc4bd4972 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-altivec.c +++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c @@ -41,38 +41,25 @@ int vr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int vr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; flush_altivec_to_thread(target); BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != offsetof(struct thread_vr_state, vr[32])); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - } - - return ret; + membuf_write(&to, &target->thread.vr_state, 33 * sizeof(vector128)); + /* + * Copy out only the low-order word of vrsave. 
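 *
 * With the membuf-based regset API, membuf_write() copies into the caller's
 * buffer, advances it, and returns the room left in it, which is what the
 * regset core expects a ->regset_get() handler to return on success; hence
 * the final membuf_write() result below doubles as vr_get()'s return value.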
+ */ + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + return membuf_write(&to, &vrsave, sizeof(vrsave)); } /* diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 3c8a81999292..67447a6197eb 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -63,8 +63,7 @@ enum powerpc_regset { /* ptrace-(no)vsx */ -int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn fpr_get; int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -72,8 +71,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-vsx */ int vsr_active(struct task_struct *target, const struct user_regset *regset); -int vsr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn vsr_get; int vsr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -81,8 +79,7 @@ int vsr_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-altivec */ int vr_active(struct task_struct *target, const struct user_regset *regset); -int vr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn vr_get; int vr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -90,8 +87,7 @@ int vr_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-spe */ int evr_active(struct task_struct *target, const struct user_regset *regset); -int evr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn evr_get; int evr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -100,9 +96,8 @@ int evr_set(struct task_struct *target, const struct user_regset *regset, int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - unsigned long *regs); + struct membuf to, + unsigned long *regs); int gpr32_set_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, @@ -118,55 +113,46 @@ static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } #endif int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset); -int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cgpr_get; int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset); -int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn 
tm_cfpr_get; int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset); -int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cvmx_get; int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset); -int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cvsx_get; int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_spr_active(struct task_struct *target, const struct user_regset *regset); -int tm_spr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_spr_get; int tm_spr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_tar_active(struct task_struct *target, const struct user_regset *regset); -int tm_tar_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_tar_get; int tm_tar_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_ppr_active(struct task_struct *target, const struct user_regset *regset); -int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_ppr_get; int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_dscr_active(struct task_struct *target, const struct user_regset *regset); -int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_dscr_get; int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); -int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cgpr32_get; int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index f87e7c5c3bf3..697c7e4b5877 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -44,7 +44,7 @@ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) dbginfo->version = 1; dbginfo->num_instruction_bps = 0; if (ppc_breakpoint_available()) - dbginfo->num_data_bps = 1; + dbginfo->num_data_bps = nr_wp_slots(); else 
dbginfo->num_data_bps = 0; dbginfo->num_condition_regs = 0; @@ -67,11 +67,16 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, /* We only support one DABR and no IABRS at the moment */ if (addr > 0) return -EINVAL; - dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + dabr_fake = ((child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR)); return put_user(dabr_fake, datalp); } +/* + * ptrace_set_debugreg() fakes DABR, and there is only one DABR. So even if + * the hardware supports more than one watchpoint, only one watchpoint is + * supported through this interface. + */ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -137,7 +142,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l return ret; thread->ptrace_bps[0] = bp; - thread->hw_brk = hw_brk; + thread->hw_brk[0] = hw_brk; return 0; } @@ -159,12 +164,37 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l if (set_bp && (!ppc_breakpoint_available())) return -ENODEV; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - task->thread.hw_brk = hw_brk; + task->thread.hw_brk[0] = hw_brk; return 0; } +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static int find_empty_ptrace_bp(struct thread_struct *thread) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!thread->ptrace_bps[i]) + return i; + } + return -1; +} +#endif + +static int find_empty_hw_brk(struct thread_struct *thread) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!thread->hw_brk[i].address) + return i; + } + return -1; +} + long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) { + int i; #ifdef CONFIG_HAVE_HW_BREAKPOINT int len = 0; struct thread_struct *thread = &child->thread; @@ -186,7 +216,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; + brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); brk.type = HW_BRK_TYPE_TRANSLATE; brk.len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) @@ -200,8 +230,9 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf len = 1; else return -EINVAL; - bp = thread->ptrace_bps[0]; - if (bp) + + i = find_empty_ptrace_bp(thread); + if (i < 0) return -ENOSPC; /* Create a new breakpoint request if one doesn't exist already */ @@ -211,27 +242,28 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf arch_bp_generic_fields(brk.type, &attr.bp_type); bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child); - thread->ptrace_bps[0] = bp; + thread->ptrace_bps[i] = bp; if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; + thread->ptrace_bps[i] = NULL; return PTR_ERR(bp); } - return 1; + return i + 1; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) return -EINVAL; - if (child->thread.hw_brk.address) + i = find_empty_hw_brk(&child->thread); + if (i < 0) return -ENOSPC; if (!ppc_breakpoint_available()) return -ENODEV; - child->thread.hw_brk = brk; + child->thread.hw_brk[i] = brk; - return 1; + return i + 1; } long ppc_del_hwdebug(struct task_struct *child, long data) @@ -241,24 +273,24 @@ long ppc_del_hwdebug(struct task_struct *child, long data) struct thread_struct 
*thread = &child->thread; struct perf_event *bp; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ - if (data != 1) + if (data < 1 || data > nr_wp_slots()) return -EINVAL; #ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; + bp = thread->ptrace_bps[data - 1]; if (bp) { unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; + thread->ptrace_bps[data - 1] = NULL; } else { ret = -ENOENT; } return ret; #else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk.address == 0) + if (child->thread.hw_brk[data - 1].address == 0) return -ENOENT; - child->thread.hw_brk.address = 0; - child->thread.hw_brk.type = 0; + child->thread.hw_brk[data - 1].address = 0; + child->thread.hw_brk[data - 1].type = 0; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ return 0; diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c index b2dc4e92d11a..b3b36835658a 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-novsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c @@ -19,15 +19,14 @@ * }; */ int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); flush_fp_to_thread(target); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); + return membuf_write(&to, &target->thread.fp_state, 33 * sizeof(u64)); } /* diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c index 68b86b4a4be4..47034d069045 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-spe.c +++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c @@ -23,25 +23,17 @@ int evr_active(struct task_struct *target, const struct user_regset *regset) } int evr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - flush_spe_to_thread(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); + membuf_write(&to, &target->thread.evr, sizeof(target->thread.evr)); BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != offsetof(struct thread_struct, spefscr)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; + return membuf_write(&to, &target->thread.acc, + sizeof(u64) + sizeof(u32)); } int evr_set(struct task_struct *target, const struct user_regset *regset, diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index d75aff31f637..54f2d076206f 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -43,7 +43,7 @@ static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) { - task->thread.ckpt_regs.trap = trap & 0xfff0; + set_trap(&task->thread.ckpt_regs, trap); return 0; } @@ -70,10 +70,7 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) * tm_cgpr_get - get CGPR registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets transaction checkpointed GPR registers. 
* @@ -87,10 +84,8 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -101,31 +96,18 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_ckpt_msr(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } + membuf_write(&to, &target->thread.ckpt_regs, + offsetof(struct pt_regs, msr)); + membuf_store(&to, get_user_ckpt_msr(target)); BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != offsetof(struct pt_regs, msr) + sizeof(long)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; + membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3, + sizeof(struct user_pt_regs) - + offsetof(struct pt_regs, orig_gpr3)); + return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - + sizeof(struct user_pt_regs)); } /* @@ -229,10 +211,7 @@ int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset) * tm_cfpr_get - get CFPR registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets in transaction checkpointed FPR registers. * @@ -247,7 +226,7 @@ int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset) *}; */ int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[33]; int i; @@ -266,7 +245,7 @@ int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_CKFPR(i); buf[32] = target->thread.ckfp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + return membuf_write(&to, buf, sizeof(buf)); } /** @@ -344,10 +323,7 @@ int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset) * tm_cvmx_get - get CMVX registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets in transaction checkpointed VMX registers. 
* @@ -363,10 +339,12 @@ int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset) *}; */ int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - + union { + elf_vrreg_t reg; + u32 word; + } vrsave; BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); if (!cpu_has_feature(CPU_FTR_TM)) @@ -380,23 +358,13 @@ int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state, - 0, 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - } - - return ret; + membuf_write(&to, &target->thread.ckvr_state, 33 * sizeof(vector128)); + /* + * Copy out only the low-order word of vrsave. + */ + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + return membuf_write(&to, &vrsave, sizeof(vrsave)); } /** @@ -484,10 +452,7 @@ int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset) * tm_cvsx_get - get CVSX registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets in transaction checkpointed VSX registers. * @@ -501,10 +466,10 @@ int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset) *}; */ int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[32]; - int ret, i; + int i; if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -520,10 +485,7 @@ int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; + return membuf_write(&to, buf, 32 * sizeof(double)); } /** @@ -597,10 +559,7 @@ int tm_spr_active(struct task_struct *target, const struct user_regset *regset) * tm_spr_get - get the TM related SPR registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets transactional memory related SPR registers. * The userspace interface buffer layout is as follows. 
@@ -612,10 +571,8 @@ int tm_spr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int tm_spr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - /* Build tests */ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); @@ -630,21 +587,11 @@ int tm_spr_get(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); /* TFHAR register */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - + membuf_write(&to, &target->thread.tm_tfhar, sizeof(u64)); /* TEXASR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - + membuf_write(&to, &target->thread.tm_texasr, sizeof(u64)); /* TFIAR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_tfiar, sizeof(u64)); } /** @@ -714,19 +661,15 @@ int tm_tar_active(struct task_struct *target, const struct user_regset *regset) } int tm_tar_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_tar, sizeof(u64)); } int tm_tar_set(struct task_struct *target, const struct user_regset *regset, @@ -759,19 +702,15 @@ int tm_ppr_active(struct task_struct *target, const struct user_regset *regset) int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_ppr, sizeof(u64)); } int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, @@ -803,19 +742,15 @@ int tm_dscr_active(struct task_struct *target, const struct user_regset *regset) } int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_dscr, sizeof(u64)); } int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, @@ -836,10 +771,11 @@ int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, } int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + gpr32_get_common(target, regset, 
to, &target->thread.ckpt_regs.gpr[0]); + return membuf_zero(&to, ELF_NGREG * sizeof(u32)); } int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 15e3b79b6395..7e6478e7ed07 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -149,7 +149,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr) */ static int set_user_trap(struct task_struct *task, unsigned long trap) { - task->thread.regs->trap = trap & 0xfff0; + set_trap(task->thread.regs, trap); return 0; } @@ -215,9 +215,9 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) } static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int i, ret; + int i; if (target->thread.regs == NULL) return -EIO; @@ -228,30 +228,17 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, target->thread.regs->gpr[i] = NV_REG_POISON; } - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_msr(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } + membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr)); + membuf_store(&to, get_user_msr(target)); BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != offsetof(struct pt_regs, msr) + sizeof(long)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; + membuf_write(&to, &target->thread.regs->orig_gpr3, + sizeof(struct user_pt_regs) - + offsetof(struct pt_regs, orig_gpr3)); + return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - + sizeof(struct user_pt_regs)); } static int gpr_set(struct task_struct *target, const struct user_regset *regset, @@ -309,10 +296,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_PPC64 static int ppr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); } static int ppr_set(struct task_struct *target, const struct user_regset *regset, @@ -324,10 +310,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, } static int dscr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); + return membuf_write(&to, &target->thread.dscr, sizeof(u64)); } static int dscr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, @@ -339,10 +324,9 @@ static int dscr_set(struct task_struct *target, const struct user_regset *regset #endif #ifdef 
CONFIG_PPC_BOOK3S_64 static int tar_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); + return membuf_write(&to, &target->thread.tar, sizeof(u64)); } static int tar_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, @@ -364,7 +348,7 @@ static int ebb_active(struct task_struct *target, const struct user_regset *regs } static int ebb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { /* Build tests */ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); @@ -376,8 +360,7 @@ static int ebb_get(struct task_struct *target, const struct user_regset *regset, if (!target->thread.used_ebb) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr, - 0, 3 * sizeof(unsigned long)); + return membuf_write(&to, &target->thread.ebbrr, 3 * sizeof(unsigned long)); } static int ebb_set(struct task_struct *target, const struct user_regset *regset, @@ -420,7 +403,7 @@ static int pmu_active(struct task_struct *target, const struct user_regset *regs } static int pmu_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { /* Build tests */ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); @@ -431,8 +414,7 @@ static int pmu_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_feature(CPU_FTR_ARCH_207S)) return -ENODEV; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.siar, - 0, 5 * sizeof(unsigned long)); + return membuf_write(&to, &target->thread.siar, 5 * sizeof(unsigned long)); } static int pmu_set(struct task_struct *target, const struct user_regset *regset, @@ -486,16 +468,15 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg } static int pkey_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); - BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); if (!arch_pkeys_enabled()) return -ENODEV; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.amr, - 0, ELF_NPKEY * sizeof(unsigned long)); + membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + return membuf_store(&to, default_uamor); } static int pkey_set(struct task_struct *target, const struct user_regset *regset, @@ -517,9 +498,17 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset if (ret) return ret; - /* UAMOR determines which bits of the AMR can be set from userspace. */ - target->thread.amr = (new_amr & target->thread.uamor) | - (target->thread.amr & ~target->thread.uamor); + /* + * UAMOR determines which bits of the AMR can be set from userspace. + * UAMOR value 0b11 indicates that the AMR value can be modified + * from userspace. If the kernel is using a specific key, we avoid + * userspace modifying the AMR value for that key by masking them + * via UAMOR 0b00. + * + * Pick the AMR values for the keys that kernel is using. This + * will be indicated by the ~default_uamor bits. 
+ */ + target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); return 0; } @@ -529,110 +518,110 @@ static const struct user_regset native_regsets[] = { [REGSET_GPR] = { .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .get = gpr_get, .set = gpr_set + .regset_get = gpr_get, .set = gpr_set }, [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set + .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set + .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_VSX [REGSET_VSX] = { .core_note_type = NT_PPC_VSX, .n = 32, .size = sizeof(double), .align = sizeof(double), - .active = vsr_active, .get = vsr_get, .set = vsr_set + .active = vsr_active, .regset_get = vsr_get, .set = vsr_set }, #endif #ifdef CONFIG_SPE [REGSET_SPE] = { .core_note_type = NT_PPC_SPE, .n = 35, .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set + .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM [REGSET_TM_CGPR] = { .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set + .active = tm_cgpr_active, .regset_get = tm_cgpr_get, .set = tm_cgpr_set }, [REGSET_TM_CFPR] = { .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { .core_note_type = NT_PPC_TM_CTAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { .core_note_type = NT_PPC_TM_CPPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { .core_note_type = NT_PPC_TM_CDSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { .core_note_type = NT_PPC_PPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set + 
.regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { .core_note_type = NT_PPC_DSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set + .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { .core_note_type = NT_PPC_TAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set + .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set + .active = ebb_active, .regset_get = ebb_get, .set = ebb_set }, [REGSET_PMR] = { .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, .size = sizeof(u64), .align = sizeof(u64), - .active = pmu_active, .get = pmu_get, .set = pmu_set + .active = pmu_active, .regset_get = pmu_get, .set = pmu_set }, #endif #ifdef CONFIG_PPC_MEM_KEYS [REGSET_PKEY] = { .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, .size = sizeof(u64), .align = sizeof(u64), - .active = pkey_active, .get = pkey_get, .set = pkey_set + .active = pkey_active, .regset_get = pkey_get, .set = pkey_set }, #endif }; @@ -646,49 +635,16 @@ const struct user_regset_view user_ppc_native_view = { int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - unsigned long *regs) + struct membuf to, unsigned long *regs) { - compat_ulong_t *k = kbuf; - compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_MSR; --count) - if (__put_user((compat_ulong_t)regs[pos++], u++)) - return -EFAULT; - - if (count > 0 && pos == PT_MSR) { - reg = get_user_msr(target); - if (kbuf) - *k++ = reg; - else if (__put_user(reg, u++)) - return -EFAULT; - ++pos; - --count; - } - - if (kbuf) - for (; count > 0 && pos < PT_REGS_COUNT; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_REGS_COUNT; --count) - if (__put_user((compat_ulong_t)regs[pos++], u++)) - return -EFAULT; + int i; - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - PT_REGS_COUNT * sizeof(reg), -1); + for (i = 0; i < PT_MSR; i++) + membuf_store(&to, (u32)regs[i]); + membuf_store(&to, (u32)get_user_msr(target)); + for (i++ ; i < PT_REGS_COUNT; i++) + membuf_store(&to, (u32)regs[i]); + return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32)); } int gpr32_set_common(struct task_struct *target, @@ -761,8 +717,7 @@ int gpr32_set_common(struct task_struct *target, static int gpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { int i; @@ -777,7 +732,7 @@ static int gpr32_get(struct task_struct *target, for (i = 14; i < 32; i++) target->thread.regs->gpr[i] = NV_REG_POISON; } - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + return gpr32_get_common(target, regset, to, &target->thread.regs->gpr[0]); } @@ -801,25 +756,25 @@ static const struct user_regset compat_regsets[] = { [REGSET_GPR] = { .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = gpr32_get, .set = gpr32_set + .regset_get = gpr32_get, .set = gpr32_set }, [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, 
.size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set + .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set + .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_SPE [REGSET_SPE] = { .core_note_type = NT_PPC_SPE, .n = 35, .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set + .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -827,66 +782,66 @@ static const struct user_regset compat_regsets[] = { .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .active = tm_cgpr_active, - .get = tm_cgpr32_get, .set = tm_cgpr32_set + .regset_get = tm_cgpr32_get, .set = tm_cgpr32_set }, [REGSET_TM_CFPR] = { .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { .core_note_type = NT_PPC_TM_CTAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { .core_note_type = NT_PPC_TM_CPPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { .core_note_type = NT_PPC_TM_CDSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { .core_note_type = NT_PPC_PPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set + .regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { .core_note_type = NT_PPC_DSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set + .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { .core_note_type = NT_PPC_TAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set + .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set + .active = ebb_active, .regset_get = ebb_get, .set = 
ebb_set }, #endif }; diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c index d53466d49cc0..1da4303128ef 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-vsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -19,7 +19,7 @@ * }; */ int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[33]; int i; @@ -30,7 +30,7 @@ int fpr_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_FPR(i); buf[32] = target->thread.fp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + return membuf_write(&to, buf, 33 * sizeof(u64)); } /* @@ -95,10 +95,10 @@ int vsr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int vsr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[32]; - int ret, i; + int i; flush_tmregs_to_thread(target); flush_fp_to_thread(target); @@ -108,10 +108,7 @@ int vsr_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; + return membuf_write(&to, buf, 32 * sizeof(double)); } /* diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index 7976ddf29c0e..7589a9665ffb 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -259,8 +259,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, ret = put_user(child->thread.debug.dac1, (u32 __user *)data); #else dabr_fake = ( - (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + (child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR)); ret = put_user(dabr_fake, (u32 __user *)data); #endif break; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c5fa251b8950..806d554ce357 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -41,6 +41,7 @@ #include <asm/time.h> #include <asm/mmu.h> #include <asm/topology.h> +#include <asm/paca.h> /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); @@ -842,96 +843,6 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -enum rtas_cpu_state { - DOWN, - UP, -}; - -#ifndef CONFIG_SMP -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - if (!cpumask_empty(cpus)) { - cpumask_clear(cpus); - return -EINVAL; - } else - return 0; -} -#else -/* On return cpumask will be altered to indicate CPUs changed. - * CPUs with states changed will be set in the mask, - * CPUs with status unchanged will be unset in the mask. 
*/ -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - int cpu; - int cpuret = 0; - int ret = 0; - - if (cpumask_empty(cpus)) - return 0; - - for_each_cpu(cpu, cpus) { - struct device *dev = get_cpu_device(cpu); - - switch (state) { - case DOWN: - cpuret = device_offline(dev); - break; - case UP: - cpuret = device_online(dev); - break; - } - if (cpuret < 0) { - pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", - __func__, - ((state == UP) ? "up" : "down"), - cpu, cpuret); - if (!ret) - ret = cpuret; - if (state == UP) { - /* clear bits for unchanged cpus, return */ - cpumask_shift_right(cpus, cpus, cpu); - cpumask_shift_left(cpus, cpus, cpu); - break; - } else { - /* clear bit for unchanged cpu, continue */ - cpumask_clear_cpu(cpu, cpus); - } - } - cond_resched(); - } - - return ret; -} -#endif - -int rtas_online_cpus_mask(cpumask_var_t cpus) -{ - int ret; - - ret = rtas_cpu_state_change_mask(UP, cpus); - - if (ret) { - cpumask_var_t tmp_mask; - - if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) - return ret; - - /* Use tmp_mask to preserve cpus mask from first failure */ - cpumask_copy(tmp_mask, cpus); - rtas_offline_cpus_mask(tmp_mask); - free_cpumask_var(tmp_mask); - } - - return ret; -} - -int rtas_offline_cpus_mask(cpumask_var_t cpus) -{ - return rtas_cpu_state_change_mask(DOWN, cpus); -} - int rtas_ibm_suspend_me(u64 handle) { long state; @@ -939,8 +850,6 @@ int rtas_ibm_suspend_me(u64 handle) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); - cpumask_var_t offline_mask; - int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -961,9 +870,6 @@ int rtas_ibm_suspend_me(u64 handle) return -EIO; } - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); @@ -972,24 +878,8 @@ int rtas_ibm_suspend_me(u64 handle) lock_device_hotplug(); - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); - cpuret = rtas_online_cpus_mask(offline_mask); - if (cpuret) { - pr_err("%s: Could not bring present CPUs online.\n", __func__); - atomic_set(&data.error, cpuret); - goto out; - } - cpu_hotplug_disable(); - /* Check if we raced with a CPU-Offline Operation */ - if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) { - pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__); - atomic_set(&data.error, -EAGAIN); - goto out_hotplug_enable; - } - /* Call function on all CPUs. One of us will make the * rtas call */ @@ -1000,20 +890,64 @@ int rtas_ibm_suspend_me(u64 handle) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); -out_hotplug_enable: - cpu_hotplug_enable(); - /* Take down CPUs not online prior to suspend */ - cpuret = rtas_offline_cpus_mask(offline_mask); - if (cpuret) - pr_warn("%s: Could not restore CPUs to offline state.\n", - __func__); + cpu_hotplug_enable(); -out: unlock_device_hotplug(); - free_cpumask_var(offline_mask); + return atomic_read(&data.error); } + +/** + * rtas_call_reentrant() - Used for reentrant rtas calls + * @token: Token for desired reentrant RTAS call + * @nargs: Number of Input Parameters + * @nret: Number of Output Parameters + * @outputs: Array of outputs + * @...: Inputs for desired RTAS call + * + * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off", + * "ibm,get-xive" and "ibm,set-xive" are currently reentrant. 
+ * Reentrant calls need their own rtas_args buffer, so they use the per-CPU + * PACA buffer instead of rtas.args. + * + * Return: -1 on error, + * First output value of RTAS call if (nret > 0), + * 0 otherwise. + */ +int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) +{ + va_list list; + struct rtas_args *args; + unsigned long flags; + int i, ret = 0; + + if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) + return -1; + + local_irq_save(flags); + preempt_disable(); + + /* We use the per-cpu (PACA) rtas args buffer */ + args = local_paca->rtas_args_reentrant; + + va_start(list, outputs); + va_rtas_call_unlocked(args, token, nargs, nret, list); + va_end(list); + + if (nret > 1 && outputs) + for (i = 0; i < nret - 1; ++i) + outputs[i] = be32_to_cpu(args->rets[i + 1]); + + if (nret > 0) + ret = be32_to_cpu(args->rets[0]); + + local_irq_restore(flags); + preempt_enable(); + + return ret; +} + #else /* CONFIG_PPC_PSERIES */ int rtas_ibm_suspend_me(u64 handle) { diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index ae5e43eaca48..781c1869902e 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -13,9 +13,9 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/irq.h> #include <asm/prom.h> #include <asm/machdep.h> diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 89b798f8f656..8561dfb33f24 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -273,37 +273,15 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) } } -#ifdef CONFIG_PPC_PSERIES -static void handle_prrn_event(s32 scope) -{ - /* - * For PRRN, we must pass the negative of the scope value in - * the RTAS event. - */ - pseries_devicetree_update(-scope); - numa_update_cpu_topology(false); -} - static void handle_rtas_event(const struct rtas_error_log *log) { - if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) + if (!machine_is(pseries)) return; - /* For PRRN Events the extended log length is used to denote - * the scope for calling rtas update-nodes. - */ - handle_prrn_event(rtas_error_extended_log_length(log)); + if (rtas_error_type(log) == RTAS_TYPE_PRRN) + pr_info_ratelimited("Platform resource reassignment ignored.\n"); } -#else - -static void handle_rtas_event(const struct rtas_error_log *log) -{ - return; -} - -#endif - static int rtas_log_open(struct inode * inode, struct file * file) { return 0; } diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 4b982324d368..f9af305d9579 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -23,12 +23,19 @@ bool is_ppc_secureboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 secureboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "os-secureboot-enforcing"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot)) + enabled = (secureboot > 1); + +out: pr_info("Secure boot mode %s\n", enabled ? 
"enabled" : "disabled"); return enabled; @@ -38,12 +45,19 @@ bool is_ppc_trustedboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 trustedboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "trusted-enabled"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot)) + enabled = (trustedboot > 0); + +out: pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); return enabled; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bd70f5be1c27..c9876aab3142 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -7,6 +7,8 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/device.h> +#include <linux/nospec.h> +#include <linux/prctl.h> #include <linux/seq_buf.h> #include <asm/asm-prototypes.h> @@ -14,17 +16,18 @@ #include <asm/debugfs.h> #include <asm/security_features.h> #include <asm/setup.h> +#include <asm/inst.h> u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; -enum count_cache_flush_type { - COUNT_CACHE_FLUSH_NONE = 0x1, - COUNT_CACHE_FLUSH_SW = 0x2, - COUNT_CACHE_FLUSH_HW = 0x4, +enum branch_cache_flush_type { + BRANCH_CACHE_FLUSH_NONE = 0x1, + BRANCH_CACHE_FLUSH_SW = 0x2, + BRANCH_CACHE_FLUSH_HW = 0x4, }; -static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; -static bool link_stack_flush_enabled; +static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; +static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; bool barrier_nospec_enabled; static bool no_nospec; @@ -216,24 +219,25 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - - } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + } else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); - if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { seq_buf_printf(&s, "Vulnerable"); } + if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + seq_buf_printf(&s, ", Software link stack flush"); + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) + seq_buf_printf(&s, " (hardware accelerated)"); + } + seq_buf_printf(&s, "\n"); return s.len; @@ -353,6 +357,40 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * return sprintf(buf, "Vulnerable\n"); } +static int ssb_prctl_get(struct task_struct *task) +{ + if (stf_enabled_flush_types == STF_BARRIER_NONE) + /* + * We don't have an explicit signal from firmware that we're + * vulnerable or not, we only have certain CPU revisions that + * are known to be vulnerable. + * + * We assume that if we're on another CPU, where the barrier is + * NONE, then we are not vulnerable. + */ + return PR_SPEC_NOT_AFFECTED; + else + /* + * If we do have a barrier type then we are vulnerable. 
The + * barrier is not a global or per-process mitigation, so the + * only value we can report here is PR_SPEC_ENABLE, which + * appears as "vulnerable" in /proc. + */ + return PR_SPEC_ENABLE; + + return -EINVAL; +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); + default: + return -ENODEV; + } +} + #ifdef CONFIG_DEBUG_FS static int stf_barrier_set(void *data, u64 val) { @@ -390,58 +428,79 @@ static __init int stf_barrier_debugfs_init(void) device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -static void no_count_cache_flush(void) -{ - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); -} - -static void toggle_count_cache_flush(bool enable) +static void update_branch_cache_flush(void) { - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && - !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) - enable = false; - - if (!enable) { - patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP); -#endif - pr_info("link-stack-flush: software flush disabled.\n"); - link_stack_flush_enabled = false; - no_count_cache_flush(); - return; + // This controls the branch from guest_exit_cont to kvm_flush_link_stack + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__call_kvm_flush_link_stack, + ppc_inst(PPC_INST_NOP)); + } else { + // Could use HW flush, but that could also flush count cache + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); } - - // This enables the branch from _switch to flush_count_cache - patch_branch_site(&patch__call_flush_count_cache, - (u64)&flush_count_cache, BRANCH_SET_LINK); - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - // This enables the branch from guest_exit_cont to kvm_flush_link_stack - patch_branch_site(&patch__call_kvm_flush_link_stack, - (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); #endif - pr_info("link-stack-flush: software flush enabled.\n"); - link_stack_flush_enabled = true; + // This controls the branch from _switch to flush_branch_caches + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && + link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_NOP)); + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW && + link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_BCCTR_FLUSH)); + } else { + patch_branch_site(&patch__call_flush_branch_caches, + (u64)&flush_branch_caches, BRANCH_SET_LINK); + + // If we just need to flush the link stack, early return + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__flush_link_stack_return, + ppc_inst(PPC_INST_BLR)); + + // If we have flush instruction, early return + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__flush_count_cache_return, + ppc_inst(PPC_INST_BLR)); + } + } +} - // If we just need to flush the link stack, patch an early return - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { - patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR); - no_count_cache_flush(); - return; +static void toggle_branch_cache_flush(bool enable) +{ + if (!enable || 
!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) + count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("count-cache-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { + count_cache_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("count-cache-flush: hardware flush enabled.\n"); + } else { + count_cache_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("count-cache-flush: software flush enabled.\n"); + } } - if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { - count_cache_flush_type = COUNT_CACHE_FLUSH_SW; - pr_info("count-cache-flush: full software flush sequence enabled.\n"); - return; + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("link-stack-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) { + link_stack_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("link-stack-flush: hardware flush enabled.\n"); + } else { + link_stack_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("link-stack-flush: software flush enabled.\n"); + } } - patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR); - count_cache_flush_type = COUNT_CACHE_FLUSH_HW; - pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); + update_branch_cache_flush(); } void setup_count_cache_flush(void) @@ -465,7 +524,7 @@ void setup_count_cache_flush(void) security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); } #ifdef CONFIG_DEBUG_FS @@ -480,14 +539,14 @@ static int count_cache_flush_set(void *data, u64 val) else return -EINVAL; - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); return 0; } static int count_cache_flush_get(void *data, u64 *val) { - if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) *val = 0; else *val = 1; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f9c0d888ce8a..808ec9fab605 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -31,13 +31,13 @@ #include <linux/memblock.h> #include <linux/of_platform.h> #include <linux/hugetlb.h> +#include <linux/pgtable.h> #include <asm/debugfs.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/prom.h> #include <asm/processor.h> #include <asm/vdso_datapage.h> -#include <asm/pgtable.h> #include <asm/smp.h> #include <asm/elf.h> #include <asm/machdep.h> @@ -306,15 +306,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) } } else { switch (PVR_VER(pvr)) { - case 0x0020: /* 403 family */ - maj = PVR_MAJ(pvr) + 1; - min = PVR_MIN(pvr); - break; case 0x1008: /* 740P/750P ?? */ maj = ((pvr >> 8) & 0xFF) - 1; min = pvr & 0xFF; break; case 0x004e: /* POWER9 bits 12-15 give chip type */ + case 0x0080: /* POWER10 bit 12 gives SMT8/4 */ maj = (pvr >> 8) & 0x0F; min = pvr & 0xFF; break; @@ -932,6 +929,9 @@ void __init setup_arch(char **cmdline_p) /* Reserve large chunks of memory for use by CMA for KVM. 
*/ kvm_cma_reserve(); + /* Reserve large chunks of memory for use by CMA for hugetlb */ + gigantic_hugetlb_cma_reserve(); + klp_init_thread_info(&init_task); init_mm.start_code = (unsigned long)_stext; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 305ca89d856f..1823706ae076 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -19,11 +19,11 @@ #include <linux/memblock.h> #include <linux/export.h> #include <linux/nvram.h> +#include <linux/pgtable.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/processor.h> -#include <asm/pgtable.h> #include <asm/setup.h> #include <asm/smp.h> #include <asm/elf.h> @@ -74,20 +74,20 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); */ notrace void __init machine_init(u64 dt_ptr) { - unsigned int *addr = (unsigned int *)patch_site_addr(&patch__memset_nocache); - unsigned long insn; + struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache); + struct ppc_inst insn; /* Configure static keys first, now that we're relocated. */ setup_feature_keys(); - early_ioremap_setup(); + early_ioremap_init(); /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction_site(&patch__memcpy_nocache, PPC_INST_NOP); + patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP)); - insn = create_cond_branch(addr, branch_target(addr), 0x820000); + create_cond_branch(&insn, addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ /* Do some early initialization based on the flat device tree */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8105010b0e76..6be430107c6f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -30,13 +30,13 @@ #include <linux/lockdep.h> #include <linux/memory.h> #include <linux/nmi.h> +#include <linux/pgtable.h> #include <asm/debugfs.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> #include <asm/processor.h> -#include <asm/pgtable.h> #include <asm/smp.h> #include <asm/elf.h> #include <asm/machdep.h> @@ -196,7 +196,10 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { /* Enable AIL if possible */ - pseries_enable_reloc_on_exc(); + if (!pseries_enable_reloc_on_exc()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } /* * Tell the hypervisor that we want our exceptions to @@ -711,7 +714,7 @@ void __init exc_lvl_early_init(void) */ void __init emergency_stack_init(void) { - u64 limit; + u64 limit, mce_limit; unsigned int i; /* @@ -728,7 +731,16 @@ void __init emergency_stack_init(void) * initialized in kernel/irq.c. These are initialized here in order * to have emergency stacks available as early as possible. */ - limit = min(ppc64_bolted_size(), ppc64_rma_size); + limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size); + + /* + * Machine check on pseries calls rtas, but can't use the static + * rtas_args due to a machine check hitting while the lock is held. + * rtas args have to be under 4GB, so the machine check stack is + * limited to 4GB so args can be put on stack. 
+ */ + if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G) + mce_limit = SZ_4G; for_each_possible_cpu(i) { paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; @@ -738,7 +750,7 @@ void __init emergency_stack_init(void) paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; /* emergency stack for machine check exception handling. */ - paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE; + paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE; #endif } } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a264989626fd..d15a98c758b8 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -198,12 +198,21 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, int restart = 1; /* syscall ? */ - if (TRAP(regs) != 0x0C00) + if (!trap_is_syscall(regs)) + return; + + if (trap_norestart(regs)) return; /* error signalled ? */ - if (!(regs->ccr & 0x10000000)) + if (trap_is_scv(regs)) { + /* 32-bit compat mode sign extend? */ + if (!IS_ERR_VALUE(ret)) + return; + ret = -ret; + } else if (!(regs->ccr & 0x10000000)) { return; + } switch (ret) { case ERESTART_RESTARTBLOCK: @@ -236,9 +245,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, regs->nip -= 4; regs->result = 0; } else { - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; + if (trap_is_scv(regs)) { + regs->result = -EINTR; + regs->gpr[3] = -EINTR; + } else { + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + } } } @@ -258,19 +272,24 @@ static void do_signal(struct task_struct *tsk) if (ksig.sig <= 0) { /* No signal to deliver -- put the saved sigmask back */ restore_saved_sigmask(); - tsk->thread.regs->trap = 0; + set_trap_norestart(tsk->thread.regs); return; /* no signals delivered */ } -#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* * Reenable the DABR before delivering the signal to * user space. The DABR will have been cleared if it * triggered inside the kernel. 
*/ - if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type) - __set_breakpoint(&tsk->thread.hw_brk); -#endif + if (!IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) { + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (tsk->thread.hw_brk[i].address && tsk->thread.hw_brk[i].type) + __set_breakpoint(i, &tsk->thread.hw_brk[i]); + } + } + /* Re-enable the breakpoints for the signal stack */ thread_change_pc(tsk, tsk->thread.regs); @@ -285,7 +304,7 @@ static void do_signal(struct task_struct *tsk) ret = handle_rt_signal64(&ksig, oldset, tsk); } - tsk->thread.regs->trap = 0; + set_trap_norestart(tsk->thread.regs); signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 4f96d29a22bf..96950f189b5a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -47,7 +47,6 @@ #include <asm/unistd.h> #else #include <asm/ucontext.h> -#include <asm/pgtable.h> #endif #include "signal.h" @@ -103,22 +102,18 @@ static inline int save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; - int i; - /* Force usr to alway see softe as 1 (interrupts enabled) */ - elf_greg_t64 softe = 0x1; + int val, i; WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { - if (i == 14 && !FULL_REGS(regs)) - i = 32; - if ( i == PT_SOFTE) { - if(__put_user((unsigned int)softe, &frame->mc_gregs[i])) - return -EFAULT; - else - continue; - } - if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) + /* Force usr to alway see softe as 1 (interrupts enabled) */ + if (i == PT_SOFTE) + val = 1; + else + val = gregs[i]; + + if (__put_user(val, &frame->mc_gregs[i])) return -EFAULT; } return 0; @@ -500,7 +495,7 @@ static long restore_user_regs(struct pt_regs *regs, if (!sig) save_r2 = (unsigned int)regs->gpr[2]; err = restore_general_regs(regs, sr); - regs->trap = 0; + set_trap_norestart(regs); err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); if (!sig) regs->gpr[2] = (unsigned long) save_r2; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index adfde59cf4ba..bfc939360bad 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -21,11 +21,11 @@ #include <linux/ptrace.h> #include <linux/ratelimit.h> #include <linux/syscalls.h> +#include <linux/pagemap.h> #include <asm/sigcontext.h> #include <asm/ucontext.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/unistd.h> #include <asm/cacheflush.h> #include <asm/syscalls.h> @@ -40,8 +40,8 @@ #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) -#define TRAMP_TRACEBACK 3 -#define TRAMP_SIZE 6 +#define TRAMP_TRACEBACK 4 +#define TRAMP_SIZE 7 /* * When we have signals to deliver, we set up on the user stack, @@ -350,8 +350,8 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); - /* skip SOFTE */ - regs->trap = 0; + /* Don't allow userspace to set SOFTE */ + set_trap_norestart(regs); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); @@ -472,10 +472,8 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, &sc->gp_regs[PT_XER]); err |= __get_user(tsk->thread.ckpt_regs.ccr, 
&sc->gp_regs[PT_CCR]); - - /* Don't allow userspace to set the trap value */ - regs->trap = 0; - + /* Don't allow userspace to set SOFTE */ + set_trap_norestart(regs); /* These regs are not checkpointed; they can go in 'regs'. */ err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); @@ -603,13 +601,15 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; + /* bctrl # call the handler */ + err |= __put_user(PPC_INST_BCTRL, &tramp[0]); /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]); + (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]); + err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[2]); + err |= __put_user(PPC_INST_SC, &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) @@ -635,7 +635,6 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, long, ctx_size) { - unsigned char tmp; sigset_t set; unsigned long new_msr = 0; int ctx_has_vsx_region = 0; @@ -670,9 +669,8 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, } if (new_ctx == NULL) return 0; - if (!access_ok(new_ctx, ctx_size) - || __get_user(tmp, (u8 __user *) new_ctx) - || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1)) + if (!access_ok(new_ctx, ctx_size) || + fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) return -EFAULT; /* @@ -867,12 +865,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) goto badframe; - regs->link = (unsigned long) &frame->tramp[0]; + regs->nip = (unsigned long) &frame->tramp[0]; } /* Allocate a dummy caller frame for the signal handler. */ @@ -881,8 +879,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up "regs" so we "return" to the signal handler. */ if (is_elf2_task()) { - regs->nip = (unsigned long) ksig->ka.sa.sa_handler; - regs->gpr[12] = regs->nip; + regs->ctr = (unsigned long) ksig->ka.sa.sa_handler; + regs->gpr[12] = regs->ctr; } else { /* Handler is *really* a pointer to the function descriptor for * the signal routine. 
The first entry in the function @@ -892,7 +890,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, func_descr_t __user *funct_desc_ptr = (func_descr_t __user *) ksig->ka.sa.sa_handler; - err |= get_user(regs->nip, &funct_desc_ptr->entry); + err |= get_user(regs->ctr, &funct_desc_ptr->entry); err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6d2a3a3666f0..8261999c7d52 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -33,6 +33,7 @@ #include <linux/processor.h> #include <linux/random.h> #include <linux/stackprotector.h> +#include <linux/pgtable.h> #include <asm/ptrace.h> #include <linux/atomic.h> @@ -41,7 +42,6 @@ #include <asm/kvm_ppc.h> #include <asm/dbell.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/prom.h> #include <asm/smp.h> #include <asm/time.h> @@ -59,6 +59,7 @@ #include <asm/asm-prototypes.h> #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> +#include <asm/kup.h> #ifdef DEBUG #include <asm/udbg.h> @@ -1383,7 +1384,7 @@ void __init smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { - pr_info("Using small cores at SMT level\n"); + pr_info("Big cores detected but using small core scheduling\n"); power9_topology[0].mask = smallcore_smt_mask; powerpc_topology[0].mask = smallcore_smt_mask; } diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index c477b8585a29..b6440657ef92 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -260,7 +260,7 @@ static void raise_backtrace_ipi(cpumask_t *mask) pr_cont(" current pointer corrupt? (%px)\n", p->__current); pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1); - show_stack(p->__current, (unsigned long *)p->saved_r1); + show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING); } } diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index cbdf86228eaa..f73f4d72fea4 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -395,6 +395,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) li r3,0 blr +_ASM_NOKPROBE_SYMBOL(swsusp_arch_resume) /* FIXME:This construct is actually not useful since we don't shut * down the instruction MMU, we could just flip back MSR-DR on. @@ -406,4 +407,5 @@ turn_on_mmu: sync isync rfi +_ASM_NOKPROBE_SYMBOL(turn_on_mmu) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 7b7c89cad901..8e50818aa50b 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -60,6 +60,11 @@ notrace long system_call_exception(long r3, long r4, long r5, local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } /* * We use the return value of do_syscall_trace_enter() as the * syscall number. If the syscall was rejected for any reason @@ -78,6 +83,11 @@ notrace long system_call_exception(long r3, long r4, long r5, r8 = regs->gpr[8]; } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } return -ENOSYS; } @@ -102,6 +112,35 @@ notrace long system_call_exception(long r3, long r4, long r5, } /* + * local irqs must be disabled. 
Returns false if the caller must re-enable + * them, check for new work, and try again. + */ +static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) +{ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + + /* This pattern matches prep_irq_for_idle */ + if (clear_ri) + __hard_EE_RI_disable(); + else + __hard_irq_disable(); + if (unlikely(lazy_irq_pending_nocheck())) { + /* Took an interrupt, may have more exit work to do. */ + if (clear_ri) + __hard_RI_enable(); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + return false; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + + return true; +} + +/* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call * exit assembly should additionally load all GPR registers and CTR and XER @@ -111,7 +150,8 @@ notrace long system_call_exception(long r3, long r4, long r5, * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. */ notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs) + struct pt_regs *regs, + long scv) { unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; @@ -126,7 +166,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ti_flags = *ti_flagsp; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) { + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { r3 = -r3; regs->ccr |= 0x10000000; /* Set SO bit in CR */ @@ -181,26 +221,23 @@ again: else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* + * If userspace MSR has all available FP bits set, + * then they are live and no need to restore. If not, + * it means the regs were given up and restore_math + * may decide to restore them (to avoid taking an FP + * fault). + */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - - /* This pattern matches prep_irq_for_idle */ - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* scv need not set RI=0 because SRRs are not used */ + if (unlikely(!prep_irq_for_enabled_exit(!scv))) { local_irq_enable(); - /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -228,6 +265,10 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * AMR can only have been unlocked if we interrupted the kernel. 
+ */ kuap_check_amr(); local_irq_save(flags); @@ -259,24 +300,17 @@ again: else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* See above restore_math comment */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - trace_hardirqs_on(); - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit(true))) { local_irq_enable(); local_irq_disable(); - /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); #ifdef CONFIG_PPC_BOOK3E if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { @@ -307,13 +341,14 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long flags; unsigned long ret = 0; + unsigned long amr; if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); - kuap_check_amr(); + amr = kuap_get_and_check_amr(); if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); @@ -334,13 +369,7 @@ again: } } - trace_hardirqs_on(); - __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending_nocheck())) { - __hard_RI_enable(); - irq_soft_mask_set(IRQS_ALL_DISABLED); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + if (unlikely(!prep_irq_for_enabled_exit(true))) { /* * Can't local_irq_restore to replay if we were in * interrupt context. Must replay directly. @@ -354,8 +383,6 @@ again: /* Took an interrupt, may have more exit work to do. */ goto again; } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); } else { /* Returning to a kernel context with local irqs disabled. */ __hard_EE_RI_disable(); @@ -369,10 +396,11 @@ again: #endif /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. + * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr, + * which would cause Read-After-Write stalls. Hence, we take the AMR + * value from the check above. 
*/ - kuap_restore_amr(regs); + kuap_restore_amr(regs, amr); return ret; } diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 220ae11555f2..c2d737ff2e7b 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -197,7 +197,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall @@ -433,8 +433,8 @@ 336 common recv sys_recv compat_sys_recv 337 common recvfrom sys_recvfrom compat_sys_recvfrom 338 common shutdown sys_shutdown -339 common setsockopt sys_setsockopt compat_sys_setsockopt -340 common getsockopt sys_getsockopt compat_sys_getsockopt +339 common setsockopt sys_setsockopt sys_setsockopt +340 common getsockopt sys_getsockopt sys_getsockopt 341 common sendmsg sys_sendmsg compat_sys_sendmsg 342 common recvmsg sys_recvmsg compat_sys_recvmsg 343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32 @@ -525,5 +525,7 @@ 435 32 clone3 ppc_clone3 sys_clone3 435 64 clone3 sys_clone3 435 spu clone3 sys_ni_syscall +436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 479c70680b76..46b4ebc33db7 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -19,6 +19,7 @@ #include <asm/smp.h> #include <asm/pmc.h> #include <asm/firmware.h> +#include <asm/idle.h> #include <asm/svm.h> #include "cacheinfo.h" @@ -621,8 +622,10 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7); SYSFS_PMCSETUP(pmc8, SPRN_PMC8); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); +SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3); static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3); #endif /* HAS_PPC_PMC56 */ @@ -760,6 +763,74 @@ static void create_svm_file(void) } #endif /* CONFIG_PPC_SVM */ +#ifdef CONFIG_PPC_PSERIES +static void read_idle_purr(void *val) +{ + u64 *ret = val; + + *ret = read_this_idle_purr(); +} + +static ssize_t idle_purr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + u64 val; + + smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1); + return sprintf(buf, "%llx\n", val); +} +static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL); + +static void create_idle_purr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_create_file(s, &dev_attr_idle_purr); +} + +static void remove_idle_purr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_remove_file(s, &dev_attr_idle_purr); +} + +static void read_idle_spurr(void *val) +{ + u64 *ret = val; + + *ret = read_this_idle_spurr(); +} + +static ssize_t idle_spurr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + u64 val; + + smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1); + return sprintf(buf, "%llx\n", val); +} +static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL); + +static void create_idle_spurr_file(struct device *s) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + device_create_file(s, &dev_attr_idle_spurr); +} + +static void remove_idle_spurr_file(struct device *s) +{ + if 
(firmware_has_feature(FW_FEATURE_LPAR)) + device_remove_file(s, &dev_attr_idle_spurr); +} + +#else /* CONFIG_PPC_PSERIES */ +#define create_idle_purr_file(s) +#define remove_idle_purr_file(s) +#define create_idle_spurr_file(s) +#define remove_idle_spurr_file(s) +#endif /* CONFIG_PPC_PSERIES */ + static int register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -817,16 +888,22 @@ static int register_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_create_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { if (!firmware_has_feature(FW_FEATURE_LPAR)) add_write_permission_dev_attr(&dev_attr_purr); device_create_file(s, &dev_attr_purr); + create_idle_purr_file(s); } - if (cpu_has_feature(CPU_FTR_SPURR)) + if (cpu_has_feature(CPU_FTR_SPURR)) { device_create_file(s, &dev_attr_spurr); + create_idle_spurr_file(s); + } if (cpu_has_feature(CPU_FTR_DSCR)) device_create_file(s, &dev_attr_dscr); @@ -908,13 +985,20 @@ static int unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_remove_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_remove_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ - if (cpu_has_feature(CPU_FTR_PURR)) + if (cpu_has_feature(CPU_FTR_PURR)) { device_remove_file(s, &dev_attr_purr); + remove_idle_purr_file(s); + } - if (cpu_has_feature(CPU_FTR_SPURR)) + if (cpu_has_feature(CPU_FTR_SPURR)) { device_remove_file(s, &dev_attr_spurr); + remove_idle_spurr_file(s); + } if (cpu_has_feature(CPU_FTR_DSCR)) device_remove_file(s, &dev_attr_dscr); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 6fcae436ae51..f85539ebb513 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -183,6 +183,8 @@ static inline unsigned long read_spurr(unsigned long tb) #ifdef CONFIG_PPC_SPLPAR +#include <asm/dtl.h> + /* * Scan the dispatch trace log and count up the stolen time. * Should be called with interrupts disabled. diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7ea0ca044b65..42761ebec9f7 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -27,6 +27,7 @@ #include <asm/code-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> +#include <asm/inst.h> #ifdef CONFIG_DYNAMIC_FTRACE @@ -40,23 +41,23 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static unsigned int +static struct ppc_inst ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { - unsigned int op; + struct ppc_inst op; addr = ppc_function_entry((void *)addr); /* if (link) set op to 'bl' else 'b' */ - op = create_branch((unsigned int *)ip, addr, link ? 1 : 0); + create_branch(&op, (struct ppc_inst *)ip, addr, link ? 
1 : 0); return op; } static int -ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) +ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) { - unsigned int replaced; + struct ppc_inst replaced; /* * Note: @@ -67,18 +68,18 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ /* read the text we want to modify */ - if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read_inst(&replaced, (void *)ip)) return -EFAULT; /* Make sure it is what we expect it to be */ - if (replaced != old) { - pr_err("%p: replaced (%#x) != old (%#x)", - (void *)ip, replaced, old); + if (!ppc_inst_equal(replaced, old)) { + pr_err("%p: replaced (%s) != old (%s)", + (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); return -EINVAL; } /* replace the text with the new text */ - if (patch_instruction((unsigned int *)ip, new)) + if (patch_instruction((struct ppc_inst *)ip, new)) return -EPERM; return 0; @@ -89,27 +90,28 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { + struct ppc_inst op; addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ - return create_branch((unsigned int *)ip, addr, 0); + return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0; } -static int is_bl_op(unsigned int op) +static int is_bl_op(struct ppc_inst op) { - return (op & 0xfc000003) == 0x48000001; + return (ppc_inst_val(op) & 0xfc000003) == 0x48000001; } -static int is_b_op(unsigned int op) +static int is_b_op(struct ppc_inst op) { - return (op & 0xfc000003) == 0x48000000; + return (ppc_inst_val(op) & 0xfc000003) == 0x48000000; } -static unsigned long find_bl_target(unsigned long ip, unsigned int op) +static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) { int offset; - offset = (op & 0x03fffffc); + offset = (ppc_inst_val(op) & 0x03fffffc); /* make it signed */ if (offset & 0x02000000) offset |= 0xfe000000; @@ -125,17 +127,17 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op, pop; + struct ppc_inst op, pop; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -160,16 +162,18 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; + pop = ppc_inst(PPC_INST_NOP); - if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { + if (probe_kernel_read_inst(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); return -EFAULT; } /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { - pr_err("Unexpected instruction %08x around bl _mcount\n", op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && + !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { + pr_err("Unexpected instruction %s around bl _mcount\n", + ppc_inst_as_str(op)); return -EINVAL; } #else @@ -187,24 +191,24 @@ __ftrace_make_nop(struct module *mod, * Use a b +8 to jump over the load. 
*/ - pop = PPC_INST_BRANCH | 8; /* b +8 */ + pop = ppc_inst(PPC_INST_BRANCH | 8); /* b +8 */ /* * Check what is in the next instruction. We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. */ - if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { + if (probe_kernel_read_inst(&op, (void *)(ip + 4))) { pr_err("Fetching op failed.\n"); return -EFAULT; } - if (op != PPC_INST_LD_TOC) { - pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { + pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((unsigned int *)ip, pop)) { + if (patch_instruction((struct ppc_inst *)ip, pop)) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -217,17 +221,17 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + struct ppc_inst op; unsigned int jmp[4]; unsigned long ip = rec->ip; unsigned long tramp; - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -245,7 +249,7 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { pr_err("Failed to read %lx\n", tramp); return -EFAULT; } @@ -274,9 +278,9 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - op = PPC_INST_NOP; + op = ppc_inst(PPC_INST_NOP); - if (patch_instruction((unsigned int *)ip, op)) + if (patch_instruction((struct ppc_inst *)ip, op)) return -EPERM; return 0; @@ -287,6 +291,7 @@ __ftrace_make_nop(struct module *mod, static unsigned long find_ftrace_tramp(unsigned long ip) { int i; + struct ppc_inst instr; /* * We have the compiler generated long_branch tramps at the end @@ -295,7 +300,8 @@ static unsigned long find_ftrace_tramp(unsigned long ip) for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) if (!ftrace_tramps[i]) continue; - else if (create_branch((void *)ip, ftrace_tramps[i], 0)) + else if (create_branch(&instr, (void *)ip, + ftrace_tramps[i], 0) == 0) return ftrace_tramps[i]; return 0; @@ -322,8 +328,10 @@ static int add_ftrace_tramp(unsigned long tramp) */ static int setup_mcount_compiler_tramp(unsigned long tramp) { - int i, op; + int i; + struct ppc_inst op; unsigned long ptr; + struct ppc_inst instr; static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; /* Is this a known long jump tramp? 
*/ @@ -341,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; /* New trampoline -- read where this goes */ - if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)tramp)) { pr_debug("Fetching opcode failed.\n"); return -1; } @@ -366,13 +374,13 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) #else ptr = ppc_global_function_entry((void *)ftrace_caller); #endif - if (!create_branch((void *)tramp, ptr, 0)) { + if (create_branch(&instr, (void *)tramp, ptr, 0)) { pr_debug("%ps is not reachable from existing mcount tramp\n", (void *)ptr); return -1; } - if (patch_branch((unsigned int *)tramp, ptr, 0)) { + if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { pr_debug("REL24 out of range!\n"); return -1; } @@ -388,17 +396,17 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; - unsigned int op; + struct ppc_inst op; /* Read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -416,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) { + if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -428,7 +436,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -438,7 +446,7 @@ int ftrace_make_nop(struct module *mod, if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_call_replace(ip, addr, 1); - new = PPC_INST_NOP; + new = ppc_inst(PPC_INST_NOP); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) return __ftrace_make_nop_kernel(rec, addr); @@ -481,7 +489,7 @@ int ftrace_make_nop(struct module *mod, */ #ifndef CONFIG_MPROFILE_KERNEL static int -expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* * We expect to see: @@ -492,16 +500,17 @@ expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. 
*/ - if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) + if (!ppc_inst_equal(op0, ppc_inst(0x48000008)) || + (ppc_inst_val(op1) & 0xffff0000) != 0xe8410000) return 0; return 1; } #else static int -expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ - if (op0 != PPC_INST_NOP) + if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP))) return 0; return 1; } @@ -510,18 +519,22 @@ expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op[2]; + struct ppc_inst op[2]; + struct ppc_inst instr; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; /* read where this goes */ - if (probe_kernel_read(op, ip, sizeof(op))) + if (probe_kernel_read_inst(op, ip)) + return -EFAULT; + + if (probe_kernel_read_inst(op + 1, ip + 4)) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %x %x\n", - ip, op[0], op[1]); + pr_err("Unexpected call sequence at %p: %s %s\n", + ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1])); return -EINVAL; } @@ -557,7 +570,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* Ensure branch is within 24 bits */ - if (!create_branch(ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&instr, ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } @@ -574,16 +587,17 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + int err; + struct ppc_inst op; unsigned long ip = rec->ip; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read_inst(&op, (void *)ip)) return -EFAULT; /* It should be pointing to a nop */ - if (op != PPC_INST_NOP) { - pr_err("Expected NOP but have %x\n", op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -594,16 +608,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* create the branch to the trampoline */ - op = create_branch((unsigned int *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); - if (!op) { + err = create_branch(&op, (struct ppc_inst *)ip, + rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; } pr_devel("write to %lx\n", rec->ip); - if (patch_instruction((unsigned int *)ip, op)) + if (patch_instruction((struct ppc_inst *)ip, op)) return -EPERM; return 0; @@ -613,7 +627,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; + struct ppc_inst op; void *ip = (void *)rec->ip; unsigned long tramp, entry, ptr; @@ -634,13 +648,13 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } /* Make sure we have a nop */ - if (probe_kernel_read(&op, ip, sizeof(op))) { + if (probe_kernel_read_inst(&op, ip)) { pr_err("Unable to read ftrace location %p\n", ip); return -EFAULT; } - if (op != PPC_INST_NOP) { - pr_err("Unexpected call sequence at %p: %x\n", ip, op); + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); return -EINVAL; } 
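The ftrace hunks above move from raw unsigned int opcodes to the struct ppc_inst accessors (ppc_inst_val(), ppc_inst_equal(), probe_kernel_read_inst()), but the underlying 24-bit branch decoding is unchanged. A minimal standalone sketch of that decoding, using a plain uint32_t word instead of the kernel's struct ppc_inst (an illustration of the same arithmetic, not the kernel helpers themselves):

#include <stdint.h>
#include <stdio.h>

/* An I-form branch-and-link: primary opcode 18 (0x48......), LK bit set. */
static int is_bl_op(uint32_t op)
{
        return (op & 0xfc000003) == 0x48000001;
}

/* Recover the target from the signed, word-aligned 24-bit offset field. */
static unsigned long find_bl_target(unsigned long ip, uint32_t op)
{
        int offset = op & 0x03fffffc;

        if (offset & 0x02000000)        /* sign-extend the offset */
                offset |= 0xfe000000;

        return ip + (long)offset;
}

int main(void)
{
        uint32_t op = 0x4bfffff1;       /* "bl .-0x10" */

        if (is_bl_op(op))
                printf("branch target: 0x%lx\n", find_bl_target(0x1000, op));
        return 0;
}

This mirrors the is_bl_op()/find_bl_target() pair visible in the hunks above; the conversion to ppc_inst_val() only changes how the 32-bit word is fetched and compared, not this arithmetic.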
@@ -661,7 +675,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -670,7 +684,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) */ if (test_24bit_addr(ip, addr)) { /* within range */ - old = PPC_INST_NOP; + old = ppc_inst(PPC_INST_NOP); new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) @@ -700,7 +714,7 @@ static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - unsigned int op; + struct ppc_inst op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -712,14 +726,14 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + if (probe_kernel_read_inst(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -748,7 +762,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* The new target may be within range */ if (test_24bit_addr(ip, addr)) { /* within range */ - if (patch_branch((unsigned int *)ip, addr, BRANCH_SET_LINK)) { + if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -776,12 +790,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* Ensure branch is within 24 bits */ - if (!create_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -794,7 +808,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { unsigned long ip = rec->ip; - unsigned int old, new; + struct ppc_inst old, new; /* * If the calling address is more that 24 bits away, @@ -834,10 +848,10 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - unsigned int old, new; + struct ppc_inst old, new; int ret; - old = *(unsigned int *)&ftrace_call; + old = ppc_inst_read((struct ppc_inst *)&ftrace_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); @@ -845,7 +859,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); - old = *(unsigned int *)&ftrace_regs_call; + old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); } @@ -919,7 +933,7 @@ int ftrace_enable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - unsigned 
int old, new; + struct ppc_inst old, new; old = ftrace_call_replace(ip, stub, 0); new = ftrace_call_replace(ip, addr, 0); @@ -932,7 +946,7 @@ int ftrace_disable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - unsigned int old, new; + struct ppc_inst old, new; old = ftrace_call_replace(ip, addr, 0); new = ftrace_call_replace(ip, stub, 0); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3fca22276bb1..d1ebe152f210 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -39,7 +39,6 @@ #include <linux/kmsg_dump.h> #include <asm/emulated_ops.h> -#include <asm/pgtable.h> #include <linux/uaccess.h> #include <asm/debugfs.h> #include <asm/io.h> @@ -441,15 +440,12 @@ nonrecoverable: void system_reset_exception(struct pt_regs *regs) { unsigned long hsrr0, hsrr1; - bool nested = in_nmi(); bool saved_hsrrs = false; + u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); - /* - * Avoid crashes in case of nested NMI exceptions. Recoverability - * is determined by RI and in_nmi - */ - if (!nested) - nmi_enter(); + this_cpu_set_ftrace_enabled(0); + + nmi_enter(); /* * System reset can interrupt code where HSRRs are live and MSR[RI]=1. @@ -510,19 +506,20 @@ out: #ifdef CONFIG_PPC_BOOK3S_64 BUG_ON(get_paca()->in_nmi == 0); if (get_paca()->in_nmi > 1) - nmi_panic(regs, "Unrecoverable nested System Reset"); + die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable System Reset"); + die("Unrecoverable System Reset", regs, SIGABRT); if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); mtspr(SPRN_HSRR1, hsrr1); } - if (!nested) - nmi_exit(); + nmi_exit(); + + this_cpu_set_ftrace_enabled(ftrace_enabled); /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -583,6 +580,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR #define REASON_TRAP ESR_PTR +#define REASON_PREFIXED 0 +#define REASON_BOUNDARY 0 /* single-step stuff */ #define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC) @@ -597,12 +596,16 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_ILLEGAL SRR1_PROGILL #define REASON_PRIVILEGED SRR1_PROGPRIV #define REASON_TRAP SRR1_PROGTRAP +#define REASON_PREFIXED SRR1_PREFIXED +#define REASON_BOUNDARY SRR1_BOUNDARY #define single_stepping(regs) ((regs)->msr & MSR_SE) #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) #define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) #endif +#define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4) + #if defined(CONFIG_E500) int machine_check_e500mc(struct pt_regs *regs) { @@ -823,9 +826,20 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { int recover = 0; - bool nested = in_nmi(); - if (!nested) - nmi_enter(); + + /* + * BOOK3S_64 does not call this handler as a non-maskable interrupt + * (it uses its own early real-mode handler to handle the MCE proper + * and then raises irq_work to call this handler when interrupts are + * enabled). + * + * This is silly. The BOOK3S_64 should just call a different function + * rather than expecting semantics to magically change. Something + * like 'non_nmi_machine_check_exception()', perhaps? 
+ */ + const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64); + + if (nmi) nmi_enter(); __this_cpu_inc(irq_stat.mce_exceptions); @@ -851,20 +865,18 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (!nested) - nmi_exit(); + if (nmi) nmi_exit(); die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable Machine check"); + die("Unrecoverable Machine check", regs, SIGBUS); return; bail: - if (!nested) - nmi_exit(); + if (nmi) nmi_exit(); } void SMIException(struct pt_regs *regs) @@ -1593,11 +1605,20 @@ void alignment_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; + unsigned long reason; /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); + reason = get_reason(regs); + + if (reason & REASON_BOUNDARY) { + sig = SIGBUS; + code = BUS_ADRALN; + goto bad; + } + if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) goto bail; @@ -1606,7 +1627,8 @@ void alignment_exception(struct pt_regs *regs) fixed = fix_alignment(regs); if (fixed == 1) { - regs->nip += 4; /* skip over emulated instruction */ + /* skip over emulated instruction */ + regs->nip += inst_length(reason); emulate_single_step(regs); goto bail; } @@ -1619,6 +1641,7 @@ void alignment_exception(struct pt_regs *regs) sig = SIGBUS; code = BUS_ADRALN; } +bad: if (user_mode(regs)) _exception(sig, regs, code, regs->dar); else @@ -1720,6 +1743,7 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TAR_LG] = "TAR", [FSCR_MSGP_LG] = "MSGP", [FSCR_SCV_LG] = "SCV", + [FSCR_PREFIX_LG] = "PREFIX", }; char *facility = "unknown"; u64 value; @@ -2036,14 +2060,6 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -#if !defined(CONFIG_TAU_INT) -void TAUException(struct pt_regs *regs) -{ - printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", - regs->nip, regs->msr, regs->trap, print_tainted()); -} -#endif /* CONFIG_INT_TAU */ - #ifdef CONFIG_ALTIVEC void altivec_assist_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 1cfef0e5fec5..d200e7df7167 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <asm/sstep.h> +#include <asm/inst.h> #define UPROBE_TRAP_NR UINT_MAX @@ -111,7 +112,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = utask->vaddr + MAX_UINSN_BYTES; + regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn); user_disable_single_step(current); return 0; @@ -173,7 +174,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. 
*/ - ret = emulate_step(regs, auprobe->insn); + ret = emulate_step(regs, ppc_inst_read(&auprobe->insn)); if (ret > 0) return true; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f38f26e844b6..8dad44262e75 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -18,7 +18,6 @@ #include <linux/security.h> #include <linux/memblock.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -171,7 +170,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * and end up putting it elsewhere. * Add enough to the size so that the result can be aligned. */ - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; vdso_base = get_unmapped_area(NULL, vdso_base, (vdso_pages << PAGE_SHIFT) + @@ -211,11 +210,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto fail_mmapsem; } - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return 0; fail_mmapsem: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return rc; } @@ -678,7 +677,7 @@ int vdso_getcpu_init(void) node = cpu_to_node(cpu); WARN_ON_ONCE(node > 0xffff); - val = (cpu & 0xfff) | ((node & 0xffff) << 16); + val = (cpu & 0xffff) | ((node & 0xffff) << 16); mtspr(SPRN_SPRG_VDSO_WRITE, val); get_paca()->sprg_vdso = val; diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 526f5ba2593e..cab14324242b 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -8,6 +8,7 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/vdso.h> +#include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> .text @@ -24,14 +25,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc mflr r12 .cfi_register lr,r12 - mr r11,r3 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r10, r0 mtlr r12 - mr r10,r3 lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) @@ -48,7 +47,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) lwz r7,CFG_ICACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index dc84f5ae3802..067247d3efb9 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -10,35 +10,13 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/vdso.h> +#include <asm/vdso_datapage.h> .text .global __kernel_datapage_offset; __kernel_datapage_offset: .long 0 -V_FUNCTION_BEGIN(__get_datapage) - .cfi_startproc - /* We don't want that exposed or overridable as we want other objects - * to be able to bl directly to here - */ - .protected __get_datapage - .hidden __get_datapage - - mflr r0 - .cfi_register lr,r0 - - bcl 20,31,data_page_branch -data_page_branch: - mflr r3 - mtlr r0 - addi r3, r3, __kernel_datapage_offset-data_page_branch - lwz r0,0(r3) - .cfi_restore lr - add r3,r0,r3 - blr - .cfi_endproc -V_FUNCTION_END(__get_datapage) - /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; * @@ -53,7 +31,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - bl 
V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 cmpldi cr0,r4,0 @@ -75,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 1c9a04703250..20f8be40c653 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -9,6 +9,7 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/vdso.h> +#include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> @@ -26,7 +27,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) mr r11,r3 /* r11 holds tv */ mr r10,r4 /* r10 holds tz */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + get_datapage r3, r0 cmpldi r11,0 /* check if tv is NULL */ beq 2f lis r7,1000000@ha /* load up USEC_PER_SEC */ @@ -71,7 +72,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) mflr r12 /* r12 saves lr */ .cfi_register lr,r12 mr r11,r4 /* r11 saves tp */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + get_datapage r3, r0 lis r7,NSEC_PER_SEC@h /* want nanoseconds */ ori r7,r7,NSEC_PER_SEC@l beq cr5,70f @@ -188,7 +189,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) mflr r12 .cfi_register lr,r12 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 lwz r5, CLOCK_HRTIMER_RES(r3) mtlr r12 li r3,0 @@ -221,7 +222,7 @@ V_FUNCTION_BEGIN(__kernel_time) .cfi_register lr,r12 mr r11,r3 /* r11 holds t */ - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 ld r4,STAMP_XTIME_SEC(r3) diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index a8cc0409d7d2..bbf68cd01088 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -6,6 +6,7 @@ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. */ +#include <asm/cache.h> /* IFETCH_ALIGN_BYTES */ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/unistd.h> @@ -14,21 +15,17 @@ .text -/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from - the return address to get an address in the middle of the presumed - call instruction. Since we don't have a call here, we artificially - extend the range covered by the unwind info by padding before the - real start. */ - nop .balign 8 + .balign IFETCH_ALIGN_BYTES V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) -.Lsigrt_start = . - 4 +.Lsigrt_start: + bctrl /* call the handler */ addi r1, r1, __SIGNAL_FRAMESIZE li r0,__NR_rt_sigreturn sc .Lsigrt_end: V_FUNCTION_END(__kernel_sigtramp_rt64) -/* The ".balign 8" above and the following zeros mimic the old stack +/* The .balign 8 above and the following zeros mimic the old stack trampoline layout. The last magic value is the ucontext pointer, chosen in such a way that older libgcc unwind code returns a zero for a sigcontext pointer. 
*/ diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index 4acd3fb2b38e..ae632569446f 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -10,6 +10,7 @@ #include <asm/processor.h> #include <asm/switch_to.h> #include <linux/uaccess.h> +#include <asm/inst.h> /* Functions in vector.S */ extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); @@ -260,21 +261,24 @@ static unsigned int rfin(unsigned int x) int emulate_altivec(struct pt_regs *regs) { - unsigned int instr, i; + struct ppc_inst instr; + unsigned int i, word; unsigned int va, vb, vc, vd; vector128 *vrs; - if (get_user(instr, (unsigned int __user *) regs->nip)) + if (get_user_instr(instr, (void __user *)regs->nip)) return -EFAULT; - if ((instr >> 26) != 4) + + word = ppc_inst_val(instr); + if (ppc_inst_primary_opcode(instr) != 4) return -EINVAL; /* not an altivec instruction */ - vd = (instr >> 21) & 0x1f; - va = (instr >> 16) & 0x1f; - vb = (instr >> 11) & 0x1f; - vc = (instr >> 6) & 0x1f; + vd = (word >> 21) & 0x1f; + va = (word >> 16) & 0x1f; + vb = (word >> 11) & 0x1f; + vc = (word >> 6) & 0x1f; vrs = current->thread.vr_state.vr; - switch (instr & 0x3f) { + switch (word & 0x3f) { case 10: switch (vc) { case 0: /* vaddfp */ diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index d20c5e79e03c..801dc28fdcca 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -76,9 +76,7 @@ _GLOBAL(load_up_altivec) oris r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif - /* Don't care if r4 overflows, this is desired behaviour */ - lbz r4,THREAD_LOAD_VEC(r5) - addi r4,r4,1 + li r4,1 stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE li r4,1 @@ -89,6 +87,7 @@ _GLOBAL(load_up_altivec) REST_32VRS(0,r4,r6) /* restore registers and return */ blr +_ASM_NOKPROBE_SYMBOL(load_up_altivec) /* * save_altivec(tsk) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 31a0f201fb6f..326e113d2e45 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -15,7 +15,6 @@ #include <asm/thread_info.h> #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT) -#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT) ENTRY(_stext) @@ -90,6 +89,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT @@ -116,7 +116,7 @@ SECTIONS } :text - . = ALIGN(ETEXT_ALIGN_SIZE); + . = ALIGN(PAGE_SIZE); _etext = .; PROVIDE32 (etext = .); diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 86380c69f5ce..4aff6846c772 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -7,7 +7,7 @@ obj-y += core.o crash.o core_$(BITS).o obj-$(CONFIG_PPC32) += relocate_32.o -obj-$(CONFIG_KEXEC_FILE) += file_load.o elf_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o ifdef CONFIG_HAVE_IMA_KEXEC ifdef CONFIG_IMA diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 078fe3d76feb..56da5eb2b923 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -115,11 +115,12 @@ void machine_kexec(struct kimage *image) void __init reserve_crashkernel(void) { - unsigned long long crash_size, crash_base; + unsigned long long crash_size, crash_base, total_mem_sz; int ret; + total_mem_sz = memory_limit ? 
memory_limit : memblock_phys_mem_size(); /* use common parsing */ - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; @@ -178,6 +179,7 @@ void __init reserve_crashkernel(void) /* Crash kernel trumps memory limit */ if (memory_limit && memory_limit <= crashk_res.end) { memory_limit = crashk_res.end + 1; + total_mem_sz = memory_limit; printk("Adjusted memory limit for crashkernel, now 0x%llx\n", memory_limit); } @@ -186,7 +188,7 @@ void __init reserve_crashkernel(void) "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crashk_res.start >> 20), - (unsigned long)(memblock_phys_mem_size() >> 20)); + (unsigned long)(total_mem_sz >> 20)); if (!memblock_is_region_memory(crashk_res.start, crash_size) || memblock_reserve(crashk_res.start, crash_size)) { diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index b4184092172a..8a449b2d8715 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -152,6 +152,8 @@ static void kexec_smp_down(void *arg) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); + reset_sprs(); + kexec_smp_wait(); /* NOTREACHED */ } diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index d488311efab1..c9a889880214 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -311,6 +311,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); + /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ + printk_nmi_enter(); + /* * This function is only called after the system * has panicked or is otherwise in a critical state. diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 3072fd6dbe94..d0e459bb2f05 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -35,6 +35,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, void *fdt; const void *slave_code; struct elfhdr ehdr; + char *modified_cmdline = NULL; struct kexec_elf_info elf_info; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ppc64_rma_size }; @@ -46,6 +47,14 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) goto out; + if (image->type == KEXEC_TYPE_CRASH) { + /* min & max buffer values for kdump case */ + kbuf.buf_min = pbuf.buf_min = crashk_res.start; + kbuf.buf_max = pbuf.buf_max = + ((crashk_res.end < ppc64_rma_size) ? 
+ crashk_res.end : (ppc64_rma_size - 1)); + } + ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); if (ret) goto out; @@ -60,6 +69,25 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem); + /* Load additional segments needed for panic kernel */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = load_crashdump_segments_ppc64(image, &kbuf); + if (ret) { + pr_err("Failed to load kdump kernel segments\n"); + goto out; + } + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } + if (initrd != NULL) { kbuf.buffer = initrd; kbuf.bufsz = kbuf.memsz = initrd_len; @@ -88,7 +116,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, goto out; } - ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr, + initrd_len, cmdline); if (ret) goto out; @@ -107,12 +136,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; - ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, - fdt_load_addr); + ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); if (ret) pr_err("Error setting up the purgatory.\n"); out: + kfree(modified_cmdline); kexec_free_elf_info(&elf_info); /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c index 143c91724617..9a232bc36c8f 100644 --- a/arch/powerpc/kexec/file_load.c +++ b/arch/powerpc/kexec/file_load.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * ppc64 code to implement the kexec_file_load syscall + * powerpc code to implement the kexec_file_load syscall * * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) * Copyright (C) 2004 IBM Corp. @@ -18,23 +18,45 @@ #include <linux/kexec.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <asm/setup.h> #include <asm/ima.h> -#define SLAVE_CODE_SIZE 256 +#define SLAVE_CODE_SIZE 256 /* First 0x100 bytes */ -const struct kexec_file_ops * const kexec_file_loaders[] = { - &kexec_elf64_ops, - NULL -}; - -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) +/** + * setup_kdump_cmdline - Prepend "elfcorehdr=<addr> " to command line + * of kdump kernel for exporting the core. + * @image: Kexec image + * @cmdline: Command line parameters to update. + * @cmdline_len: Length of the cmdline parameters. + * + * kdump segment must be setup before calling this function. + * + * Returns new cmdline buffer for kdump kernel on success, NULL otherwise. + */ +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) { - /* We don't support crash kernels yet. 
*/ - if (image->type == KEXEC_TYPE_CRASH) - return -EOPNOTSUPP; + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->arch.elfcorehdr_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } - return kexec_image_probe_default(image, buf, buf_len); + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + // Ensure it's nul terminated + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; } /** @@ -236,6 +258,20 @@ int setup_new_fdt(const struct kimage *image, void *fdt, } } + if (image->type == KEXEC_TYPE_CRASH) { + /* + * Avoid elfcorehdr from being stomped on in kdump kernel by + * setting up memory reserve map. + */ + ret = fdt_add_mem_rsv(fdt, image->arch.elfcorehdr_addr, + image->arch.elf_headers_sz); + if (ret) { + pr_err("Error reserving elfcorehdr memory: %s\n", + fdt_strerror(ret)); + goto err; + } + } + ret = setup_ima_buffer(image, fdt, chosen_node); if (ret) { pr_err("Error setting up the new device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c new file mode 100644 index 000000000000..53bb71e3a2e1 --- /dev/null +++ b/arch/powerpc/kexec/file_load_64.c @@ -0,0 +1,1119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2020 IBM Corporation + * + * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Hari Bathini, IBM Corporation. + */ + +#include <linux/kexec.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> +#include <linux/of_device.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/drmem.h> +#include <asm/kexec_ranges.h> +#include <asm/crashdump-ppc64.h> + +struct umem_info { + u64 *buf; /* data buffer for usable-memory property */ + u32 size; /* size allocated for the data buffer */ + u32 max_entries; /* maximum no. of entries */ + u32 idx; /* index of current entry */ + + /* usable memory ranges to look up */ + unsigned int nr_ranges; + const struct crash_mem_range *ranges; +}; + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &kexec_elf64_ops, + NULL +}; + +/** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. 
+ */ +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_kernel_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); + if (ret) + goto out; + + /* exclude memory ranges should be sorted for easy lookup */ + sort_memory_ranges(*mem_ranges, true); +out: + if (ret) + pr_err("Failed to setup exclude memory ranges\n"); + return ret; +} + +/** + * get_usable_memory_ranges - Get usable memory ranges. This list includes + * regions like crashkernel, opal/rtas & tce-table, + * that kdump kernel could use. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_usable_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + /* + * Early boot failure observed on guests when low memory (first memory + * block?) is not added to usable memory. So, add [0, crashk_res.end] + * instead of [crashk_res.start, crashk_res.end] to workaround it. + * Also, crashed kernel's memory must be added to reserve map to + * avoid kdump kernel from using it. + */ + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup usable memory ranges\n"); + return ret; +} + +/** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + struct memblock_region *reg; + struct crash_mem *tmem; + int ret; + + for_each_memblock(memory, reg) { + u64 base, size; + + base = (u64)reg->base; + size = (u64)reg->size; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Reallocate memory ranges if there is no space to split ranges */ + tmem = *mem_ranges; + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + goto out; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + /* + * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL + * regions are exported to save their context at the time of + * crash, they should actually be backed up just like the + * first 64K bytes of memory. 
+ */ + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + +/** + * get_reserved_memory_ranges - Get reserve memory ranges. This list includes + * memory regions that should be added to the + * memory reserve map to ensure the region is + * protected from any mischief. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_reserved_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup reserved memory ranges\n"); + return ret; +} + +/** + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. + */ +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + /* + * memblock uses [start, end) convention while it is + * [start, end] here. Fix the off-by-one to have the + * same convention. + */ + end -= 1; + + if (start > buf_max) + continue; + + /* Memory hole not found */ + if (end < buf_min) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. Set kbuf->mem */ + kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1, + kbuf->buf_align); + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a + * suitable buffer with top down approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmax = buf_max; + for (i = (emem->nr_ranges - 1); i >= 0; i--) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (start > tmax) + continue; + + if (end < tmax) { + tmin = (end < buf_min ? 
buf_min : end + 1); + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmax = start - 1; + + if (tmax < buf_min) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmin = buf_min; + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + } + return ret; +} + +/** + * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. + */ +static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + /* + * memblock uses [start, end) convention while it is + * [start, end] here. Fix the off-by-one to have the + * same convention. + */ + end -= 1; + + if (end < buf_min) + continue; + + /* Memory hole not found */ + if (start > buf_max) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. Set kbuf->mem */ + kbuf->mem = start; + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a + * suitable buffer with bottom up approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmin = buf_min; + for (i = 0; i < emem->nr_ranges; i++) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (end < tmin) + continue; + + if (start > tmin) { + tmax = (start > buf_max ? buf_max : start - 1); + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmin = end + 1; + + if (tmin > buf_max) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmax = buf_max; + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + } + return ret; +} + +/** + * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries + * @um_info: Usable memory buffer and ranges info. + * @cnt: No. of entries to accommodate. + * + * Frees up the old buffer if memory reallocation fails. + * + * Returns buffer on success, NULL on error. + */ +static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) +{ + u32 new_size; + u64 *tbuf; + + if ((um_info->idx + cnt) <= um_info->max_entries) + return um_info->buf; + + new_size = um_info->size + MEM_RANGE_CHUNK_SZ; + tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL); + if (tbuf) { + um_info->buf = tbuf; + um_info->size = new_size; + um_info->max_entries = (um_info->size / sizeof(u64)); + } + + return tbuf; +} + +/** + * add_usable_mem - Add the usable memory ranges within the given memory range + * to the buffer + * @um_info: Usable memory buffer and ranges info. 
+ * @base: Base address of memory range to look for. + * @end: End address of memory range to look for. + * + * Returns 0 on success, negative errno on error. + */ +static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end) +{ + u64 loc_base, loc_end; + bool add; + int i; + + for (i = 0; i < um_info->nr_ranges; i++) { + add = false; + loc_base = um_info->ranges[i].start; + loc_end = um_info->ranges[i].end; + if (loc_base >= base && loc_end <= end) + add = true; + else if (base < loc_end && end > loc_base) { + if (loc_base < base) + loc_base = base; + if (loc_end > end) + loc_end = end; + add = true; + } + + if (add) { + if (!check_realloc_usable_mem(um_info, 2)) + return -ENOMEM; + + um_info->buf[um_info->idx++] = cpu_to_be64(loc_base); + um_info->buf[um_info->idx++] = + cpu_to_be64(loc_end - loc_base + 1); + } + } + + return 0; +} + +/** + * kdump_setup_usable_lmb - This is a callback function that gets called by + * walk_drmem_lmbs for every LMB to set its + * usable memory ranges. + * @lmb: LMB info. + * @usm: linux,drconf-usable-memory property value. + * @data: Pointer to usable memory buffer and ranges info. + * + * Returns 0 on success, negative errno on error. + */ +static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm, + void *data) +{ + struct umem_info *um_info; + int tmp_idx, ret; + u64 base, end; + + /* + * kdump load isn't supported on kernels already booted with + * linux,drconf-usable-memory property. + */ + if (*usm) { + pr_err("linux,drconf-usable-memory property already exists!"); + return -EINVAL; + } + + um_info = data; + tmp_idx = um_info->idx; + if (!check_realloc_usable_mem(um_info, 1)) + return -ENOMEM; + + um_info->idx++; + base = lmb->base_addr; + end = base + drmem_lmb_size() - 1; + ret = add_usable_mem(um_info, base, end); + if (!ret) { + /* + * Update the no. of ranges added. Two entries (base & size) + * for every range added. + */ + um_info->buf[tmp_idx] = + cpu_to_be64((um_info->idx - tmp_idx - 1) / 2); + } + + return ret; +} + +#define NODE_PATH_LEN 256 +/** + * add_usable_mem_property - Add usable memory property for the given + * memory node. + * @fdt: Flattened device tree for the kdump kernel. + * @dn: Memory node. + * @um_info: Usable memory buffer and ranges info. + * + * Returns 0 on success, negative errno on error. 
+ */ +static int add_usable_mem_property(void *fdt, struct device_node *dn, + struct umem_info *um_info) +{ + int n_mem_addr_cells, n_mem_size_cells, node; + char path[NODE_PATH_LEN]; + int i, len, ranges, ret; + const __be32 *prop; + u64 base, end; + + of_node_get(dn); + + if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) { + pr_err("Buffer (%d) too small for memory node: %pOF\n", + NODE_PATH_LEN, dn); + return -EOVERFLOW; + } + pr_debug("Memory node path: %s\n", path); + + /* Now that we know the path, find its offset in kdump kernel's fdt */ + node = fdt_path_offset(fdt, path); + if (node < 0) { + pr_err("Malformed device tree: error reading %s\n", path); + ret = -EINVAL; + goto out; + } + + /* Get the address & size cells */ + n_mem_addr_cells = of_n_addr_cells(dn); + n_mem_size_cells = of_n_size_cells(dn); + pr_debug("address cells: %d, size cells: %d\n", n_mem_addr_cells, + n_mem_size_cells); + + um_info->idx = 0; + if (!check_realloc_usable_mem(um_info, 2)) { + ret = -ENOMEM; + goto out; + } + + prop = of_get_property(dn, "reg", &len); + if (!prop || len <= 0) { + ret = 0; + goto out; + } + + /* + * "reg" property represents sequence of (addr,size) tuples + * each representing a memory range. + */ + ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); + + for (i = 0; i < ranges; i++) { + base = of_read_number(prop, n_mem_addr_cells); + prop += n_mem_addr_cells; + end = base + of_read_number(prop, n_mem_size_cells) - 1; + prop += n_mem_size_cells; + + ret = add_usable_mem(um_info, base, end); + if (ret) + goto out; + } + + /* + * No kdump kernel usable memory found in this memory node. + * Write (0,0) tuple in linux,usable-memory property for + * this region to be ignored. + */ + if (um_info->idx == 0) { + um_info->buf[0] = 0; + um_info->buf[1] = 0; + um_info->idx = 2; + } + + ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf, + (um_info->idx * sizeof(u64))); + +out: + of_node_put(dn); + return ret; +} + + +/** + * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory + * and linux,drconf-usable-memory DT properties as + * appropriate to restrict its memory usage. + * @fdt: Flattened device tree for the kdump kernel. + * @usable_mem: Usable memory ranges for kdump kernel. + * + * Returns 0 on success, negative errno on error. 
+ */ +static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem) +{ + struct umem_info um_info; + struct device_node *dn; + int node, ret = 0; + + if (!usable_mem) { + pr_err("Usable memory ranges for kdump kernel not found\n"); + return -ENOENT; + } + + node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory"); + if (node == -FDT_ERR_NOTFOUND) + pr_debug("No dynamic reconfiguration memory found\n"); + else if (node < 0) { + pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n"); + return -EINVAL; + } + + um_info.buf = NULL; + um_info.size = 0; + um_info.max_entries = 0; + um_info.idx = 0; + /* Memory ranges to look up */ + um_info.ranges = &(usable_mem->ranges[0]); + um_info.nr_ranges = usable_mem->nr_ranges; + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (dn) { + ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb); + of_node_put(dn); + + if (ret) { + pr_err("Could not setup linux,drconf-usable-memory property for kdump\n"); + goto out; + } + + ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory", + um_info.buf, (um_info.idx * sizeof(u64))); + if (ret) { + pr_err("Failed to update fdt with linux,drconf-usable-memory property"); + goto out; + } + } + + /* + * Walk through each memory node and set linux,usable-memory property + * for the corresponding node in kdump kernel's fdt. + */ + for_each_node_by_type(dn, "memory") { + ret = add_usable_mem_property(fdt, dn, &um_info); + if (ret) { + pr_err("Failed to set linux,usable-memory property for %s node", + dn->full_name); + goto out; + } + } + +out: + kfree(um_info.buf); + return ret; +} + +/** + * load_backup_segment - Locate a memory hole to place the backup region. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf) +{ + void *buf; + int ret; + + /* + * Setup a source buffer for backup segment. + * + * A source buffer has no meaning for backup region as data will + * be copied from backup source, after crash, in the purgatory. + * But as load segment code doesn't recognize such segments, + * setup a dummy source buffer to keep it happy for now. + */ + buf = vzalloc(BACKUP_SRC_SIZE); + if (!buf) + return -ENOMEM; + + kbuf->buffer = buf; + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE; + kbuf->top_down = false; + + ret = kexec_add_buffer(kbuf); + if (ret) { + vfree(buf); + return ret; + } + + image->arch.backup_buf = buf; + image->arch.backup_start = kbuf->mem; + return 0; +} + +/** + * update_backup_region_phdr - Update backup region's offset for the core to + * export the region appropriately. + * @image: Kexec image. + * @ehdr: ELF core header. + * + * Assumes an exclusive program header is setup for the backup region + * in the ELF headers + * + * Returns nothing. + */ +static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr) +{ + Elf64_Phdr *phdr; + unsigned int i; + + phdr = (Elf64_Phdr *)(ehdr + 1); + for (i = 0; i < ehdr->e_phnum; i++) { + if (phdr->p_paddr == BACKUP_SRC_START) { + phdr->p_offset = image->arch.backup_start; + pr_debug("Backup region offset updated to 0x%lx\n", + image->arch.backup_start); + return; + } + } +} + +/** + * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr + * segment needed to load kdump kernel. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. 
+ * + * Returns 0 on success, negative errno on error. + */ +static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) +{ + struct crash_mem *cmem = NULL; + unsigned long headers_sz; + void *headers = NULL; + int ret; + + ret = get_crash_memory_ranges(&cmem); + if (ret) + goto out; + + /* Setup elfcorehdr segment */ + ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz); + if (ret) { + pr_err("Failed to prepare elf headers for the core\n"); + goto out; + } + + /* Fix the offset for backup region in the ELF header */ + update_backup_region_phdr(image, headers); + + kbuf->buffer = headers; + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf->bufsz = kbuf->memsz = headers_sz; + kbuf->top_down = false; + + ret = kexec_add_buffer(kbuf); + if (ret) { + vfree(headers); + goto out; + } + + image->arch.elfcorehdr_addr = kbuf->mem; + image->arch.elf_headers_sz = headers_sz; + image->arch.elf_headers = headers; +out: + kfree(cmem); + return ret; +} + +/** + * load_crashdump_segments_ppc64 - Initialize the additional segements needed + * to load kdump kernel. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +int load_crashdump_segments_ppc64(struct kimage *image, + struct kexec_buf *kbuf) +{ + int ret; + + /* Load backup segment - first 64K bytes of the crashing kernel */ + ret = load_backup_segment(image, kbuf); + if (ret) { + pr_err("Failed to load backup segment\n"); + return ret; + } + pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem); + + /* Load elfcorehdr segment - to export crashing kernel's vmcore */ + ret = load_elfcorehdr_segment(image, kbuf); + if (ret) { + pr_err("Failed to load elfcorehdr segment\n"); + return ret; + } + pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n", + image->arch.elfcorehdr_addr, kbuf->bufsz, kbuf->memsz); + + return 0; +} + +/** + * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global + * variables and call setup_purgatory() to initialize + * common global variable. + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Returns 0 on success, negative errno on error. + */ +int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + struct device_node *dn = NULL; + int ret; + + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + goto out; + + if (image->type == KEXEC_TYPE_CRASH) { + u32 my_run_at_load = 1; + + /* + * Tell relocatable kernel to run at load address + * via the word meant for that at 0x5c. 
+ */ + ret = kexec_purgatory_get_set_symbol(image, "run_at_load", + &my_run_at_load, + sizeof(my_run_at_load), + false); + if (ret) + goto out; + } + + /* Tell purgatory where to look for backup region */ + ret = kexec_purgatory_get_set_symbol(image, "backup_start", + &image->arch.backup_start, + sizeof(image->arch.backup_start), + false); + if (ret) + goto out; + + /* Setup OPAL base & entry values */ + dn = of_find_node_by_path("/ibm,opal"); + if (dn) { + u64 val; + + of_property_read_u64(dn, "opal-base-address", &val); + ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val, + sizeof(val), false); + if (ret) + goto out; + + of_property_read_u64(dn, "opal-entry-address", &val); + ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val, + sizeof(val), false); + } +out: + if (ret) + pr_err("Failed to setup purgatory symbols"); + of_node_put(dn); + return ret; +} + +/** + * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel + * being loaded. + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Returns 0 on success, negative errno on error. + */ +int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline) +{ + struct crash_mem *umem = NULL, *rmem = NULL; + int i, nr_ranges, ret; + + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + + /* + * Restrict memory usage for kdump kernel by setting up + * usable memory ranges and memory reserve map. + */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = get_usable_memory_ranges(&umem); + if (ret) + goto out; + + ret = update_usable_mem_fdt(fdt, umem); + if (ret) { + pr_err("Error setting up usable-memory property for kdump kernel\n"); + goto out; + } + + /* + * Ensure we don't touch crashed kernel's memory except the + * first 64K of RAM, which will be backed up. + */ + ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1, + crashk_res.start - BACKUP_SRC_SIZE); + if (ret) { + pr_err("Error reserving crash memory: %s\n", + fdt_strerror(ret)); + goto out; + } + + /* Ensure backup region is not used by kdump/capture kernel */ + ret = fdt_add_mem_rsv(fdt, image->arch.backup_start, + BACKUP_SRC_SIZE); + if (ret) { + pr_err("Error reserving memory for backup: %s\n", + fdt_strerror(ret)); + goto out; + } + } + + /* Update memory reserve map */ + ret = get_reserved_memory_ranges(&rmem); + if (ret) + goto out; + + nr_ranges = rmem ? rmem->nr_ranges : 0; + for (i = 0; i < nr_ranges; i++) { + u64 base, size; + + base = rmem->ranges[i].start; + size = rmem->ranges[i].end - base + 1; + ret = fdt_add_mem_rsv(fdt, base, size); + if (ret) { + pr_err("Error updating memory reserve map: %s\n", + fdt_strerror(ret)); + goto out; + } + } + +out: + kfree(rmem); + kfree(umem); + return ret; +} + +/** + * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal, + * tce-table, reserved-ranges & such (exclude + * memory ranges) as they can't be used for kexec + * segment buffer. Sets kbuf->mem when a suitable + * memory hole is found. + * @kbuf: Buffer contents and memory parameters. + * + * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align. + * + * Returns 0 on success, negative errno on error. 
+ */ +int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + struct crash_mem **emem; + u64 buf_min, buf_max; + int ret; + + /* Look up the exclude ranges list while locating the memory hole */ + emem = &(kbuf->image->arch.exclude_ranges); + if (!(*emem) || ((*emem)->nr_ranges == 0)) { + pr_warn("No exclude range list. Using the default locate mem hole method\n"); + return kexec_locate_mem_hole(kbuf); + } + + buf_min = kbuf->buf_min; + buf_max = kbuf->buf_max; + /* Segments for kdump kernel should be within crashkernel region */ + if (kbuf->image->type == KEXEC_TYPE_CRASH) { + buf_min = (buf_min < crashk_res.start ? + crashk_res.start : buf_min); + buf_max = (buf_max > crashk_res.end ? + crashk_res.end : buf_max); + } + + if (buf_min > buf_max) { + pr_err("Invalid buffer min and/or max values\n"); + return -EINVAL; + } + + if (kbuf->top_down) + ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max, + *emem); + else + ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max, + *emem); + + /* Add the buffer allocated to the exclude list for the next lookup */ + if (!ret) { + add_mem_range(emem, kbuf->mem, kbuf->memsz); + sort_memory_ranges(*emem, true); + } else { + pr_err("Failed to locate memory buffer of size %lu\n", + kbuf->memsz); + } + return ret; +} + +/** + * arch_kexec_kernel_image_probe - Does additional handling needed to setup + * kexec segments. + * @image: kexec image being loaded. + * @buf: Buffer pointing to elf data. + * @buf_len: Length of the buffer. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int ret; + + /* Get exclude memory ranges needed for setting up kexec segments */ + ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges)); + if (ret) { + pr_err("Failed to setup exclude memory ranges for buffer lookup\n"); + return ret; + } + + return kexec_image_probe_default(image, buf, buf_len); +} + +/** + * arch_kimage_file_post_load_cleanup - Frees up all the allocations done + * while loading the image. + * @image: kexec image being loaded. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kfree(image->arch.exclude_ranges); + image->arch.exclude_ranges = NULL; + + vfree(image->arch.backup_buf); + image->arch.backup_buf = NULL; + + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + image->arch.elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c new file mode 100644 index 000000000000..6b81c852feab --- /dev/null +++ b/arch/powerpc/kexec/ranges.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * powerpc code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2020 IBM Corporation + * + * Based on kexec-tools' kexec-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Hari Bathini, IBM Corporation. + */ + +#define pr_fmt(fmt) "kexec ranges: " fmt + +#include <linux/sort.h> +#include <linux/kexec.h> +#include <linux/of_device.h> +#include <linux/slab.h> +#include <asm/sections.h> +#include <asm/kexec_ranges.h> + +/** + * get_max_nr_ranges - Get the max no. 
of ranges crash_mem structure + * could hold, given the size allocated for it. + * @size: Allocation size of crash_mem structure. + * + * Returns the maximum no. of ranges. + */ +static inline unsigned int get_max_nr_ranges(size_t size) +{ + return ((size - sizeof(struct crash_mem)) / + sizeof(struct crash_mem_range)); +} + +/** + * get_mem_rngs_size - Get the allocated size of mem_rngs based on + * max_nr_ranges and chunk size. + * @mem_rngs: Memory ranges. + * + * Returns the maximum size of @mem_rngs. + */ +static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs) +{ + size_t size; + + if (!mem_rngs) + return 0; + + size = (sizeof(struct crash_mem) + + (mem_rngs->max_nr_ranges * sizeof(struct crash_mem_range))); + + /* + * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ. + * So, align to get the actual length. + */ + return ALIGN(size, MEM_RANGE_CHUNK_SZ); +} + +/** + * __add_mem_range - add a memory range to memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + + if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) { + mem_rngs = realloc_mem_ranges(mem_ranges); + if (!mem_rngs) + return -ENOMEM; + } + + mem_rngs->ranges[mem_rngs->nr_ranges].start = base; + mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1; + pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n", + base, base + size - 1, mem_rngs->nr_ranges); + mem_rngs->nr_ranges++; + return 0; +} + +/** + * __merge_memory_ranges - Merges the given memory ranges list. + * @mem_rngs: Range list to merge. + * + * Assumes a sorted range list. + * + * Returns nothing. + */ +static void __merge_memory_ranges(struct crash_mem *mem_rngs) +{ + struct crash_mem_range *ranges; + int i, idx; + + if (!mem_rngs) + return; + + idx = 0; + ranges = &(mem_rngs->ranges[0]); + for (i = 1; i < mem_rngs->nr_ranges; i++) { + if (ranges[i].start <= (ranges[i-1].end + 1)) + ranges[idx].end = ranges[i].end; + else { + idx++; + if (i == idx) + continue; + + ranges[idx] = ranges[i]; + } + } + mem_rngs->nr_ranges = idx + 1; +} + +/* cmp_func_t callback to sort ranges with sort() */ +static int rngcmp(const void *_x, const void *_y) +{ + const struct crash_mem_range *x = _x, *y = _y; + + if (x->start > y->start) + return 1; + if (x->start < y->start) + return -1; + return 0; +} + +/** + * sort_memory_ranges - Sorts the given memory ranges list. + * @mem_rngs: Range list to sort. + * @merge: If true, merge the list after sorting. + * + * Returns nothing. + */ +void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge) +{ + int i; + + if (!mem_rngs) + return; + + /* Sort the ranges in-place */ + sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges, + sizeof(mem_rngs->ranges[0]), rngcmp, NULL); + + if (merge) + __merge_memory_ranges(mem_rngs); + + /* For debugging purpose */ + pr_debug("Memory ranges:\n"); + for (i = 0; i < mem_rngs->nr_ranges; i++) { + pr_debug("\t[%03d][%#016llx - %#016llx]\n", i, + mem_rngs->ranges[i].start, + mem_rngs->ranges[i].end); + } +} + +/** + * realloc_mem_ranges - reallocate mem_ranges with size incremented + * by MEM_RANGE_CHUNK_SZ. Frees up the old memory, + * if memory allocation fails. + * @mem_ranges: Memory ranges to reallocate. 
+ * + * Returns pointer to reallocated memory on success, NULL otherwise. + */ +struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges) +{ + struct crash_mem *mem_rngs = *mem_ranges; + unsigned int nr_ranges; + size_t size; + + size = get_mem_rngs_size(mem_rngs); + nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0; + + size += MEM_RANGE_CHUNK_SZ; + mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL); + if (!mem_rngs) { + kfree(*mem_ranges); + *mem_ranges = NULL; + return NULL; + } + + mem_rngs->nr_ranges = nr_ranges; + mem_rngs->max_nr_ranges = get_max_nr_ranges(size); + *mem_ranges = mem_rngs; + + return mem_rngs; +} + +/** + * add_mem_range - Updates existing memory range, if there is an overlap. + * Else, adds a new memory range. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + u64 mstart, mend, end; + unsigned int i; + + if (!size) + return 0; + + end = base + size - 1; + + if (!mem_rngs || !(mem_rngs->nr_ranges)) + return __add_mem_range(mem_ranges, base, size); + + for (i = 0; i < mem_rngs->nr_ranges; i++) { + mstart = mem_rngs->ranges[i].start; + mend = mem_rngs->ranges[i].end; + if (base < mend && end > mstart) { + if (base < mstart) + mem_rngs->ranges[i].start = base; + if (end > mend) + mem_rngs->ranges[i].end = end; + return 0; + } + } + + return __add_mem_range(mem_ranges, base, size); +} + +/** + * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. + * @mem_ranges: Range list to add the memory range(s) to. + * + * Returns 0 on success, negative errno on error. + */ +int add_tce_mem_ranges(struct crash_mem **mem_ranges) +{ + struct device_node *dn = NULL; + int ret = 0; + + for_each_node_by_type(dn, "pci") { + u64 base; + u32 size; + + ret = of_property_read_u64(dn, "linux,tce-base", &base); + ret |= of_property_read_u32(dn, "linux,tce-size", &size); + if (ret) { + /* + * It is ok to have pci nodes without tce. So, ignore + * property does not exist error. + */ + if (ret == -EINVAL) { + ret = 0; + continue; + } + break; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + of_node_put(dn); + return ret; +} + +/** + * add_initrd_mem_range - Adds initrd range to the given memory ranges list, + * if the initrd was retained. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_initrd_mem_range(struct crash_mem **mem_ranges) +{ + u64 base, end; + int ret; + + /* This range means something, only if initrd was retained */ + if (!strstr(saved_command_line, "retain_initrd")) + return 0; + + ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base); + ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end); + if (!ret) + ret = add_mem_range(mem_ranges, base, end - base + 1); + + return ret; +} + +#ifdef CONFIG_PPC_BOOK3S_64 +/** + * add_htab_mem_range - Adds htab range to the given memory ranges list, + * if it exists + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. 
+ */ +int add_htab_mem_range(struct crash_mem **mem_ranges) +{ + if (!htab_address) + return 0; + + return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); +} +#endif + +/** + * add_kernel_mem_range - Adds kernel text region to the given + * memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_kernel_mem_range(struct crash_mem **mem_ranges) +{ + return add_mem_range(mem_ranges, 0, __pa(_end)); +} + +/** + * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_rtas_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u32 base, size; + int ret = 0; + + dn = of_find_node_by_path("/rtas"); + if (!dn) + return 0; + + ret = of_property_read_u32(dn, "linux,rtas-base", &base); + ret |= of_property_read_u32(dn, "rtas-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} + +/** + * add_opal_mem_range - Adds OPAL region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_opal_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u64 base, size; + int ret; + + dn = of_find_node_by_path("/ibm,opal"); + if (!dn) + return 0; + + ret = of_property_read_u64(dn, "opal-base-address", &base); + ret |= of_property_read_u64(dn, "opal-runtime-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} + +/** + * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w + * to the given memory ranges list. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int add_reserved_mem_ranges(struct crash_mem **mem_ranges) +{ + int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; + const __be32 *prop; + + prop = of_get_property(of_root, "reserved-ranges", &len); + if (!prop) + return 0; + + n_mem_addr_cells = of_n_addr_cells(of_root); + n_mem_size_cells = of_n_size_cells(of_root); + cells = n_mem_addr_cells + n_mem_size_cells; + + /* Each reserved range is an (address,size) pair */ + for (i = 0; i < (len / (sizeof(u32) * cells)); i++) { + u64 base, size; + + base = of_read_number(prop + (i * cells), n_mem_addr_cells); + size = of_read_number(prop + (i * cells) + n_mem_addr_cells, + n_mem_size_cells); + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + return ret; +} diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 12885eda324e..549591d9aaa2 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -7,7 +7,7 @@ source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" - ---help--- + help Say Y here to get to see options for using your Linux host to run other operating systems inside virtual machines (guests). This option alone does not add any kernel code. @@ -54,7 +54,7 @@ config KVM_BOOK3S_32 select KVM select KVM_BOOK3S_32_HANDLER select KVM_BOOK3S_PR_POSSIBLE - ---help--- + help Support running unmodified book3s_32 guest kernels in virtual machines on book3s_32 host processors. 
@@ -70,7 +70,7 @@ config KVM_BOOK3S_64 select KVM select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV) - ---help--- + help Support running unmodified book3s_64 and book3s_32 guest kernels in virtual machines on book3s_64 host processors. @@ -85,7 +85,7 @@ config KVM_BOOK3S_64_HV select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA - ---help--- + help Support running unmodified book3s_64 guest kernels in virtual machines on POWER7 and newer processors that have hypervisor mode available to the host. @@ -104,7 +104,7 @@ config KVM_BOOK3S_64_PR tristate "KVM support without using hypervisor mode in host" depends on KVM_BOOK3S_64 select KVM_BOOK3S_PR_POSSIBLE - ---help--- + help Support running guest kernels in virtual machines on processors without using hypervisor mode in the host, by running the guest in user mode (problem state) and emulating all @@ -119,7 +119,7 @@ config KVM_BOOK3S_64_PR config KVM_BOOK3S_HV_EXIT_TIMING bool "Detailed timing for hypervisor real-mode code" depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS - ---help--- + help Calculate time taken for each vcpu in the real-mode guest entry, exit, and interrupt handling code, plus time spent in the guest and in nap mode due to idle (cede) while other threads are still @@ -136,7 +136,7 @@ config KVM_BOOKE_HV config KVM_EXIT_TIMING bool "Detailed exit timing" depends on KVM_E500V2 || KVM_E500MC - ---help--- + help Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. The overhead is relatively small, however it is not recommended for @@ -150,7 +150,7 @@ config KVM_E500V2 select KVM select KVM_MMIO select MMU_NOTIFIER - ---help--- + help Support running unmodified E500 guest kernels in virtual machines on E500v2 host processors. @@ -166,7 +166,7 @@ config KVM_E500MC select KVM_MMIO select KVM_BOOKE_HV select MMU_NOTIFIER - ---help--- + help Support running unmodified E500MC/E5500/E6500 guest kernels in virtual machines on E500MC/E5500/E6500 host processors. @@ -194,7 +194,7 @@ config KVM_XICS select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD default y - ---help--- + help Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on IBM POWER (pSeries) servers. diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5690a1f9b976..49db50d1db04 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -36,41 +36,38 @@ #include "book3s.h" #include "trace.h" -#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ -#define VCPU_STAT(x, ...) 
offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ - /* #define EXIT_DEBUG */ struct kvm_stats_debugfs_item debugfs_entries[] = { - { "exits", VCPU_STAT(sum_exits) }, - { "mmio", VCPU_STAT(mmio_exits) }, - { "sig", VCPU_STAT(signal_exits) }, - { "sysc", VCPU_STAT(syscall_exits) }, - { "inst_emu", VCPU_STAT(emulated_inst_exits) }, - { "dec", VCPU_STAT(dec_exits) }, - { "ext_intr", VCPU_STAT(ext_intr_exits) }, - { "queue_intr", VCPU_STAT(queue_intr) }, - { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) }, - { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) }, - { "halt_wait_ns", VCPU_STAT(halt_wait_ns) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, - { "halt_successful_wait", VCPU_STAT(halt_successful_wait) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "pf_storage", VCPU_STAT(pf_storage) }, - { "sp_storage", VCPU_STAT(sp_storage) }, - { "pf_instruc", VCPU_STAT(pf_instruc) }, - { "sp_instruc", VCPU_STAT(sp_instruc) }, - { "ld", VCPU_STAT(ld) }, - { "ld_slow", VCPU_STAT(ld_slow) }, - { "st", VCPU_STAT(st) }, - { "st_slow", VCPU_STAT(st_slow) }, - { "pthru_all", VCPU_STAT(pthru_all) }, - { "pthru_host", VCPU_STAT(pthru_host) }, - { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, - { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) }, - { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) }, + VCPU_STAT("exits", sum_exits), + VCPU_STAT("mmio", mmio_exits), + VCPU_STAT("sig", signal_exits), + VCPU_STAT("sysc", syscall_exits), + VCPU_STAT("inst_emu", emulated_inst_exits), + VCPU_STAT("dec", dec_exits), + VCPU_STAT("ext_intr", ext_intr_exits), + VCPU_STAT("queue_intr", queue_intr), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("halt_wait_ns", halt_wait_ns), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_successful_wait", halt_successful_wait), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("pf_storage", pf_storage), + VCPU_STAT("sp_storage", sp_storage), + VCPU_STAT("pf_instruc", pf_instruc), + VCPU_STAT("sp_instruc", sp_instruc), + VCPU_STAT("ld", ld), + VCPU_STAT("ld_slow", ld_slow), + VCPU_STAT("st", st), + VCPU_STAT("st_slow", st_slow), + VCPU_STAT("pthru_all", pthru_all), + VCPU_STAT("pthru_host", pthru_host), + VCPU_STAT("pthru_bad_aff", pthru_bad_aff), + VM_STAT("largepages_2M", num_2M_pages, .mode = 0444), + VM_STAT("largepages_1G", num_1G_pages, .mode = 0444), { NULL } }; @@ -758,9 +755,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) } EXPORT_SYMBOL_GPL(kvmppc_set_msr); -int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) { - return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); + return vcpu->kvm->arch.kvm_ops->vcpu_run(vcpu); } int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, @@ -837,7 +834,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } diff --git a/arch/powerpc/kvm/book3s.h 
b/arch/powerpc/kvm/book3s.h index eae259ee49af..9b6323ec8e60 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -18,7 +18,7 @@ extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); -extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, +extern int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2b35f9bcf892..38ea396a23d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void) if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ host_lpid = 0; if (cpu_has_feature(CPU_FTR_HVMODE)) host_lpid = mfspr(SPRN_LPID); - rsvd_lpid = LPID_RSVD; + + /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + rsvd_lpid = LPID_RSVD; + else + rsvd_lpid = LPID_RSVD_POWER7; kvmppc_init_lpid(rsvd_lpid + 1); @@ -281,11 +285,10 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, { long ret; - /* Protect linux PTE lookup from page table destruction */ - rcu_read_lock_sched(); /* this disables preemption too */ + preempt_disable(); ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, kvm->mm->pgd, false, pte_idx_ret); - rcu_read_unlock_sched(); + preempt_enable(); if (ret == H_TOO_HARD) { /* this can't happen */ pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); @@ -413,7 +416,7 @@ static int instruction_is_store(unsigned int instr) return (instr & mask) != 0; } -int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store) { u32 last_inst; @@ -473,10 +476,10 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.paddr_accessed = gpa; vcpu->arch.vaddr_accessed = ea; - return kvmppc_emulate_mmio(run, vcpu); + return kvmppc_emulate_mmio(vcpu); } -int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; @@ -499,7 +502,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte_t pte, *ptep; if (kvm_is_radix(kvm)) - return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); + return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr); /* * Real-mode code has already searched the HPT and found the @@ -519,7 +522,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa_base = r & HPTE_R_RPN & ~(psize - 1); gfn_base = gpa_base >> PAGE_SHIFT; gpa = gpa_base | (ea & (psize - 1)); - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, dsisr & DSISR_ISSTORE); } } @@ -555,7 +558,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, dsisr & DSISR_ISSTORE); /* @@ -582,7 +585,7 @@ int 
kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * We always ask for write permission since the common case * is that the page is writable. */ - if (__get_user_pages_fast(hva, 1, 1, &page) == 1) { + if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) { write_ok = true; } else { /* Call KVM generic code to do the slow-path check */ @@ -602,12 +605,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * Read the PTE from the process' radix tree and use that * so we get the shift and attribute bits. */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + spin_lock(&kvm->mmu_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); pte = __pte(0); if (ptep) - pte = *ptep; - local_irq_enable(); + pte = READ_ONCE(*ptep); + spin_unlock(&kvm->mmu_lock); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index aa12cd4078b3..22a677b18695 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -11,12 +11,12 @@ #include <linux/anon_inodes.h> #include <linux/file.h> #include <linux/debugfs.h> +#include <linux/pgtable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/pte-walk.h> #include <asm/ultravisor.h> @@ -33,14 +33,15 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, gva_t eaddr, void *to, void *from, unsigned long n) { - int uninitialized_var(old_pid), old_lpid; + int old_pid, old_lpid; unsigned long quadrant, ret = n; bool is_load = !!to; /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, - __pa(to), __pa(from), n); + (to != NULL) ? __pa(to): 0, + (from != NULL) ? __pa(from): 0, n); quadrant = 1; if (!pid) @@ -64,9 +65,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, isync(); if (is_load) - ret = probe_user_read(to, (const void __user *)from, n); + ret = copy_from_user_nofault(to, (const void __user *)from, n); else - ret = probe_user_write((void __user *)to, from, n); + ret = copy_to_user_nofault((void __user *)to, from, n); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) @@ -160,7 +161,9 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, return -EINVAL; /* Read the entry from guest memory */ addr = base + (index * sizeof(rpte)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) { if (pte_ret_p) *pte_ret_p = addr; @@ -236,7 +239,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, /* Read the table to find the root of the radix tree */ ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) return ret; @@ -353,7 +358,13 @@ static struct kmem_cache *kvm_pmd_cache; static pte_t *kvmppc_pte_alloc(void) { - return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + pte_t *pte; + + pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + /* pmd_populate() will only reference _pa(pte). 
*/ + kmemleak_ignore(pte); + + return pte; } static void kvmppc_pte_free(pte_t *ptep) @@ -363,7 +374,13 @@ static void kvmppc_pte_free(pte_t *ptep) static pmd_t *kvmppc_pmd_alloc(void) { - return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + pmd_t *pmd; + + pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + /* pud_populate() will only reference _pa(pmd). */ + kmemleak_ignore(pmd); + + return pmd; } static void kvmppc_pmd_free(pmd_t *pmdp) @@ -417,9 +434,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, * Callers are responsible for flushing the PWC. * * When page tables are being unmapped/freed as part of page fault path - * (full == false), ptes are not expected. There is code to unmap them - * and emit a warning if encountered, but there may already be data - * corruption due to the unexpected mappings. + * (full == false), valid ptes are generally not expected; however, there + * is one situation where they arise, which is when dirty page logging is + * turned off for a memslot while the VM is running. The new memslot + * becomes visible to page faults before the memslot commit function + * gets to flush the memslot, which can lead to a 2MB page mapping being + * installed for a guest physical address where there are already 64kB + * (or 4kB) mappings (of sub-pages of the same 2MB page). */ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, unsigned int lpid) @@ -433,7 +454,6 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, for (it = 0; it < PTRS_PER_PTE; ++it, ++p) { if (pte_val(*p) == 0) continue; - WARN_ON_ONCE(1); kvmppc_unmap_pte(kvm, p, pte_pfn(*p) << PAGE_SHIFT, PAGE_SHIFT, NULL, lpid); @@ -499,13 +519,14 @@ void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid) unsigned long ig; for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { + p4d_t *p4d = p4d_offset(pgd, 0); pud_t *pud; - if (!pgd_present(*pgd)) + if (!p4d_present(*p4d)) continue; - pud = pud_offset(pgd, 0); + pud = pud_offset(p4d, 0); kvmppc_unmap_free_pud(kvm, pud, lpid); - pgd_clear(pgd); + p4d_clear(p4d); } } @@ -566,6 +587,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long *rmapp, struct rmap_nested **n_rmap) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud, *new_pud = NULL; pmd_t *pmd, *new_pmd = NULL; pte_t *ptep, *new_ptep = NULL; @@ -573,9 +595,11 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Traverse the guest's 2nd-level tree, allocate new levels needed */ pgd = pgtable + pgd_index(gpa); + p4d = p4d_offset(pgd, gpa); + pud = NULL; - if (pgd_present(*pgd)) - pud = pud_offset(pgd, gpa); + if (p4d_present(*p4d)) + pud = pud_offset(p4d, gpa); else new_pud = pud_alloc_one(kvm->mm, gpa); @@ -596,13 +620,13 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Now traverse again under the lock and change the tree */ ret = -ENOMEM; - if (pgd_none(*pgd)) { + if (p4d_none(*p4d)) { if (!new_pud) goto out_unlock; - pgd_populate(kvm->mm, pgd, new_pud); + p4d_populate(kvm->mm, p4d, new_pud); new_pud = NULL; } - pud = pud_offset(pgd, gpa); + pud = pud_offset(p4d, gpa); if (pud_is_leaf(*pud)) { unsigned long hgpa = gpa & PUD_MASK; @@ -735,7 +759,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, return ret; } -bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, +bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, unsigned long gpa, unsigned int lpid) { unsigned long pgflags; @@ -750,12 +774,12 @@ 
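[Annotation] Several hunks in this file insert a p4d level between pgd and pud. This follows the kernel-wide removal of the 5-level page-table folding hack: walkers now name the p4d level explicitly even though it is a folded no-op on powerpc radix. A minimal walk in the new style, using the same helpers the patch uses (an illustrative sketch, not a hunk from the patch):

static pte_t *walk_partition_table(pgd_t *pgtable, unsigned long gpa,
                                   unsigned int *shift)
{
        pgd_t *pgd = pgtable + pgd_index(gpa);
        p4d_t *p4d = p4d_offset(pgd, gpa);      /* folded: names the same entry */
        pud_t *pud;
        pmd_t *pmd;

        if (p4d_none(*p4d))
                return NULL;
        pud = pud_offset(p4d, gpa);
        if (pud_none(*pud))
                return NULL;
        if (pud_is_leaf(*pud)) {                /* 1GB mapping */
                *shift = PUD_SHIFT;
                return (pte_t *)pud;
        }
        pmd = pmd_offset(pud, gpa);
        if (pmd_none(*pmd))
                return NULL;
        if (pmd_is_leaf(*pmd)) {                /* 2MB mapping */
                *shift = PMD_SHIFT;
                return (pte_t *)pmd;
        }
        *shift = PAGE_SHIFT;
        return pte_offset_kernel(pmd, gpa);
}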
bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, pgflags = _PAGE_ACCESSED; if (writing) pgflags |= _PAGE_DIRTY; - /* - * We are walking the secondary (partition-scoped) page table here. - * We can do this without disabling irq because the Linux MM - * subsystem doesn't do THP splits and collapses on this tree. - */ - ptep = __find_linux_pte(pgtable, gpa, NULL, &shift); + + if (nested) + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); + else + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); + if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) { kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); return true; @@ -791,7 +815,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, * is that the page is writable. */ hva = gfn_to_hva_memslot(memslot, gfn); - if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) { + if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) { upgrade_write = true; } else { unsigned long pfn; @@ -813,12 +837,12 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, * Read the PTE from the process' radix tree and use that * so we get the shift and attribute bits. */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + spin_lock(&kvm->mmu_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); pte = __pte(0); if (ptep) - pte = *ptep; - local_irq_enable(); + pte = READ_ONCE(*ptep); + spin_unlock(&kvm->mmu_lock); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. @@ -887,7 +911,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, return ret; } -int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; @@ -933,7 +957,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_core_queue_data_storage(vcpu, ea, dsisr); return RESUME_GUEST; } - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { @@ -949,8 +973,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Failed to set the reference/change bits */ if (dsisr & DSISR_SET_RC) { spin_lock(&kvm->mmu_lock); - if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, - writing, gpa, kvm->arch.lpid)) + if (kvmppc_hv_handle_set_rc(kvm, false, writing, + gpa, kvm->arch.lpid)) dsisr &= ~DSISR_SET_RC; spin_unlock(&kvm->mmu_lock); @@ -981,11 +1005,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return 0; } - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); - return 0; + return 0; } /* Called with kvm->mmu_lock held */ @@ -1001,7 +1025,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -1028,7 +1052,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & 
KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -1040,7 +1064,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, { unsigned long gfn = memslot->base_gfn + pagenum; unsigned long gpa = gfn << PAGE_SHIFT; - pte_t *ptep; + pte_t *ptep, pte; unsigned int shift; int ret = 0; unsigned long old, *rmapp; @@ -1048,12 +1072,35 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ret; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); - if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { - ret = 1; - if (shift) - ret = 1 << (shift - PAGE_SHIFT); + /* + * For performance reasons we don't hold kvm->mmu_lock while walking the + * partition scoped table. + */ + ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift); + if (!ptep) + return 0; + + pte = READ_ONCE(*ptep); + if (pte_present(pte) && pte_dirty(pte)) { spin_lock(&kvm->mmu_lock); + /* + * Recheck the pte again + */ + if (pte_val(pte) != pte_val(*ptep)) { + /* + * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can + * only find PAGE_SIZE pte entries here. We can continue + * to use the pte addr returned by above page table + * walk. + */ + if (!pte_present(*ptep) || !pte_dirty(*ptep)) { + spin_unlock(&kvm->mmu_lock); + return 0; + } + } + + ret = 1; + VM_BUG_ON(shift); old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid); @@ -1109,12 +1156,17 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, gpa = memslot->base_gfn << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); for (n = memslot->npages; n; --n) { - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); gpa += PAGE_SIZE; } + /* + * Increase the mmu notifier sequence number to prevent any page + * fault that read the memslot earlier from writing a PTE. 
+ */ + kvm->mmu_notifier_seq++; spin_unlock(&kvm->mmu_lock); } @@ -1220,7 +1272,8 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, unsigned long gpa; pgd_t *pgt; struct kvm_nested_guest *nested; - pgd_t pgd, *pgdp; + pgd_t *pgdp; + p4d_t p4d, *p4dp; pud_t pud, *pudp; pmd_t pmd, *pmdp; pte_t *ptep; @@ -1293,13 +1346,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, } pgdp = pgt + pgd_index(gpa); - pgd = READ_ONCE(*pgdp); - if (!(pgd_val(pgd) & _PAGE_PRESENT)) { - gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE; + p4dp = p4d_offset(pgdp, gpa); + p4d = READ_ONCE(*p4dp); + if (!(p4d_val(p4d) & _PAGE_PRESENT)) { + gpa = (gpa & P4D_MASK) + P4D_SIZE; continue; } - pudp = pud_offset(&pgd, gpa); + pudp = pud_offset(&p4d, gpa); pud = READ_ONCE(*pudp); if (!(pud_val(pud) & _PAGE_PRESENT)) { gpa = (gpa & PUD_MASK) + PUD_SIZE; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 50555ad1db93..1a529df0ab44 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -73,6 +73,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct kvmppc_spapr_tce_iommu_table *stit, *tmp; struct iommu_table_group *table_group = NULL; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { table_group = iommu_group_get_iommudata(grp); @@ -87,7 +88,9 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, kref_put(&stit->kref, kvm_spapr_tce_liobn_put); } } + cond_resched_rcu(); } + rcu_read_unlock(); } extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, @@ -105,12 +108,14 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!f.file) return -EBADF; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { if (stt == f.file->private_data) { found = true; break; } } + rcu_read_unlock(); fdput(f); @@ -143,6 +148,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!tbl) return -EINVAL; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { if (tbl != stit->tbl) continue; @@ -150,14 +156,17 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!kref_get_unless_zero(&stit->kref)) { /* stit is being destroyed */ iommu_tce_table_put(tbl); + rcu_read_unlock(); return -ENOTTY; } /* * The table is already known to this KVM, we just increased * its KVM reference counter and can return. 
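[Annotation] The kvm->mmu_notifier_seq++ added to kvmppc_radix_flush_memslot() above pairs with the fault-side sequence check that KVM page faults already perform: a fault which looked up the memslot before the flush will now fail mmu_notifier_retry() and back off rather than install a PTE into the just-flushed range. For reference, the generic shape of that fault-side check (standard KVM pattern, not a hunk from this patch):

        unsigned long mmu_seq;

        /* Sample the notifier count before any translation work. */
        mmu_seq = kvm->mmu_notifier_seq;
        smp_rmb();

        /* ... resolve the memslot, pin the host page, build the new PTE ... */

        spin_lock(&kvm->mmu_lock);
        if (mmu_notifier_retry(kvm, mmu_seq)) {
                /* An invalidation (or the memslot flush above) ran in the
                 * meantime; drop everything and let the guest retry. */
                spin_unlock(&kvm->mmu_lock);
                return RESUME_GUEST;
        }
        /* ... safe to install the mapping ... */
        spin_unlock(&kvm->mmu_lock);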
*/ + rcu_read_unlock(); return 0; } + rcu_read_unlock(); stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { @@ -365,18 +374,19 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, if (kvmppc_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { unsigned long hpa = 0; struct mm_iommu_table_group_mem_t *mem; long shift = stit->tbl->it_page_shift; mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift); - if (!mem) - return H_TOO_HARD; - - if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) + if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) { + rcu_read_unlock(); return H_TOO_HARD; + } } + rcu_read_unlock(); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 6fcaf1fa8e02..ac6ac192b8bb 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -74,8 +74,8 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, EXPORT_SYMBOL_GPL(kvmppc_find_table); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, - unsigned long *ua, unsigned long **prmap) +static long kvmppc_rm_tce_to_ua(struct kvm *kvm, + unsigned long tce, unsigned long *ua) { unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; @@ -87,9 +87,6 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, *ua = __gfn_to_hva_memslot(memslot, gfn) | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); - if (prmap) - *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; - return 0; } @@ -116,7 +113,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_PARAMETER; - if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -208,7 +205,7 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, idx = (ioba >> stt->page_shift) - stt->offset; sttpage = idx / TCES_PER_PAGE; - sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / + sttpages = ALIGN(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / TCES_PER_PAGE; for (i = sttpage; i < sttpage + sttpages; ++i) if (!stt->pages[i]) @@ -411,7 +408,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; dir = iommu_tce_direction(tce); - if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) + if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) return H_PARAMETER; entry = ioba >> stt->page_shift; @@ -437,8 +434,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return H_SUCCESS; } -static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, - unsigned long ua, unsigned long *phpa) +static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long ua, unsigned long *phpa) { pte_t *ptep, pte; unsigned shift = 0; @@ -452,10 +449,17 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, * to exit which will agains result in the below page table walk * to finish. */ - ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); - if (!ptep || !pte_present(*ptep)) + /* an rmap lock won't make it safe. because that just ensure hash + * page table entries are removed with rmap lock held. After that + * mmu notifier returns and we go ahead and removing ptes from Qemu page table. 
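[Annotation] book3s_64_vio.c gains rcu_read_lock()/rcu_read_unlock() around its list_for_each_entry_rcu() walks: that iterator expects to run inside an RCU read-side section (or under a lock explicitly annotated for lockdep), and cond_resched_rcu() is used in the potentially long walk because it momentarily drops and re-takes the read lock. The usual shape of a lockless lookup over such a list, written with hypothetical names rather than the patch's own:

struct foo {
        struct list_head next;
        struct kref kref;
        int id;
};

static struct foo *foo_lookup(struct list_head *head, int id)
{
        struct foo *f;

        rcu_read_lock();
        list_for_each_entry_rcu(f, head, next) {
                if (f->id != id)
                        continue;
                /* Pin the object before leaving the RCU section; a zero
                 * refcount means it is already on its way to being freed. */
                if (!kref_get_unless_zero(&f->kref))
                        continue;
                rcu_read_unlock();
                return f;
        }
        rcu_read_unlock();
        return NULL;
}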
+ */ + ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift); + if (!ptep) + return -ENXIO; + + pte = READ_ONCE(*ptep); + if (!pte_present(pte)) return -ENXIO; - pte = *ptep; if (!shift) shift = PAGE_SHIFT; @@ -477,10 +481,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce_list, unsigned long npages) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; long i, ret = H_SUCCESS; unsigned long tces, entry, ua = 0; - unsigned long *rmap = NULL; + unsigned long mmu_seq; bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; @@ -488,6 +493,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (kvm_is_radix(vcpu->kvm)) return H_TOO_HARD; + /* + * used to check for invalidations in progress + */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -515,7 +526,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ struct mm_iommu_table_group_mem_t *mem; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); @@ -531,23 +542,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, * We do not require memory to be preregistered in this case * so lock rmap and do __find_linux_pte_or_hugepte(). */ - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) - return H_TOO_HARD; - - rmap = (void *) vmalloc_to_phys(rmap); - if (WARN_ON_ONCE_RM(!rmap)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; - /* - * Synchronize with the MMU notifier callbacks in - * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.). - * While we have the rmap lock, code running on other CPUs - * cannot finish unmapping the host real page that backs - * this guest real page, so we are OK to access the host - * real page. 
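[Annotation] In the next hunk the real-mode H_PUT_TCE_INDIRECT path stops using the rmap lock and instead takes arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock). Real-mode (translation off) hypercall code cannot go through the regular spin_lock() wrappers, which may touch lockdep, tracing or preempt-debug state that is unsafe to access with the MMU off, so it acquires the underlying arch_spinlock_t directly; that rationale is an editorial note, not wording from the patch. The locking shape, condensed:

        /* mmu_seq was sampled earlier: mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); */
        arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
        if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces))
                ret = H_TOO_HARD;               /* punt; retry the hcall in virtual mode */
        else
                ret = put_tces_from_list(tces); /* hypothetical stand-in for the TCE loop */
        arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);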
- */ - lock_rmap(rmap); - if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) { ret = H_TOO_HARD; goto unlock_exit; } @@ -565,7 +564,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; goto invalidate_exit; } @@ -590,9 +589,8 @@ invalidate_exit: iommu_tce_kill_rm(stit->tbl, entry, npages); unlock_exit: - if (rmap) - unlock_rmap(rmap); - + if (!prereg) + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index dad71d276b91..0effd48c8f4d 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -235,7 +235,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) #endif -int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; @@ -371,13 +371,13 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) break; - run->papr_hcall.nr = cmd; + vcpu->run->papr_hcall.nr = cmd; for (i = 0; i < 9; ++i) { ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); - run->papr_hcall.args[i] = gpr; + vcpu->run->papr_hcall.args[i] = gpr; } - run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->run->exit_reason = KVM_EXIT_PAPR_HCALL; vcpu->arch.hcall_needed = 1; emulated = EMULATE_EXIT_USER; break; @@ -629,7 +629,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, } if (emulated == EMULATE_FAIL) - emulated = kvmppc_emulate_paired_single(run, vcpu); + emulated = kvmppc_emulate_paired_single(vcpu); return emulated; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 93493f0cbfe8..4ba06a2a306c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -74,6 +74,7 @@ #include <asm/hw_breakpoint.h> #include <asm/kvm_book3s_uvmem.h> #include <asm/ultravisor.h> +#include <asm/dtl.h> #include "book3s.h" @@ -230,13 +231,11 @@ static bool kvmppc_ipi_thread(int cpu) static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { int cpu; - struct swait_queue_head *wqp; + struct rcuwait *waitp; - wqp = kvm_arch_vcpu_wq(vcpu); - if (swq_has_sleeper(wqp)) { - swake_up_one(wqp); + waitp = kvm_arch_vcpu_get_wait(vcpu); + if (rcuwait_wake_up(waitp)) ++vcpu->stat.halt_wakeup; - } cpu = READ_ONCE(vcpu->arch.thread_cpu); if (cpu >= 0 && kvmppc_ipi_thread(cpu)) @@ -345,7 +344,7 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) } /* Dummy value used in computing PCR value below */ -#define PCR_ARCH_300 (PCR_ARCH_207 << 1) +#define PCR_ARCH_31 (PCR_ARCH_300 << 1) static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { @@ -353,7 +352,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) struct kvmppc_vcore *vc = vcpu->arch.vcore; /* We can (emulate) our own architecture version and anything older */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + host_pcr_bit = PCR_ARCH_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) host_pcr_bit = PCR_ARCH_300; else if (cpu_has_feature(CPU_FTR_ARCH_207S)) host_pcr_bit = PCR_ARCH_207; @@ -379,6 +380,9 @@ static int 
kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) case PVR_ARCH_300: guest_pcr_bit = PCR_ARCH_300; break; + case PVR_ARCH_31: + guest_pcr_bit = PCR_ARCH_31; + break; default: return -EINVAL; } @@ -769,7 +773,7 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, return H_P3; vcpu->arch.ciabr = value1; return H_SUCCESS; - case H_SET_MODE_RESOURCE_SET_DAWR: + case H_SET_MODE_RESOURCE_SET_DAWR0: if (!kvmppc_power8_compatible(vcpu)) return H_P2; if (!ppc_breakpoint_available()) @@ -1099,9 +1103,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = kvmppc_h_svm_init_done(vcpu->kvm); break; case H_SVM_INIT_ABORT: - ret = H_UNSUPPORTED; - if (kvmppc_get_srr1(vcpu) & MSR_S) - ret = kvmppc_h_svm_init_abort(vcpu->kvm); + /* + * Even if that call is made by the Ultravisor, the SSR1 value + * is the guest context one, with the secure bit clear as it has + * not yet been secured. So we can't check it here. + * Instead the kvm->arch.secure_guest flag is checked inside + * kvmppc_h_svm_init_abort(). + */ + ret = kvmppc_h_svm_init_abort(vcpu->kvm); break; default: @@ -1156,8 +1165,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) return kvmppc_hcall_impl_hv_realmode(cmd); } -static int kvmppc_emulate_debug_inst(struct kvm_run *run, - struct kvm_vcpu *vcpu) +static int kvmppc_emulate_debug_inst(struct kvm_vcpu *vcpu) { u32 last_inst; @@ -1171,8 +1179,8 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run, } if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { - run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.address = kvmppc_get_pc(vcpu); + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu); return RESUME_HOST; } else { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); @@ -1273,9 +1281,10 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } -static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, struct task_struct *tsk) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; vcpu->stat.sum_exits++; @@ -1410,7 +1419,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, swab32(vcpu->arch.emul_inst) : vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { - r = kvmppc_emulate_debug_inst(run, vcpu); + r = kvmppc_emulate_debug_inst(vcpu); } else { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; @@ -1462,7 +1471,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { int r; int srcu_idx; @@ -1520,7 +1529,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(run, vcpu); + r = kvmhv_nested_page_fault(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: @@ -1530,7 +1539,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) vcpu->arch.fault_dsisr |= DSISR_ISSTORE; srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(run, vcpu); + r = kvmhv_nested_page_fault(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; @@ -1679,10 +1688,22 @@ static int 
kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: *val = get_reg_val(id, vcpu->arch.uamor); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; *val = get_reg_val(id, vcpu->arch.mmcr[i]); break; + case KVM_REG_PPC_MMCR2: + *val = get_reg_val(id, vcpu->arch.mmcr[2]); + break; + case KVM_REG_PPC_MMCRA: + *val = get_reg_val(id, vcpu->arch.mmcra); + break; + case KVM_REG_PPC_MMCRS: + *val = get_reg_val(id, vcpu->arch.mmcrs); + break; + case KVM_REG_PPC_MMCR3: + *val = get_reg_val(id, vcpu->arch.mmcr[3]); + break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); @@ -1698,7 +1719,13 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.sdar); break; case KVM_REG_PPC_SIER: - *val = get_reg_val(id, vcpu->arch.sier); + *val = get_reg_val(id, vcpu->arch.sier[0]); + break; + case KVM_REG_PPC_SIER2: + *val = get_reg_val(id, vcpu->arch.sier[1]); + break; + case KVM_REG_PPC_SIER3: + *val = get_reg_val(id, vcpu->arch.sier[2]); break; case KVM_REG_PPC_IAMR: *val = get_reg_val(id, vcpu->arch.iamr); @@ -1900,10 +1927,22 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: vcpu->arch.uamor = set_reg_val(id, *val); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; vcpu->arch.mmcr[i] = set_reg_val(id, *val); break; + case KVM_REG_PPC_MMCR2: + vcpu->arch.mmcr[2] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCRA: + vcpu->arch.mmcra = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCRS: + vcpu->arch.mmcrs = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCR3: + *val = get_reg_val(id, vcpu->arch.mmcr[3]); + break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); @@ -1919,7 +1958,13 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.sdar = set_reg_val(id, *val); break; case KVM_REG_PPC_SIER: - vcpu->arch.sier = set_reg_val(id, *val); + vcpu->arch.sier[0] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SIER2: + vcpu->arch.sier[1] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SIER3: + vcpu->arch.sier[2] = set_reg_val(id, *val); break; case KVM_REG_PPC_IAMR: vcpu->arch.iamr = set_reg_val(id, *val); @@ -2125,7 +2170,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id) spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); - init_swait_queue_head(&vcore->wq); + rcuwait_init(&vcore->wait); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; vcore->first_vcpuid = id; @@ -2318,7 +2363,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) * to trap and then we emulate them. 
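[Annotation] The new ONE_REG cases above expose MMCRA, MMCRS and the ISA v3.1 PMU registers (MMCR3, SIER2, SIER3) to userspace as separate register IDs instead of packing them into the old MMCR0..MMCRS range. A QEMU-style consumer reads them with KVM_GET_ONE_REG; a minimal sketch, assuming the KVM_REG_PPC_MMCR3/SIER2/SIER3 ID macros that the uapi side of this series adds:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static uint64_t get_ppc_one_reg(int vcpu_fd, uint64_t id)
{
        uint64_t val = 0;
        struct kvm_one_reg reg = {
                .id   = id,
                .addr = (uintptr_t)&val,
        };

        /* Older kernels reject unknown IDs; treat that as "register absent". */
        if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
                return 0;
        return val;
}

/* e.g. get_ppc_one_reg(vcpu_fd, KVM_REG_PPC_MMCR3) on an ISA v3.1 host */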
*/ vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | - HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; + HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) @@ -2934,7 +2979,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) ret = RESUME_GUEST; if (vcpu->arch.trap) - ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, + ret = kvmppc_handle_exit_hv(vcpu, vcpu->arch.run_task); vcpu->arch.ret = ret; @@ -3392,8 +3437,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, int trap; unsigned long host_hfscr = mfspr(SPRN_HFSCR); unsigned long host_ciabr = mfspr(SPRN_CIABR); - unsigned long host_dawr = mfspr(SPRN_DAWR); - unsigned long host_dawrx = mfspr(SPRN_DAWRX); + unsigned long host_dawr = mfspr(SPRN_DAWR0); + unsigned long host_dawrx = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); @@ -3422,8 +3467,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_SPURR, vcpu->arch.spurr); if (dawr_enabled()) { - mtspr(SPRN_DAWR, vcpu->arch.dawr); - mtspr(SPRN_DAWRX, vcpu->arch.dawrx); + mtspr(SPRN_DAWR0, vcpu->arch.dawr); + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx); } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3475,8 +3520,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR, host_dawr); - mtspr(SPRN_DAWRX, host_dawrx); + mtspr(SPRN_DAWR0, host_dawr); + mtspr(SPRN_DAWRX0, host_dawrx); mtspr(SPRN_PID, host_pidr); /* @@ -3784,7 +3829,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) ktime_t cur, start_poll, start_wait; int do_sleep = 1; u64 block_ns; - DECLARE_SWAITQUEUE(wait); /* Poll for pending exceptions and ceded state */ cur = start_poll = ktime_get(); @@ -3812,10 +3856,10 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) } } - prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE); - + prepare_to_rcuwait(&vc->wait); + set_current_state(TASK_INTERRUPTIBLE); if (kvmppc_vcore_check_block(vc)) { - finish_swait(&vc->wq, &wait); + finish_rcuwait(&vc->wait); do_sleep = 0; /* If we polled, count this as a successful poll */ if (vc->halt_poll_ns) @@ -3829,7 +3873,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); - finish_swait(&vc->wq, &wait); + finish_rcuwait(&vc->wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); @@ -3900,15 +3944,16 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) return r; } -static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int n_ceded, i, r; struct kvmppc_vcore *vc; struct kvm_vcpu *v; trace_kvmppc_run_vcpu_enter(vcpu); - kvm_run->exit_reason = 0; + run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; kvmppc_update_vpas(vcpu); @@ -3920,7 +3965,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) spin_lock(&vc->lock); vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.kvm_run = kvm_run; vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = 
KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; @@ -3940,7 +3984,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { - swake_up_one(&vc->wq); + rcuwait_wake_up(&vc->wait); } } @@ -3953,8 +3997,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) r = kvmhv_setup_mmu(vcpu); spin_lock(&vc->lock); if (r) { - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - kvm_run->fail_entry. + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry. hardware_entry_failure_reason = 0; vcpu->arch.ret = r; break; @@ -3973,7 +4017,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); v->stat.signal_exits++; - v->arch.kvm_run->exit_reason = KVM_EXIT_INTR; + v->run->exit_reason = KVM_EXIT_INTR; v->arch.ret = -EINTR; wake_up(&v->arch.cpu_run); } @@ -4014,7 +4058,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { kvmppc_remove_runnable(vc, vcpu); vcpu->stat.signal_exits++; - kvm_run->exit_reason = KVM_EXIT_INTR; + run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; } @@ -4025,15 +4069,15 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } - trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); + trace_kvmppc_run_vcpu_exit(vcpu); spin_unlock(&vc->lock); return vcpu->arch.ret; } -int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, - struct kvm_vcpu *vcpu, u64 time_limit, +int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) { + struct kvm_run *run = vcpu->run; int trap, r, pcpu; int srcu_idx, lpid; struct kvmppc_vcore *vc; @@ -4042,14 +4086,13 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, trace_kvmppc_run_vcpu_enter(vcpu); - kvm_run->exit_reason = 0; + run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; vc = vcpu->arch.vcore; vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.kvm_run = kvm_run; vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; @@ -4167,9 +4210,9 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, r = RESUME_GUEST; if (trap) { if (!nested) - r = kvmppc_handle_exit_hv(kvm_run, vcpu, current); + r = kvmppc_handle_exit_hv(vcpu, current); else - r = kvmppc_handle_nested_exit(kvm_run, vcpu); + r = kvmppc_handle_nested_exit(vcpu); } vcpu->arch.ret = r; @@ -4179,7 +4222,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) { if (signal_pending(current)) { vcpu->stat.signal_exits++; - kvm_run->exit_reason = KVM_EXIT_INTR; + run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; break; } @@ -4195,13 +4238,13 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, done: kvmppc_remove_runnable(vc, vcpu); - trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); + trace_kvmppc_run_vcpu_exit(vcpu); return vcpu->arch.ret; sigpend: vcpu->stat.signal_exits++; - kvm_run->exit_reason = KVM_EXIT_INTR; + run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: local_irq_enable(); @@ -4209,8 +4252,9 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, goto done; } -static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; 
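[Annotation] The hunks above swap the vcore's swait queue for an rcuwait. An rcuwait supports a single sleeper, which fits this use because at most one task (the vcore runner) blocks on it at a time, and rcuwait_wake_up() reports whether a sleeper was actually woken, which is exactly what the halt_wakeup statistic wants. Both sides of the primitive as used here, condensed into one sketch (the real code also drops vc->lock around schedule()):

        /* Sleep side, in kvmppc_vcore_blocked(): */
        prepare_to_rcuwait(&vc->wait);
        set_current_state(TASK_INTERRUPTIBLE);
        if (!kvmppc_vcore_check_block(vc))
                schedule();
        finish_rcuwait(&vc->wait);

        /* Wake side, e.g. kvmppc_fast_vcpu_kick_hv(): */
        if (rcuwait_wake_up(&vc->wait))
                ++vcpu->stat.halt_wakeup;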
int r; int srcu_idx; unsigned long ebb_regs[3] = {}; /* shut up GCC */ @@ -4279,7 +4323,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } user_vrsave = mfspr(SPRN_VRSAVE); - vcpu->arch.wqp = &vcpu->arch.vcore->wq; + vcpu->arch.waitp = &vcpu->arch.vcore->wait; vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -4294,10 +4338,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) */ if (kvm->arch.threads_indep && kvm_is_radix(kvm) && !no_mixing_hpt_and_radix) - r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0, + r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); else - r = kvmppc_run_vcpu(run, vcpu); + r = kvmppc_run_vcpu(vcpu); if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { @@ -4307,7 +4351,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { srcu_idx = srcu_read_lock(&kvm->srcu); - r = kvmppc_book3s_hv_page_fault(run, vcpu, + r = kvmppc_book3s_hv_page_fault(vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&kvm->srcu, srcu_idx); } else if (r == RESUME_PASSTHROUGH) { @@ -4516,16 +4560,14 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, switch (change) { case KVM_MR_CREATE: - if (kvmppc_uvmem_slot_init(kvm, new)) - return; - uv_register_mem_slot(kvm->arch.lpid, - new->base_gfn << PAGE_SHIFT, - new->npages * PAGE_SIZE, - 0, new->id); + /* + * @TODO kvmppc_uvmem_memslot_create() can fail and + * return error. Fix this. + */ + kvmppc_uvmem_memslot_create(kvm, new); break; case KVM_MR_DELETE: - uv_unregister_mem_slot(kvm->arch.lpid, old->id); - kvmppc_uvmem_slot_free(kvm, old); + kvmppc_uvmem_memslot_delete(kvm, old); break; default: /* TODO: Handle KVM_MR_MOVE */ @@ -4627,14 +4669,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* Look up the VMA for the start of this memory slot */ hva = memslot->userspace_addr; - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); vma = find_vma(kvm->mm, hva); if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO)) goto up_out; psize = vma_kernel_pagesize(vma); - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); /* We can handle 4k, 64k or 16M pages in the VRMA */ if (psize >= 0x1000000) @@ -4667,7 +4709,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) return err; up_out: - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); goto out_srcu; } @@ -4981,7 +5023,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) } /* We don't need to emulate any privileged instructions or dcbz */ -static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_core_emulate_op_hv(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cd3cf3d366b..073617ce83e0 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -113,7 +113,7 @@ void __init kvm_cma_reserve(void) selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; if (selected_size) { - pr_debug("%s: reserving %ld MiB for global area\n", __func__, + pr_info("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; cma_declare_contiguous(0, selected_size, 0, align_size, diff --git 
a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 63fd81f3039d..59822cba454d 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -140,6 +140,14 @@ BEGIN_FTR_SECTION std r8, HSTATE_MMCR2(r13) std r9, HSTATE_SIER(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r5, SPRN_MMCR3 + mfspr r6, SPRN_SIER2 + mfspr r7, SPRN_SIER3 + std r5, HSTATE_MMCR3(r13) + std r6, HSTATE_SIER2(r13) + std r7, HSTATE_SIER3(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mfspr r3, SPRN_PMC1 mfspr r5, SPRN_PMC2 mfspr r6, SPRN_PMC3 diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index dc97e5be76f6..6822d23a2da4 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -11,11 +11,11 @@ #include <linux/kernel.h> #include <linux/kvm_host.h> #include <linux/llist.h> +#include <linux/pgtable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/pte-walk.h> #include <asm/reg.h> @@ -233,20 +233,21 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); + regs_ptr = kvmppc_get_gpr(vcpu, 5); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); + sizeof(struct hv_guest_state)) || + kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_PARAMETER; + if (kvmppc_need_byteswap(vcpu)) byteswap_hv_regs(&l2_hv); if (l2_hv.version != HV_GUEST_STATE_VERSION) return H_P2; - regs_ptr = kvmppc_get_gpr(vcpu, 5); - err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); - if (err) - return H_PARAMETER; if (kvmppc_need_byteswap(vcpu)) byteswap_pt_regs(&l2_regs); if (l2_hv.vcpu_token >= NR_CPUS) @@ -290,8 +291,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp, - lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ @@ -324,12 +324,12 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) byteswap_hv_regs(&l2_hv); byteswap_pt_regs(&l2_regs); } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); - if (err) - return H_AUTHORITY; - err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct hv_guest_state)) || + kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_AUTHORITY; @@ -509,12 +509,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) goto not_found; /* Write what was loaded into our buffer back to the L1 guest */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; } else { /* Load the data to be stored from the L1 guest into our buf */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; @@ -549,9 +553,12 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) ret = -EFAULT; ptbl_addr = 
(kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); - if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) { + int srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl_addr, &ptbl_entry, sizeof(ptbl_entry)); + srcu_read_unlock(&kvm->srcu, srcu_idx); + } if (ret) { gp->l1_gr_to_hr = 0; gp->process_table = 0; @@ -750,6 +757,23 @@ static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid) return kvm->arch.nested_guests[lpid]; } +pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, + unsigned long ea, unsigned *hshift) +{ + struct kvm_nested_guest *gp; + pte_t *pte; + + gp = kvmhv_find_nested(kvm, lpid); + if (!gp) + return NULL; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift); + + return pte; +} + static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2) { return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK | @@ -792,19 +816,15 @@ static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap, unsigned long clr, unsigned long set, unsigned long hpa, unsigned long mask) { - struct kvm_nested_guest *gp; unsigned long gpa; unsigned int shift, lpid; pte_t *ptep; gpa = n_rmap & RMAP_NESTED_GPA_MASK; lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT; - gp = kvmhv_find_nested(kvm, lpid); - if (!gp) - return; /* Find the pte */ - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* * If the pte is present and the pfn is still the same, update the pte. * If the pfn has changed then this is a stale rmap entry, the nested @@ -854,7 +874,7 @@ static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap, return; /* Find and invalidate the pte */ - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* Don't spuriously invalidate ptes if the pfn has changed */ if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid); @@ -921,7 +941,7 @@ static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu, int shift; spin_lock(&kvm->mmu_lock); - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (ptep && pte_present(*ptep)) { @@ -1169,7 +1189,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { /* Can we execute? 
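[Annotation] The srcu_read_lock()/srcu_read_unlock() pairs added around kvm_vcpu_read_guest(), kvm_vcpu_write_guest() and kvm_read_guest() in these nested-guest paths are there because those helpers translate guest physical addresses through kvm->memslots, which is protected by kvm->srcu; calling them outside a read-side section races with memslot updates and trips RCU lockdep. The bracketing is always the same shape:

        int srcu_idx;

        srcu_idx = srcu_read_lock(&kvm->srcu);
        ret = kvm_read_guest(kvm, gpa, buf, len);       /* memslot lookup happens inside */
        srcu_read_unlock(&kvm->srcu, srcu_idx);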
*/ if (!gpte_p->may_execute) { - flags |= SRR1_ISI_N_OR_G; + flags |= SRR1_ISI_N_G_OR_CIP; goto forward_to_l1; } } else { @@ -1212,16 +1232,16 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */ - ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing, - gpte.raddr, kvm->arch.lpid); + ret = kvmppc_hv_handle_set_rc(kvm, false, writing, + gpte.raddr, kvm->arch.lpid); if (!ret) { ret = -EINVAL; goto out_unlock; } /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */ - ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa, - gp->shadow_lpid); + ret = kvmppc_hv_handle_set_rc(kvm, true, writing, + n_gpa, gp->l1_lpid); if (!ret) ret = -EINVAL; else @@ -1257,8 +1277,7 @@ static inline int kvmppc_radix_shift_to_level(int shift) } /* called with gp->tlb_lock held */ -static long int __kvmhv_nested_page_fault(struct kvm_run *run, - struct kvm_vcpu *vcpu, +static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, struct kvm_nested_guest *gp) { struct kvm *kvm = vcpu->kvm; @@ -1341,7 +1360,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, } /* passthrough of emulated MMIO case */ - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); + return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { if (writing) { @@ -1362,7 +1381,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, /* See if can find translation in our partition scoped tables for L1 */ pte = __pte(0); spin_lock(&kvm->mmu_lock); - pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + pte_p = find_kvm_secondary_pte(kvm, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (pte_p) @@ -1416,8 +1435,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level, mmu_seq, gp->shadow_lpid, rmapp, &n_rmap); - if (n_rmap) - kfree(n_rmap); + kfree(n_rmap); if (ret == -EAGAIN) ret = RESUME_GUEST; /* Let the guest try again */ @@ -1428,13 +1446,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, return RESUME_GUEST; } -long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu) +long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) { struct kvm_nested_guest *gp = vcpu->arch.nested; long int ret; mutex_lock(&gp->tlb_lock); - ret = __kvmhv_nested_page_fault(run, vcpu, gp); + ret = __kvmhv_nested_page_fault(vcpu, gp); mutex_unlock(&gp->tlb_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 79f7d07ef674..6028628ea3ac 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -244,7 +244,7 @@ long kvmppc_realmode_hmi_handler(void) { bool resync_req; - __this_cpu_inc(irq_stat.hmi_exceptions); + local_paca->hmi_irqs++; if (hmi_handle_debugtrig(NULL) >= 0) return 1; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 220305454c23..88da2764c1bb 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -210,7 +210,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pte_t *ptep; unsigned int writing; unsigned long mmu_seq; - unsigned long rcbits, irq_flags = 0; + unsigned long rcbits; if (kvm_is_radix(kvm)) return H_FUNCTION; @@ -248,17 +248,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, 
unsigned long flags, /* Translate to host virtual address */ hva = __gfn_to_hva_memslot(memslot, gfn); - /* - * If we had a page table table change after lookup, we would - * retry via mmu_notifier_retry. - */ - if (!realmode) - local_irq_save(irq_flags); - /* - * If called in real mode we have MSR_EE = 0. Otherwise - * we disable irq above. - */ - ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); + + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; @@ -272,8 +264,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * to <= host page size, if host is using hugepage */ if (host_pte_size < psize) { - if (!realmode) - local_irq_restore(flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return H_PARAMETER; } pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -287,8 +278,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pa |= gpa & ~PAGE_MASK; } } - if (!realmode) - local_irq_restore(irq_flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); ptel |= pa; @@ -888,8 +878,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, return ret; } -static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, - int writing, unsigned long *hpa, +static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long gpa, int writing, unsigned long *hpa, struct kvm_memory_slot **memslot_p) { struct kvm *kvm = vcpu->kvm; @@ -908,7 +898,7 @@ static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, hva = __gfn_to_hva_memslot(memslot, gfn); /* Try to find the host pte for that virtual address */ - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); if (!ptep) return H_TOO_HARD; pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -943,16 +933,11 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); - if (ret != H_SUCCESS) - return ret; + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if (mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot); + if (ret != H_SUCCESS) goto out_unlock; - } /* Zero the page */ for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) @@ -960,7 +945,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -976,19 +961,14 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); - if (ret != H_SUCCESS) - return ret; - ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot); if (ret != H_SUCCESS) - return ret; + goto out_unlock; - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if (mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL); + if (ret != H_SUCCESS) goto out_unlock; - } /* Copy the page */ 
memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); @@ -996,7 +976,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -1260,7 +1240,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ if (!data) { if (gr & (HPTE_R_N | HPTE_R_G)) - return status | SRR1_ISI_N_OR_G; + return status | SRR1_ISI_N_G_OR_CIP; if (!hpte_read_permission(pp, slb_v & key)) return status | SRR1_ISI_PROT; } else if (status & DSISR_ISSTORE) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 287d5911df0f..4d7e5610731a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -8,6 +8,7 @@ #include <linux/kvm_host.h> #include <linux/err.h> #include <linux/kernel_stat.h> +#include <linux/pgtable.h> #include <asm/kvm_book3s.h> #include <asm/kvm_ppc.h> @@ -15,7 +16,6 @@ #include <asm/xics.h> #include <asm/synch.h> #include <asm/cputhreads.h> -#include <asm/pgtable.h> #include <asm/ppc-opcode.h> #include <asm/pnv-pci.h> #include <asm/opal.h> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c index 174d75e476fa..6f18632e30e9 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c @@ -3,6 +3,7 @@ #include <linux/kvm_host.h> #include <linux/err.h> #include <linux/kernel_stat.h> +#include <linux/pgtable.h> #include <asm/kvm_book3s.h> #include <asm/kvm_ppc.h> @@ -11,7 +12,6 @@ #include <asm/debug.h> #include <asm/synch.h> #include <asm/cputhreads.h> -#include <asm/pgtable.h> #include <asm/ppc-opcode.h> #include <asm/pnv-pci.h> #include <asm/opal.h> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 780a499c7114..799d6d0f4ead 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -707,8 +707,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION mfspr r5, SPRN_CIABR - mfspr r6, SPRN_DAWR - mfspr r7, SPRN_DAWRX + mfspr r6, SPRN_DAWR0 + mfspr r7, SPRN_DAWRX0 mfspr r8, SPRN_IAMR std r5, STACK_SLOT_CIABR(r1) std r6, STACK_SLOT_DAWR(r1) @@ -803,8 +803,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) beq 1f ld r5, VCPU_DAWR(r4) ld r6, VCPU_DAWRX(r4) - mtspr SPRN_DAWR, r5 - mtspr SPRN_DAWRX, r6 + mtspr SPRN_DAWR0, r5 + mtspr SPRN_DAWRX0, r6 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -1766,8 +1766,8 @@ BEGIN_FTR_SECTION * If the DAWR doesn't work, it's ok to write these here as * this value should always be zero */ - mtspr SPRN_DAWR, r6 - mtspr SPRN_DAWRX, r7 + mtspr SPRN_DAWR0, r6 + mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) @@ -2577,8 +2577,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfmsr r6 andi. r6, r6, MSR_DR /* in real mode? */ bne 4f - mtspr SPRN_DAWR, r4 - mtspr SPRN_DAWRX, r5 + mtspr SPRN_DAWR0, r4 + mtspr SPRN_DAWRX0, r5 4: li r3, 0 blr @@ -2907,6 +2907,11 @@ kvm_cede_exit: beq 4f li r0, 0 stb r0, VCPU_CEDED(r9) + /* + * The escalation interrupts are special as we don't EOI them. + * There is no need to use the load-after-store ordering offset + * to set PQ to 10 as we won't use StoreEOI. 
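[Annotation] The assembly hunks that follow save and restore the ISA v3.1 PMU registers (MMCR3, SIER2, SIER3) and move MMCRA and MMCRS out of the old mmcr[] packing. The offsets used (VCPU_MMCR + 24, VCPU_SIER + 8 and + 16, VCPU_MMCRA, VCPU_MMCRS) imply roughly the per-vcpu layout below; the authoritative definitions live in asm/kvm_host.h and asm-offsets.c, so read this as an inferred aid rather than the actual structure:

        /* struct kvm_vcpu_arch, PMU context (layout inferred from the offsets): */
        u64 mmcr[4];    /* MMCR0, MMCR1, MMCR2, MMCR3 (MMCR3 is ISA v3.1 only) */
        u64 mmcra;      /* split out of the old mmcr[] packing */
        u64 mmcrs;      /* likewise; HV-mode sampling control */
        u64 sier[3];    /* SIER, SIER2, SIER3 (SIER2/3 are ISA v3.1 only) */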
+ */ li r6, XIVE_ESB_SET_PQ_10 b 5f 4: li r0, 1 @@ -3329,7 +3334,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_AMR, r0 mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_DAWRX0, r0 BEGIN_MMU_FTR_SECTION b 4f @@ -3423,7 +3428,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_PMC6, r9 ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) - ld r6, VCPU_MMCR + 16(r4) + ld r6, VCPU_MMCRA(r4) ld r7, VCPU_SIAR(r4) ld r8, VCPU_SDAR(r4) mtspr SPRN_MMCR1, r5 @@ -3431,14 +3436,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_SIAR, r7 mtspr SPRN_SDAR, r8 BEGIN_FTR_SECTION - ld r5, VCPU_MMCR + 24(r4) + ld r5, VCPU_MMCR + 24(r4) + ld r6, VCPU_SIER + 8(r4) + ld r7, VCPU_SIER + 16(r4) + mtspr SPRN_MMCR3, r5 + mtspr SPRN_SIER2, r6 + mtspr SPRN_SIER3, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) +BEGIN_FTR_SECTION + ld r5, VCPU_MMCR + 16(r4) ld r6, VCPU_SIER(r4) mtspr SPRN_MMCR2, r5 mtspr SPRN_SIER, r6 BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) - ld r9, VCPU_MMCR + 32(r4) + ld r9, VCPU_MMCRS(r4) mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 @@ -3491,6 +3504,14 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCR2, r8 mtspr SPRN_SIER, r9 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + ld r5, HSTATE_MMCR3(r13) + ld r6, HSTATE_SIER2(r13) + ld r7, HSTATE_SIER3(r13) + mtspr SPRN_MMCR3, r5 + mtspr SPRN_SIER2, r6 + mtspr SPRN_SIER3, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mtspr SPRN_MMCR0, r3 isync mtlr r0 @@ -3546,10 +3567,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r8, SPRN_SDAR std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) - std r6, VCPU_MMCR + 16(r9) + std r6, VCPU_MMCRA(r9) BEGIN_FTR_SECTION - std r10, VCPU_MMCR + 24(r9) + std r10, VCPU_MMCR + 16(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r5, SPRN_MMCR3 + mfspr r6, SPRN_SIER2 + mfspr r7, SPRN_SIER3 + std r5, VCPU_MMCR + 24(r9) + std r6, VCPU_SIER + 8(r9) + std r7, VCPU_SIER + 16(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) std r7, VCPU_SIAR(r9) std r8, VCPU_SDAR(r9) mfspr r3, SPRN_PMC1 @@ -3573,7 +3602,7 @@ BEGIN_FTR_SECTION_NESTED(96) mfspr r8, SPRN_MMCRS stw r6, VCPU_PMC + 24(r9) stw r7, VCPU_PMC + 28(r9) - std r8, VCPU_MMCR + 32(r9) + std r8, VCPU_MMCRS(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 76d05c71fb1f..7705d5557239 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -47,7 +47,7 @@ * Locking order * * 1. kvm->srcu - Protects KVM memslots - * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise + * 2. kvm->mm->mmap_lock - find_vma, migrate_vma_pages and helpers, ksm_madvise * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting * as sync-points for page-in/out */ @@ -93,12 +93,133 @@ #include <asm/ultravisor.h> #include <asm/mman.h> #include <asm/kvm_ppc.h> +#include <asm/kvm_book3s_uvmem.h> static struct dev_pagemap kvmppc_uvmem_pgmap; static unsigned long *kvmppc_uvmem_bitmap; static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); -#define KVMPPC_UVMEM_PFN (1UL << 63) +/* + * States of a GFN + * --------------- + * The GFN can be in one of the following states. + * + * (a) Secure - The GFN is secure. The GFN is associated with + * a Secure VM, the contents of the GFN is not accessible + * to the Hypervisor. 
This GFN can be backed by a secure-PFN, + * or can be backed by a normal-PFN with contents encrypted. + * The former is true when the GFN is paged into the + * ultravisor. The latter is true when the GFN is paged out + * of the ultravisor. + * + * (b) Shared - The GFN is shared. The GFN is associated with + * a secure VM. The contents of the GFN are accessible to + * the Hypervisor. This GFN is backed by a normal-PFN and its + * content is unencrypted. + * + * (c) Normal - The GFN is normal. The GFN is associated with + * a normal VM. The contents of the GFN are accessible to + * the Hypervisor. Its content is never encrypted. + * + * States of a VM. + * --------------- + * + * Normal VM: A VM whose contents are always accessible to + * the hypervisor. All its GFNs are normal-GFNs. + * + * Secure VM: A VM whose contents are not accessible to the + * hypervisor without the VM's consent. Its GFNs are + * either Shared-GFNs or Secure-GFNs. + * + * Transient VM: A Normal VM that is transitioning to a secure VM. + * The transition starts on successful return of + * H_SVM_INIT_START, and ends on successful return + * of H_SVM_INIT_DONE. This transient VM can have GFNs + * in any of the three states, i.e. Secure-GFN, Shared-GFN, + * and Normal-GFN. The VM never executes in this state + * in supervisor mode. + * + * Memory slot State. + * ----------------------------- + * The state of a memory slot mirrors the state of the + * VM the memory slot is associated with. + * + * VM State transition. + * -------------------- + * + * A VM always starts in Normal Mode. + * + * H_SVM_INIT_START moves the VM into transient state. During this + * time the Ultravisor may request some of its GFNs to be shared or + * secured. So its GFNs can be in one of the three GFN states. + * + * H_SVM_INIT_DONE moves the VM entirely from transient state to + * secure-state. At this point any left-over normal-GFNs are + * transitioned to Secure-GFNs. + * + * H_SVM_INIT_ABORT moves the transient VM back to a normal VM. + * All its GFNs are moved to Normal-GFNs. + * + * UV_TERMINATE transitions the secure-VM back to a normal-VM. All + * the secure-GFNs and shared-GFNs are transitioned to normal-GFNs. + * Note: The contents of the normal-GFNs are undefined at this point. + * + * GFN state implementation: + * ------------------------- + * + * Secure GFN is associated with a secure-PFN; also called uvmem_pfn, + * when the GFN is paged-in. Its pfn[] has the KVMPPC_GFN_UVMEM_PFN flag + * set, and contains the value of the secure-PFN. + * It is associated with a normal-PFN; also called mem_pfn, when + * the GFN is paged-out. Its pfn[] has the KVMPPC_GFN_MEM_PFN flag set. + * The value of the normal-PFN is not tracked. + * + * Shared GFN is associated with a normal-PFN. Its pfn[] has the + * KVMPPC_GFN_SHARED flag set. The value of the normal-PFN + * is not tracked. + * + * Normal GFN is associated with a normal-PFN. Its pfn[] has + * no flag set. The value of the normal-PFN is not tracked.
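To make the encoding above concrete, here is a minimal sketch of how a pfn[] entry can be decoded using the KVMPPC_GFN_* macros the patch defines just below (the helpers are illustrative; the patch itself open-codes these tests in kvmppc_gfn_is_uvmem_pfn() and kvmppc_next_nontransitioned_gfn()):

#include <linux/types.h>

/* Illustrative decode helpers; they rely on the KVMPPC_GFN_* macros
 * defined below in this file. */
static inline bool gfn_entry_is_secure(unsigned long entry)
{
	/* Secure means backed by either a device (uvmem) PFN or a mem PFN. */
	return entry & KVMPPC_GFN_SECURE;
}

static inline bool gfn_entry_is_shared(unsigned long entry)
{
	return entry & KVMPPC_GFN_SHARED;
}

static inline unsigned long gfn_entry_uvmem_pfn(unsigned long entry)
{
	/* Meaningful only when KVMPPC_GFN_UVMEM_PFN is set in @entry. */
	return entry & KVMPPC_GFN_PFN_MASK;
}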
+ * + * Life cycle of a GFN + * -------------------- + * + * -------------------------------------------------------------- + * | | Share | Unshare | SVM |H_SVM_INIT_DONE| + * | |operation |operation | abort/ | | + * | | | | terminate | | + * ------------------------------------------------------------- + * | | | | | | + * | Secure | Shared | Secure |Normal |Secure | + * | | | | | | + * | Shared | Shared | Secure |Normal |Shared | + * | | | | | | + * | Normal | Shared | Secure |Normal |Secure | + * -------------------------------------------------------------- + * + * Life cycle of a VM + * -------------------- + * + * -------------------------------------------------------------------- + * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ | + * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE | + * | | | | | | | + * --------- ---------------------------------------------------------- + * | | | | | | | + * | Normal | Normal | Transient|Error |Error |Normal | + * | | | | | | | + * | Secure | Error | Error |Error |Error |Normal | + * | | | | | | | + * |Transient| N/A | Error |Secure |Normal |Normal | + * -------------------------------------------------------------------- + */ + +#define KVMPPC_GFN_UVMEM_PFN (1UL << 63) +#define KVMPPC_GFN_MEM_PFN (1UL << 62) +#define KVMPPC_GFN_SHARED (1UL << 61) +#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN) +#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED) +#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK) struct kvmppc_uvmem_slot { struct list_head list; @@ -106,11 +227,11 @@ struct kvmppc_uvmem_slot { unsigned long base_pfn; unsigned long *pfns; }; - struct kvmppc_uvmem_page_pvt { struct kvm *kvm; unsigned long gpa; bool skip_page_out; + bool remove_gfn; }; bool kvmppc_uvmem_available(void) @@ -163,8 +284,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) mutex_unlock(&kvm->arch.uvmem_lock); } -static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, - struct kvm *kvm) +static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm, + unsigned long flag, unsigned long uvmem_pfn) { struct kvmppc_uvmem_slot *p; @@ -172,24 +293,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN; + if (flag == KVMPPC_GFN_UVMEM_PFN) + p->pfns[index] = uvmem_pfn | flag; + else + p->pfns[index] = flag; return; } } } -static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm) +/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */ +static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn, + unsigned long uvmem_pfn, struct kvm *kvm) { - struct kvmppc_uvmem_slot *p; + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn); +} - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { - if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { - p->pfns[gfn - p->base_pfn] = 0; - return; - } - } +/* mark the GFN as secure-GFN associated with a memory-PFN. */ +static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0); +} + +/* mark the GFN as a shared GFN. */ +static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0); +} + +/* mark the GFN as a non-existent GFN. 
*/ +static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, 0, 0); } +/* return true, if the GFN is a secure-GFN backed by a secure-PFN */ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, unsigned long *uvmem_pfn) { @@ -199,10 +337,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - if (p->pfns[index] & KVMPPC_UVMEM_PFN) { + if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) { if (uvmem_pfn) *uvmem_pfn = p->pfns[index] & - ~KVMPPC_UVMEM_PFN; + KVMPPC_GFN_PFN_MASK; return true; } else return false; @@ -211,10 +349,114 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, return false; } +/* + * starting from *gfn search for the next available GFN that is not yet + * transitioned to a secure GFN. return the value of that GFN in *gfn. If a + * GFN is found, return true, else return false + * + * Must be called with kvm->arch.uvmem_lock held. + */ +static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, + struct kvm *kvm, unsigned long *gfn) +{ + struct kvmppc_uvmem_slot *p; + bool ret = false; + unsigned long i; + + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) + if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + break; + if (!p) + return ret; + /* + * The code below assumes, one to one correspondence between + * kvmppc_uvmem_slot and memslot. + */ + for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) { + unsigned long index = i - p->base_pfn; + + if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) { + *gfn = i; + ret = true; + break; + } + } + return ret; +} + +static int kvmppc_memslot_page_merge(struct kvm *kvm, + const struct kvm_memory_slot *memslot, bool merge) +{ + unsigned long gfn = memslot->base_gfn; + unsigned long end, start = gfn_to_hva(kvm, gfn); + int ret = 0; + struct vm_area_struct *vma; + int merge_flag = (merge) ? 
MADV_MERGEABLE : MADV_UNMERGEABLE; + + if (kvm_is_error_hva(start)) + return H_STATE; + + end = start + (memslot->npages << PAGE_SHIFT); + + mmap_write_lock(kvm->mm); + do { + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma) { + ret = H_STATE; + break; + } + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, + merge_flag, &vma->vm_flags); + if (ret) { + ret = H_STATE; + break; + } + start = vma->vm_end; + } while (end > vma->vm_end); + + mmap_write_unlock(kvm->mm); + return ret; +} + +static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); + kvmppc_uvmem_slot_free(kvm, memslot); + kvmppc_memslot_page_merge(kvm, memslot, true); +} + +static int __kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + int ret = H_PARAMETER; + + if (kvmppc_memslot_page_merge(kvm, memslot, false)) + return ret; + + if (kvmppc_uvmem_slot_init(kvm, memslot)) + goto out1; + + ret = uv_register_mem_slot(kvm->arch.lpid, + memslot->base_gfn << PAGE_SHIFT, + memslot->npages * PAGE_SIZE, + 0, memslot->id); + if (ret < 0) { + ret = H_PARAMETER; + goto out; + } + return 0; +out: + kvmppc_uvmem_slot_free(kvm, memslot); +out1: + kvmppc_memslot_page_merge(kvm, memslot, true); + return ret; +} + unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) { struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; + struct kvm_memory_slot *memslot, *m; int ret = H_SUCCESS; int srcu_idx; @@ -232,35 +474,117 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) return H_AUTHORITY; srcu_idx = srcu_read_lock(&kvm->srcu); + + /* register the memslot */ slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - if (kvmppc_uvmem_slot_init(kvm, memslot)) { - ret = H_PARAMETER; - goto out; - } - ret = uv_register_mem_slot(kvm->arch.lpid, - memslot->base_gfn << PAGE_SHIFT, - memslot->npages * PAGE_SIZE, - 0, memslot->id); - if (ret < 0) { - kvmppc_uvmem_slot_free(kvm, memslot); - ret = H_PARAMETER; - goto out; + ret = __kvmppc_uvmem_memslot_create(kvm, memslot); + if (ret) + break; + } + + if (ret) { + slots = kvm_memslots(kvm); + kvm_for_each_memslot(m, slots) { + if (m == memslot) + break; + __kvmppc_uvmem_memslot_delete(kvm, memslot); } } -out: + srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } -unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) +/* + * Provision a new page on HV side and copy over the contents + * from secure memory using UV_PAGE_OUT uvcall. + * Caller must held kvm->arch.uvmem_lock. 
+ */ +static int __kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) { - if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) - return H_UNSUPPORTED; + unsigned long src_pfn, dst_pfn = 0; + struct migrate_vma mig; + struct page *dpage, *spage; + struct kvmppc_uvmem_page_pvt *pvt; + unsigned long pfn; + int ret = U_SUCCESS; - kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; - pr_info("LPID %d went secure\n", kvm->arch.lpid); - return H_SUCCESS; + memset(&mig, 0, sizeof(mig)); + mig.vma = vma; + mig.start = start; + mig.end = end; + mig.src = &src_pfn; + mig.dst = &dst_pfn; + mig.pgmap_owner = &kvmppc_uvmem_pgmap; + mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + + /* The requested page is already paged-out, nothing to do */ + if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) + return ret; + + ret = migrate_vma_setup(&mig); + if (ret) + return -1; + + spage = migrate_pfn_to_page(*mig.src); + if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) + goto out_finalize; + + if (!is_zone_device_page(spage)) + goto out_finalize; + + dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); + if (!dpage) { + ret = -1; + goto out_finalize; + } + + lock_page(dpage); + pvt = spage->zone_device_data; + pfn = page_to_pfn(dpage); + + /* + * This function is used in two cases: + * - When HV touches a secure page, for which we do UV_PAGE_OUT + * - When a secure page is converted to shared page, we *get* + * the page to essentially unmap the device page. In this + * case we skip page-out. + */ + if (!pvt->skip_page_out) + ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + + if (ret == U_SUCCESS) + *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; + else { + unlock_page(dpage); + __free_page(dpage); + goto out_finalize; + } + + migrate_vma_pages(&mig); + +out_finalize: + migrate_vma_finalize(&mig); + return ret; +} + +static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) +{ + int ret; + + mutex_lock(&kvm->arch.uvmem_lock); + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); + mutex_unlock(&kvm->arch.uvmem_lock); + + return ret; } /* @@ -271,33 +595,53 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) * fault on them, do fault time migration to replace the device PTEs in * QEMU page table with normal PTEs from newly allocated pages. 
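The fault-time migration mentioned here is driven through the device-private pagemap: when the hypervisor touches a secure page, the migrate_to_ram callback pages it out, and page_free returns the device PFN to the pool once the last reference drops. A rough sketch of that wiring, assuming the hookup done in this file's init path (not part of the hunks shown; the struct name below is illustrative):

#include <linux/memremap.h>

/* Sketch only: ties kvmppc_uvmem_pgmap to the two callbacks that appear
 * later in this file. The actual registration happens at init time,
 * outside this hunk. */
static const struct dev_pagemap_ops kvmppc_uvmem_ops_sketch = {
	.page_free	= kvmppc_uvmem_page_free,
	.migrate_to_ram	= kvmppc_uvmem_migrate_to_ram,
};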
*/ -void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, +void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, struct kvm *kvm, bool skip_page_out) { int i; struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn, uvmem_pfn; - unsigned long gfn = free->base_gfn; + struct page *uvmem_page; + struct vm_area_struct *vma = NULL; + unsigned long uvmem_pfn, gfn; + unsigned long addr; - for (i = free->npages; i; --i, ++gfn) { - struct page *uvmem_page; + mmap_read_lock(kvm->mm); + + addr = slot->userspace_addr; + + gfn = slot->base_gfn; + for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) { + + /* Fetch the VMA if addr is not in the latest fetched one */ + if (!vma || addr >= vma->vm_end) { + vma = find_vma_intersection(kvm->mm, addr, addr+1); + if (!vma) { + pr_err("Can't find VMA for gfn:0x%lx\n", gfn); + break; + } + } mutex_lock(&kvm->arch.uvmem_lock); - if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { - mutex_unlock(&kvm->arch.uvmem_lock); - continue; + + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { + uvmem_page = pfn_to_page(uvmem_pfn); + pvt = uvmem_page->zone_device_data; + pvt->skip_page_out = skip_page_out; + pvt->remove_gfn = true; + + if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, + PAGE_SHIFT, kvm, pvt->gpa)) + pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", + pvt->gpa, addr); + } else { + /* Remove the shared flag if any */ + kvmppc_gfn_remove(gfn, kvm); } - uvmem_page = pfn_to_page(uvmem_pfn); - pvt = uvmem_page->zone_device_data; - pvt->skip_page_out = skip_page_out; mutex_unlock(&kvm->arch.uvmem_lock); - - pfn = gfn_to_pfn(kvm, gfn); - if (is_error_noslot_pfn(pfn)) - continue; - kvm_release_pfn_clean(pfn); } + + mmap_read_unlock(kvm->mm); } unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) @@ -360,7 +704,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) goto out_clear; uvmem_pfn = bit + pfn_first; - kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); + kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); pvt->gpa = gpa; pvt->kvm = kvm; @@ -379,13 +723,14 @@ out: } /* - * Alloc a PFN from private device memory pool and copy page from normal - * memory to secure memory using UV_PAGE_IN uvcall. + * Alloc a PFN from private device memory pool. If @pagein is true, + * copy page from normal memory to secure memory using UV_PAGE_IN uvcall. */ -static int -kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long gpa, struct kvm *kvm, - unsigned long page_shift, bool *downgrade) +static int kvmppc_svm_page_in(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long gpa, struct kvm *kvm, + unsigned long page_shift, + bool pagein) { unsigned long src_pfn, dst_pfn = 0; struct migrate_vma mig; @@ -400,18 +745,7 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, mig.end = end; mig.src = &src_pfn; mig.dst = &dst_pfn; - - /* - * We come here with mmap_sem write lock held just for - * ksm_madvise(), otherwise we only need read mmap_sem. - * Hence downgrade to read lock once ksm_madvise() is done. 
- */ - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags); - downgrade_write(&kvm->mm->mmap_sem); - *downgrade = true; - if (ret) - return ret; + mig.flags = MIGRATE_VMA_SELECT_SYSTEM; ret = migrate_vma_setup(&mig); if (ret) @@ -428,11 +762,16 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, goto out_finalize; } - pfn = *mig.src >> MIGRATE_PFN_SHIFT; - spage = migrate_pfn_to_page(*mig.src); - if (spage) - uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, - page_shift); + if (pagein) { + pfn = *mig.src >> MIGRATE_PFN_SHIFT; + spage = migrate_pfn_to_page(*mig.src); + if (spage) { + ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + if (ret) + goto out_finalize; + } + } *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; migrate_vma_pages(&mig); @@ -441,6 +780,80 @@ out_finalize: return ret; } +static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long gfn = memslot->base_gfn; + struct vm_area_struct *vma; + unsigned long start, end; + int ret = 0; + + mmap_read_lock(kvm->mm); + mutex_lock(&kvm->arch.uvmem_lock); + while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) { + ret = H_STATE; + start = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(start)) + break; + + end = start + (1UL << PAGE_SHIFT); + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma || vma->vm_start > start || vma->vm_end < end) + break; + + ret = kvmppc_svm_page_in(vma, start, end, + (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false); + if (ret) { + ret = H_STATE; + break; + } + + /* relinquish the cpu if needed */ + cond_resched(); + } + mutex_unlock(&kvm->arch.uvmem_lock); + mmap_read_unlock(kvm->mm); + return ret; +} + +unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int srcu_idx; + long ret = H_SUCCESS; + + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) + return H_UNSUPPORTED; + + /* migrate any unmoved normal pfn to device pfns*/ + srcu_idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) { + ret = kvmppc_uv_migrate_mem_slot(kvm, memslot); + if (ret) { + /* + * The pages will remain transitioned. + * Its the callers responsibility to + * terminate the VM, which will undo + * all state of the VM. Till then + * this VM is in a erroneous state. + * Its KVMPPC_SECURE_INIT_DONE will + * remain unset. + */ + ret = H_STATE; + goto out; + } + } + + kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; + pr_info("LPID %d went secure\n", kvm->arch.lpid); + +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return ret; +} + /* * Shares the page with HV, thus making it a normal page. * @@ -450,8 +863,8 @@ out_finalize: * In the former case, uses dev_pagemap_ops.migrate_to_ram handler * to unmap the device page from QEMU's page tables. */ -static unsigned long -kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) +static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa, + unsigned long page_shift) { int ret = H_PARAMETER; @@ -468,6 +881,11 @@ kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + /* + * do not drop the GFN. It is a valid GFN + * that is transitioned to a shared GFN. 
+ */ + pvt->remove_gfn = false; } retry: @@ -481,12 +899,16 @@ retry: uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + pvt->remove_gfn = false; /* it continues to be a valid GFN */ kvm_release_pfn_clean(pfn); goto retry; } - if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift)) + if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, + page_shift)) { + kvmppc_gfn_shared(gfn, kvm); ret = H_SUCCESS; + } kvm_release_pfn_clean(pfn); mutex_unlock(&kvm->arch.uvmem_lock); out: @@ -500,11 +922,10 @@ out: * H_PAGE_IN_SHARED flag makes the page shared which means that the same * memory in is visible from both UV and HV. */ -unsigned long -kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, - unsigned long flags, unsigned long page_shift) +unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, + unsigned long flags, + unsigned long page_shift) { - bool downgrade = false; unsigned long start, end; struct vm_area_struct *vma; int srcu_idx; @@ -525,7 +946,7 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, ret = H_PARAMETER; srcu_idx = srcu_read_lock(&kvm->srcu); - down_write(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); start = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(start)) @@ -541,96 +962,20 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out_unlock; - if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, - &downgrade)) - ret = H_SUCCESS; + if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, + true)) + goto out_unlock; + + ret = H_SUCCESS; + out_unlock: mutex_unlock(&kvm->arch.uvmem_lock); out: - if (downgrade) - up_read(&kvm->mm->mmap_sem); - else - up_write(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } -/* - * Provision a new page on HV side and copy over the contents - * from secure memory using UV_PAGE_OUT uvcall. - */ -static int -kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) -{ - unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; - struct page *dpage, *spage; - struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn; - int ret = U_SUCCESS; - - memset(&mig, 0, sizeof(mig)); - mig.vma = vma; - mig.start = start; - mig.end = end; - mig.src = &src_pfn; - mig.dst = &dst_pfn; - mig.src_owner = &kvmppc_uvmem_pgmap; - - mutex_lock(&kvm->arch.uvmem_lock); - /* The requested page is already paged-out, nothing to do */ - if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) - goto out; - - ret = migrate_vma_setup(&mig); - if (ret) - goto out; - - spage = migrate_pfn_to_page(*mig.src); - if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) - goto out_finalize; - - if (!is_zone_device_page(spage)) - goto out_finalize; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); - if (!dpage) { - ret = -1; - goto out_finalize; - } - - lock_page(dpage); - pvt = spage->zone_device_data; - pfn = page_to_pfn(dpage); - - /* - * This function is used in two cases: - * - When HV touches a secure page, for which we do UV_PAGE_OUT - * - When a secure page is converted to shared page, we *get* - * the page to essentially unmap the device page. In this - * case we skip page-out. 
- */ - if (!pvt->skip_page_out) - ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, - gpa, 0, page_shift); - - if (ret == U_SUCCESS) - *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; - else { - unlock_page(dpage); - __free_page(dpage); - goto out_finalize; - } - - migrate_vma_pages(&mig); -out_finalize: - migrate_vma_finalize(&mig); -out: - mutex_unlock(&kvm->arch.uvmem_lock); - return ret; -} /* * Fault handler callback that gets called when HV touches any page that @@ -655,7 +1000,8 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) /* * Release the device PFN back to the pool * - * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT. + * Gets called when secure GFN tranistions from a secure-PFN + * to a normal PFN during H_SVM_PAGE_OUT. * Gets called with kvm->arch.uvmem_lock held. */ static void kvmppc_uvmem_page_free(struct page *page) @@ -670,7 +1016,10 @@ static void kvmppc_uvmem_page_free(struct page *page) pvt = page->zone_device_data; page->zone_device_data = NULL; - kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + if (pvt->remove_gfn) + kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + else + kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm); kfree(pvt); } @@ -703,7 +1052,7 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, ret = H_PARAMETER; srcu_idx = srcu_read_lock(&kvm->srcu); - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); start = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(start)) goto out; @@ -716,7 +1065,7 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa)) ret = H_SUCCESS; out: - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } @@ -742,6 +1091,21 @@ out: return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT; } +int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new) +{ + int ret = __kvmppc_uvmem_memslot_create(kvm, new); + + if (!ret) + ret = kvmppc_uv_migrate_mem_slot(kvm, new); + + return ret; +} + +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old) +{ + __kvmppc_uvmem_memslot_delete(kvm, old); +} + static u64 kvmppc_get_secmem_size(void) { struct device_node *np; @@ -749,6 +1113,20 @@ static u64 kvmppc_get_secmem_size(void) const __be32 *prop; u64 size = 0; + /* + * First try the new ibm,secure-memory nodes which supersede the + * secure-memory-ranges property. + * If we found some, no need to read the deprecated ones. 
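Since the loop below reads two cells at offset two from each reg property, the ibm,secure-memory nodes are assumed to use a 2-cell address / 2-cell size layout. A small sketch of what of_read_number(prop + 2, 2) computes for one entry (the helper name is illustrative):

#include <linux/of.h>

/* Decode the size half of a <2-cell address, 2-cell size> reg entry,
 * mirroring the of_read_number(prop + 2, 2) call in the loop below. */
static u64 secmem_reg_entry_size(const __be32 *reg)
{
	/* reg[0..1] = 64-bit base address, reg[2..3] = 64-bit size */
	return of_read_number(reg + 2, 2);
}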
+ */ + for_each_compatible_node(np, NULL, "ibm,secure-memory") { + prop = of_get_property(np, "reg", &len); + if (!prop) + continue; + size += of_read_number(prop + 2, 2); + } + if (size) + return size; + np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware"); if (!np) goto out; diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index f7ad99d972ce..25a3679fb590 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -26,7 +26,7 @@ #define FUNC(name) name #define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) -#endif /* CONFIG_PPC_BOOK3S_XX */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #define VCPU_LOAD_NVGPRS(vcpu) \ PPC_LL r14, VCPU_GPR(R14)(vcpu); \ @@ -55,8 +55,7 @@ ****************************************************************************/ /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) @@ -68,8 +67,8 @@ kvm_start_entry: /* Save host state to the stack */ PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) - /* Save r3 (kvm_run) and r4 (vcpu) */ - SAVE_2GPRS(3, r1) + /* Save r3 (vcpu) */ + SAVE_GPR(3, r1) /* Save non-volatile registers (r14 - r31) */ SAVE_NVGPRS(r1) @@ -82,47 +81,46 @@ kvm_start_entry: PPC_STL r0, _LINK(r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) kvm_start_lightweight: /* Copy registers into shadow vcpu so we can access them in real mode */ - mr r3, r4 bl FUNC(kvmppc_copy_to_svcpu) nop - REST_GPR(4, r1) + REST_GPR(3, r1) #ifdef CONFIG_PPC_BOOK3S_64 /* Get the dcbz32 flag */ - PPC_LL r3, VCPU_HFLAGS(r4) - rldicl r3, r3, 0, 63 /* r3 &= 1 */ - stb r3, HSTATE_RESTORE_HID5(r13) + PPC_LL r0, VCPU_HFLAGS(r3) + rldicl r0, r0, 0, 63 /* r3 &= 1 */ + stb r0, HSTATE_RESTORE_HID5(r13) /* Load up guest SPRG3 value, since it's user readable */ - lwz r3, VCPU_SHAREDBE(r4) - cmpwi r3, 0 - ld r5, VCPU_SHARED(r4) + lbz r4, VCPU_SHAREDBE(r3) + cmpwi r4, 0 + ld r5, VCPU_SHARED(r3) beq sprg3_little_endian sprg3_big_endian: #ifdef __BIG_ENDIAN__ - ld r3, VCPU_SHARED_SPRG3(r5) + ld r4, VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif b after_sprg3_load sprg3_little_endian: #ifdef __LITTLE_ENDIAN__ - ld r3, VCPU_SHARED_SPRG3(r5) + ld r4, VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif after_sprg3_load: - mtspr SPRN_SPRG3, r3 + mtspr SPRN_SPRG3, r4 #endif /* CONFIG_PPC_BOOK3S_64 */ - PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ + PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */ /* Jump to segment patching handler and into our guest */ bl FUNC(kvmppc_entry_trampoline) @@ -146,7 +144,7 @@ after_sprg3_load: * */ - PPC_LL r3, GPR4(r1) /* vcpu pointer */ + PPC_LL r3, GPR3(r1) /* vcpu pointer */ /* * kvmppc_copy_from_svcpu can clobber volatile registers, save @@ -169,7 +167,7 @@ after_sprg3_load: #endif /* CONFIG_PPC_BOOK3S_64 */ /* R7 = vcpu */ - PPC_LL r7, GPR4(r1) + PPC_LL r7, GPR3(r1) PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) @@ -190,11 +188,11 @@ after_sprg3_load: PPC_STL r30, VCPU_GPR(R30)(r7) PPC_STL r31, VCPU_GPR(R31)(r7) - /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ - lwz r5, VCPU_TRAP(r7) + /* Pass the exit number as 2nd argument to kvmppc_handle_exit */ + lwz r4, VCPU_TRAP(r7) - /* Restore r3 (kvm_run) and r4 (vcpu) */ - REST_2GPRS(3, r1) + /* Restore r3 (vcpu) */ + REST_GPR(3, r1) bl FUNC(kvmppc_handle_exit_pr) /* If RESUME_GUEST, get back in 
the loop */ @@ -223,11 +221,11 @@ kvm_loop_heavyweight: PPC_LL r4, _LINK(r1) PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) - /* Load vcpu and cpu_run */ - REST_2GPRS(3, r1) + /* Load vcpu */ + REST_GPR(3, r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) /* Jump back into the beginning of this function */ b kvm_start_lightweight @@ -235,7 +233,7 @@ kvm_loop_heavyweight: kvm_loop_lightweight: /* We'll need the vcpu pointer */ - REST_GPR(4, r1) + REST_GPR(3, r1) /* Jump back into the beginning of this function */ b kvm_start_lightweight diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index bf0282775e37..a11436720a8c 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -169,7 +169,7 @@ static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); } -static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_fpr_load(struct kvm_vcpu *vcpu, int rs, ulong addr, int ls_type) { int emulated = EMULATE_FAIL; @@ -188,7 +188,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_inject_pf(vcpu, addr, false); goto done_load; } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs, len, 1); goto done_load; } @@ -213,7 +213,7 @@ done_load: return emulated; } -static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_fpr_store(struct kvm_vcpu *vcpu, int rs, ulong addr, int ls_type) { int emulated = EMULATE_FAIL; @@ -248,7 +248,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, if (r < 0) { kvmppc_inject_pf(vcpu, addr, true); } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_store(run, vcpu, val, len, 1); + emulated = kvmppc_handle_store(vcpu, val, len, 1); } else { emulated = EMULATE_DONE; } @@ -259,7 +259,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_psq_load(struct kvm_vcpu *vcpu, int rs, ulong addr, bool w, int i) { int emulated = EMULATE_FAIL; @@ -279,12 +279,12 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_inject_pf(vcpu, addr, false); goto done_load; } else if ((r == EMULATE_DO_MMIO) && w) { - emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs, 4, 1); vcpu->arch.qpr[rs] = tmp[1]; goto done_load; } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FQPR | rs, 8, 1); goto done_load; } @@ -302,7 +302,7 @@ done_load: return emulated; } -static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_emulate_psq_store(struct kvm_vcpu *vcpu, int rs, ulong addr, bool w, int i) { int emulated = EMULATE_FAIL; @@ -318,10 +318,10 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, if (r < 0) { kvmppc_inject_pf(vcpu, addr, true); } else if ((r == EMULATE_DO_MMIO) && w) { - emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); + emulated = 
kvmppc_handle_store(vcpu, tmp[0], 4, 1); } else if (r == EMULATE_DO_MMIO) { u64 val = ((u64)tmp[0] << 32) | tmp[1]; - emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); + emulated = kvmppc_handle_store(vcpu, val, 8, 1); } else { emulated = EMULATE_DONE; } @@ -618,7 +618,7 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, return EMULATE_DONE; } -int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) +int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu) { u32 inst; enum emulation_result emulated = EMULATE_DONE; @@ -680,7 +680,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); break; } case OP_PSQ_LU: @@ -690,7 +690,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -703,7 +703,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); break; } case OP_PSQ_STU: @@ -713,7 +713,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 17, 19); addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -733,7 +733,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); break; } case OP_4X_PS_CMPO0: @@ -747,7 +747,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -824,7 +824,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); break; } case OP_4XW_PSQ_STUX: @@ -834,7 +834,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int i = inst_get_field(inst, 22, 24); addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i); if (emulated == EMULATE_DONE) kvmppc_set_gpr(vcpu, ax_ra, addr); @@ -922,7 +922,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? 
kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -930,7 +930,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -941,7 +941,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -949,7 +949,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -960,7 +960,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -968,7 +968,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -979,7 +979,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -987,7 +987,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -1001,7 +1001,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -1010,7 +1010,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -1022,7 +1022,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? 
kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -1031,7 +1031,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -1043,7 +1043,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); break; } @@ -1052,7 +1052,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE); if (emulated == EMULATE_DONE) @@ -1064,7 +1064,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); break; } @@ -1073,7 +1073,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_DOUBLE); if (emulated == EMULATE_DONE) @@ -1085,7 +1085,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr, FPU_LS_SINGLE_LOW); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a0f6813f4560..88fac22fbf09 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -700,7 +700,7 @@ static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); } -int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu, ulong eaddr, int vec) { bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); @@ -795,7 +795,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* The guest's PTE is not mapped yet. 
Map on the host */ if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) { /* Exit KVM if mapping failed */ - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return RESUME_HOST; } if (data) @@ -808,7 +808,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.mmio_exits++; vcpu->arch.paddr_accessed = pte.raddr; vcpu->arch.vaddr_accessed = pte.eaddr; - r = kvmppc_emulate_mmio(run, vcpu); + r = kvmppc_emulate_mmio(vcpu); if ( r == RESUME_HOST_NV ) r = RESUME_HOST; } @@ -992,7 +992,7 @@ static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac) enum emulation_result er = EMULATE_FAIL; if (!(kvmppc_get_msr(vcpu) & MSR_PR)) - er = kvmppc_emulate_instruction(vcpu->run, vcpu); + er = kvmppc_emulate_instruction(vcpu); if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) { /* Couldn't emulate, trigger interrupt in guest */ @@ -1089,8 +1089,7 @@ static void kvmppc_clear_debug(struct kvm_vcpu *vcpu) } } -static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr) { enum emulation_result er; ulong flags; @@ -1124,7 +1123,7 @@ static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, } vcpu->stat.emulated_inst_exits++; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_instruction(vcpu); switch (er) { case EMULATE_DONE: r = RESUME_GUEST_NV; @@ -1139,7 +1138,7 @@ static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case EMULATE_DO_MMIO: - run->exit_reason = KVM_EXIT_MMIO; + vcpu->run->exit_reason = KVM_EXIT_MMIO; r = RESUME_HOST_NV; break; case EMULATE_EXIT_USER: @@ -1152,9 +1151,9 @@ static int kvmppc_exit_pr_progint(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; @@ -1198,7 +1197,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, /* only care about PTEG not found errors, but leave NX alone */ if (shadow_srr1 & 0x40000000) { int idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); + r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr); srcu_read_unlock(&vcpu->kvm->srcu, idx); vcpu->stat.sp_instruc++; } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && @@ -1248,7 +1247,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) { int idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); + r = kvmppc_handle_pagefault(vcpu, dar, exit_nr); srcu_read_unlock(&vcpu->kvm->srcu, idx); } else { kvmppc_core_queue_data_storage(vcpu, dar, fault_dsisr); @@ -1292,7 +1291,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOK3S_INTERRUPT_PROGRAM: case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); + r = kvmppc_exit_pr_progint(vcpu, exit_nr); break; case BOOK3S_INTERRUPT_SYSCALL: { @@ -1370,7 +1369,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); if (emul == EMULATE_DONE) - r = kvmppc_exit_pr_progint(run, vcpu, exit_nr); + r = 
kvmppc_exit_pr_progint(vcpu, exit_nr); else r = RESUME_GUEST; @@ -1825,16 +1824,13 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) vfree(vcpu_book3s); } -static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) { int ret; -#ifdef CONFIG_ALTIVEC - unsigned long uninitialized_var(vrsave); -#endif /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { - kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = -EINVAL; goto out; } @@ -1861,7 +1857,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(kvm_run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); kvmppc_clear_debug(vcpu); diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 26b25994c969..c5e677508d3b 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -229,7 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) */ args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto fail; diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 6ef0151ff70a..bdea91df1497 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -31,6 +31,12 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) { u64 val; + /* + * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10 + * load operation, so there is no need to enforce load-after-store + * ordering. + */ + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index a8a900ace1e6..4ad3c0279458 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -58,6 +58,9 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset) { u64 val; + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + offset |= XIVE_ESB_LD_ST_MO; + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6c18ea88fd25..3e1c9f08e302 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -35,29 +35,28 @@ unsigned long kvmppc_booke_handlers; -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU - struct kvm_stats_debugfs_item debugfs_entries[] = { - { "mmio", VCPU_STAT(mmio_exits) }, - { "sig", VCPU_STAT(signal_exits) }, - { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, - { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, - { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, - { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, - { "sysc", VCPU_STAT(syscall_exits) }, - { "isi", VCPU_STAT(isi_exits) }, - { "dsi", VCPU_STAT(dsi_exits) }, - { "inst_emu", VCPU_STAT(emulated_inst_exits) }, - { "dec", VCPU_STAT(dec_exits) }, - { "ext_intr", VCPU_STAT(ext_intr_exits) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "doorbell", VCPU_STAT(dbell_exits) }, - { "guest 
doorbell", VCPU_STAT(gdbell_exits) }, - { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, + VCPU_STAT("mmio", mmio_exits), + VCPU_STAT("sig", signal_exits), + VCPU_STAT("itlb_r", itlb_real_miss_exits), + VCPU_STAT("itlb_v", itlb_virt_miss_exits), + VCPU_STAT("dtlb_r", dtlb_real_miss_exits), + VCPU_STAT("dtlb_v", dtlb_virt_miss_exits), + VCPU_STAT("sysc", syscall_exits), + VCPU_STAT("isi", isi_exits), + VCPU_STAT("dsi", dsi_exits), + VCPU_STAT("inst_emu", emulated_inst_exits), + VCPU_STAT("dec", dec_exits), + VCPU_STAT("ext_intr", ext_intr_exits), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("doorbell", dbell_exits), + VCPU_STAT("guest doorbell", gdbell_exits), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VM_STAT("remote_tlb_flush", remote_tlb_flush), { NULL } }; @@ -730,13 +729,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) return r; } -int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) { int ret, s; struct debug_reg debug; if (!vcpu->arch.sane) { - kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } @@ -778,7 +777,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.pgdir = vcpu->kvm->mm->pgd; kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(kvm_run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); /* No need for guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ @@ -800,11 +799,11 @@ out: return ret; } -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int emulation_exit(struct kvm_vcpu *vcpu) { enum emulation_result er; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_instruction(vcpu); switch (er) { case EMULATE_DONE: /* don't overwrite subtypes, just account kvm_stats */ @@ -821,8 +820,8 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. */ - run->hw.hardware_exit_reason = ~0ULL << 32; - run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + vcpu->run->hw.hardware_exit_reason = ~0ULL << 32; + vcpu->run->hw.hardware_exit_reason |= vcpu->arch.last_inst; kvmppc_core_queue_program(vcpu, ESR_PIL); return RESUME_HOST; @@ -834,8 +833,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) } } -static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) +static int kvmppc_handle_debug(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); u32 dbsr = vcpu->arch.dbsr; @@ -954,7 +954,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, } } -static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu, enum emulation_result emulated, u32 last_inst) { switch (emulated) { @@ -966,8 +966,8 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, __func__, vcpu->arch.regs.nip); /* For debugging, encode the failing instruction and * report it to userspace. 
*/ - run->hw.hardware_exit_reason = ~0ULL << 32; - run->hw.hardware_exit_reason |= last_inst; + vcpu->run->hw.hardware_exit_reason = ~0ULL << 32; + vcpu->run->hw.hardware_exit_reason |= last_inst; kvmppc_core_queue_program(vcpu, ESR_PIL); return RESUME_HOST; @@ -981,9 +981,9 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, * * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) */ -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; int idx; @@ -1024,7 +1024,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->ready_for_interrupt_injection = 1; if (emulated != EMULATE_DONE) { - r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst); + r = kvmppc_resume_inst_load(vcpu, emulated, last_inst); goto out; } @@ -1084,7 +1084,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_HV_PRIV: - r = emulation_exit(run, vcpu); + r = emulation_exit(vcpu); break; case BOOKE_INTERRUPT_PROGRAM: @@ -1094,7 +1094,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * We are here because of an SW breakpoint instr, * so lets return to host to handle. */ - r = kvmppc_handle_debug(run, vcpu); + r = kvmppc_handle_debug(vcpu); run->exit_reason = KVM_EXIT_DEBUG; kvmppc_account_exit(vcpu, DEBUG_EXITS); break; @@ -1115,7 +1115,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - r = emulation_exit(run, vcpu); + r = emulation_exit(vcpu); break; case BOOKE_INTERRUPT_FP_UNAVAIL: @@ -1282,7 +1282,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * actually RAM. 
*/ vcpu->arch.paddr_accessed = gpaddr; vcpu->arch.vaddr_accessed = eaddr; - r = kvmppc_emulate_mmio(run, vcpu); + r = kvmppc_emulate_mmio(vcpu); kvmppc_account_exit(vcpu, MMIO_EXITS); } @@ -1333,7 +1333,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } case BOOKE_INTERRUPT_DEBUG: { - r = kvmppc_handle_debug(run, vcpu); + r = kvmppc_handle_debug(vcpu); if (r == RESUME_HOST) run->exit_reason = KVM_EXIT_DEBUG; kvmppc_account_exit(vcpu, DEBUG_EXITS); diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 65b4d337d337..be9da96d9f06 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -70,7 +70,7 @@ void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); -int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); @@ -94,16 +94,12 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, - struct kvm_vcpu *vcpu, +extern int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu, unsigned int inst, int *advance); extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); -extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, - struct kvm_vcpu *vcpu, - unsigned int inst, int *advance); extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 689ff5f90e9e..d8d38aca71bd 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -39,7 +39,7 @@ static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, vcpu->arch.csrr1); } -int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2e56ab5a5f55..6fa82efe833b 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -237,7 +237,7 @@ _GLOBAL(kvmppc_resume_host) /* Switch to kernel stack and jump to handler. */ LOAD_REG_ADDR(r3, kvmppc_handle_exit) mtctr r3 - lwz r3, HOST_RUN(r1) + mr r3, r4 lwz r2, HOST_R2(r1) mr r14, r4 /* Save vcpu pointer. */ @@ -337,15 +337,14 @@ heavyweight_exit: /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. 
*/ - stw r3, HOST_RUN(r1) + mr r4, r3 mflr r3 stw r3, HOST_STACK_LR(r1) mfcr r5 diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index c577ba4b3169..8262c14fc9e6 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -434,9 +434,10 @@ _GLOBAL(kvmppc_resume_host) #endif /* Switch to kernel stack and jump to handler. */ - PPC_LL r3, HOST_RUN(r1) + mr r3, r4 mr r5, r14 /* intno */ mr r14, r4 /* Save vcpu pointer. */ + mr r4, r5 bl kvmppc_handle_exit /* Restore vcpu pointer and the nonvolatiles we used. */ @@ -525,15 +526,14 @@ heavyweight_exit: blr /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. */ - PPC_STL r3, HOST_RUN(r1) + mr r4, r3 mflr r3 mfcr r5 PPC_STL r3, HOST_STACK_LR(r1) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 3d0d3ec5be96..64eb833e9f02 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -83,16 +83,16 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) } #endif -static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int kvmppc_e500_emul_ehpriv(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; switch (get_oc(inst)) { case EHPRIV_OC_DEBUG: - run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.address = vcpu->arch.regs.nip; - run->debug.arch.status = 0; + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->run->debug.arch.address = vcpu->arch.regs.nip; + vcpu->run->debug.arch.status = 0; kvmppc_account_exit(vcpu, DEBUG_EXITS); emulated = EMULATE_EXIT_USER; *advance = 0; @@ -125,7 +125,7 @@ static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst, return EMULATE_FAIL; } -int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; @@ -182,8 +182,7 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_EHPRIV: - emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, - advance); + emulated = kvmppc_e500_emul_ehpriv(vcpu, inst, advance); break; default: @@ -197,7 +196,7 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, } if (emulated == EMULATE_FAIL) - emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); + emulated = kvmppc_booke_emulate_op(vcpu, inst, advance); return emulated; } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index df9989cf7ba3..ed0c9c43d0cf 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -355,7 +355,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 1) { struct vm_area_struct *vma; - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); vma = find_vma(kvm->mm, hva); if (vma && hva >= vma->vm_start && @@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); } - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); } if (likely(!pfnmap)) { @@ -734,7 +734,8 @@ static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) return 0; } -int 
kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { /* kvm_unmap_hva flushes everything anyways */ kvm_unmap_hva(kvm, start); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 6fca38ca791f..ee1147c98cd8 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -191,7 +191,7 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. */ -int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) +int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu) { u32 inst; int rs, rt, sprn; @@ -270,9 +270,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) * these are illegal instructions. */ if (inst == KVMPPC_INST_SW_BREAKPOINT) { - run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.status = 0; - run->debug.arch.address = kvmppc_get_pc(vcpu); + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->run->debug.arch.status = 0; + vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu); emulated = EMULATE_EXIT_USER; advance = 0; } else @@ -285,7 +285,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } if (emulated == EMULATE_FAIL) { - emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, + emulated = vcpu->kvm->arch.kvm_ops->emulate_op(vcpu, inst, &advance); if (emulated == EMULATE_AGAIN) { advance = 0; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 1139bc56e004..48272a9b9c30 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -71,7 +71,6 @@ static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu) */ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; u32 inst; enum emulation_result emulated = EMULATE_FAIL; int advance = 1; @@ -95,7 +94,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = EMULATE_FAIL; vcpu->arch.regs.msr = vcpu->arch.shared->msr; - if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { + if (analyse_instr(&op, &vcpu->arch.regs, ppc_inst(inst)) == 0) { int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); @@ -104,10 +103,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) int instr_byte_swap = op.type & BYTEREV; if (op.type & SIGNEXT) - emulated = kvmppc_handle_loads(run, vcpu, + emulated = kvmppc_handle_loads(vcpu, op.reg, size, !instr_byte_swap); else - emulated = kvmppc_handle_load(run, vcpu, + emulated = kvmppc_handle_load(vcpu, op.reg, size, !instr_byte_swap); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) @@ -124,10 +123,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) vcpu->arch.mmio_sp64_extend = 1; if (op.type & SIGNEXT) - emulated = kvmppc_handle_loads(run, vcpu, + emulated = kvmppc_handle_loads(vcpu, KVM_MMIO_REG_FPR|op.reg, size, 1); else - emulated = kvmppc_handle_load(run, vcpu, + emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR|op.reg, size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) @@ -164,12 +163,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (size == 16) { vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_vmx_load(run, - vcpu, KVM_MMIO_REG_VMX|op.reg, + emulated = kvmppc_handle_vmx_load(vcpu, + KVM_MMIO_REG_VMX|op.reg, 8, 1); } else { vcpu->arch.mmio_vmx_copy_nums = 1; 
- emulated = kvmppc_handle_vmx_load(run, vcpu, + emulated = kvmppc_handle_vmx_load(vcpu, KVM_MMIO_REG_VMX|op.reg, size, 1); } @@ -217,7 +216,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) io_size_each = op.element_size; } - emulated = kvmppc_handle_vsx_load(run, vcpu, + emulated = kvmppc_handle_vsx_load(vcpu, KVM_MMIO_REG_VSX|op.reg, io_size_each, 1, op.type & SIGNEXT); break; @@ -227,8 +226,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) /* if need byte reverse, op.val has been reversed by * analyse_instr(). */ - emulated = kvmppc_handle_store(run, vcpu, op.val, - size, 1); + emulated = kvmppc_handle_store(vcpu, op.val, size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) kvmppc_set_gpr(vcpu, op.update_reg, op.ea); @@ -250,7 +248,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (op.type & FPCONV) vcpu->arch.mmio_sp64_extend = 1; - emulated = kvmppc_handle_store(run, vcpu, + emulated = kvmppc_handle_store(vcpu, VCPU_FPR(vcpu, op.reg), size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) @@ -290,12 +288,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (size == 16) { vcpu->arch.mmio_vmx_copy_nums = 2; - emulated = kvmppc_handle_vmx_store(run, - vcpu, op.reg, 8, 1); + emulated = kvmppc_handle_vmx_store(vcpu, + op.reg, 8, 1); } else { vcpu->arch.mmio_vmx_copy_nums = 1; - emulated = kvmppc_handle_vmx_store(run, - vcpu, op.reg, size, 1); + emulated = kvmppc_handle_vmx_store(vcpu, + op.reg, size, 1); } break; @@ -338,7 +336,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) io_size_each = op.element_size; } - emulated = kvmppc_handle_vsx_store(run, vcpu, + emulated = kvmppc_handle_vsx_store(vcpu, op.reg, io_size_each, 1); break; } diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index 3dfae0cb6228..315c94946bad 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -5,10 +5,10 @@ * Copyright (C) 2010 Alexander Graf (agraf@suse.de) */ +#include <linux/pgtable.h> #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/cache.h> #include <asm/thread_info.h> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ad2f172c26a6..13999123b735 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -279,7 +279,7 @@ out: } EXPORT_SYMBOL_GPL(kvmppc_sanity_check); -int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) +int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) { enum emulation_result er; int r; @@ -295,7 +295,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_GUEST; break; case EMULATE_DO_MMIO: - run->exit_reason = KVM_EXIT_MMIO; + vcpu->run->exit_reason = KVM_EXIT_MMIO; /* We must reload nonvolatiles because "update" load/store * instructions modify register state. 
*/ /* Future optimization: only reload non-volatiles if they were @@ -403,7 +403,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; } - if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (rc) return EMULATE_DO_MMIO; return EMULATE_DONE; @@ -752,7 +755,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (err) goto out_vcpu_uninit; - vcpu->arch.wqp = &vcpu->wq; + vcpu->arch.waitp = &vcpu->wait; kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); return 0; @@ -1107,10 +1110,10 @@ static inline u32 dp_to_sp(u64 fprd) #define dp_to_sp(x) (x) #endif /* CONFIG_PPC_FPU */ -static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) +static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu) { - u64 uninitialized_var(gpr); + struct kvm_run *run = vcpu->run; + u64 gpr; if (run->mmio.len > sizeof(gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -1219,10 +1222,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, } } -static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +static int __kvmppc_handle_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int sign_extend) { + struct kvm_run *run = vcpu->run; int idx, ret; bool host_swabbed; @@ -1256,7 +1260,7 @@ static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { - kvmppc_complete_mmio_load(vcpu, run); + kvmppc_complete_mmio_load(vcpu); vcpu->mmio_needed = 0; return EMULATE_DONE; } @@ -1264,24 +1268,24 @@ static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } -int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian) { - return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0); + return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0); } EXPORT_SYMBOL_GPL(kvmppc_handle_load); /* Same as above, but sign extends */ -int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_loads(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian) { - return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1); + return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 1); } #ifdef CONFIG_VSX -int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian, int mmio_sign_extend) { @@ -1292,13 +1296,13 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; while (vcpu->arch.mmio_vsx_copy_nums) { - emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, + emulated = __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, mmio_sign_extend); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vsx_copy_nums--; vcpu->arch.mmio_vsx_offset++; @@ -1307,9 +1311,10 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, } #endif /* CONFIG_VSX */ -int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_store(struct kvm_vcpu 
*vcpu, u64 val, unsigned int bytes, int is_default_endian) { + struct kvm_run *run = vcpu->run; void *data = run->mmio.data; int idx, ret; bool host_swabbed; @@ -1423,7 +1428,7 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) return result; } -int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu, int rs, unsigned int bytes, int is_default_endian) { u64 val; @@ -1439,13 +1444,13 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) return EMULATE_FAIL; - emulated = kvmppc_handle_store(run, vcpu, + emulated = kvmppc_handle_store(vcpu, val, bytes, is_default_endian); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vsx_copy_nums--; vcpu->arch.mmio_vsx_offset++; @@ -1454,19 +1459,19 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, - struct kvm_run *run) +static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; enum emulation_result emulated = EMULATE_FAIL; int r; vcpu->arch.paddr_accessed += run->mmio.len; if (!vcpu->mmio_is_write) { - emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr, + emulated = kvmppc_handle_vsx_load(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1, vcpu->arch.mmio_sign_extend); } else { - emulated = kvmppc_handle_vsx_store(run, vcpu, + emulated = kvmppc_handle_vsx_store(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1); } @@ -1490,7 +1495,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, #endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC -int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian) { enum emulation_result emulated = EMULATE_DONE; @@ -1499,13 +1504,13 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { - emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, + emulated = __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; vcpu->arch.mmio_vmx_offset++; } @@ -1585,7 +1590,7 @@ int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, +int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int rs, unsigned int bytes, int is_default_endian) { u64 val = 0; @@ -1620,12 +1625,12 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; } - emulated = kvmppc_handle_store(run, vcpu, val, bytes, + emulated = kvmppc_handle_store(vcpu, val, bytes, is_default_endian); if (emulated != EMULATE_DONE) break; - vcpu->arch.paddr_accessed += run->mmio.len; + vcpu->arch.paddr_accessed += vcpu->run->mmio.len; vcpu->arch.mmio_vmx_copy_nums--; vcpu->arch.mmio_vmx_offset++; } @@ -1633,19 +1638,19 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu, - struct kvm_run *run) +static int 
kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; enum emulation_result emulated = EMULATE_FAIL; int r; vcpu->arch.paddr_accessed += run->mmio.len; if (!vcpu->mmio_is_write) { - emulated = kvmppc_handle_vmx_load(run, vcpu, + emulated = kvmppc_handle_vmx_load(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1); } else { - emulated = kvmppc_handle_vmx_store(run, vcpu, + emulated = kvmppc_handle_vmx_store(vcpu, vcpu->arch.io_gpr, run->mmio.len, 1); } @@ -1765,8 +1770,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) return r; } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int r; vcpu_load(vcpu); @@ -1774,7 +1780,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; if (!vcpu->mmio_is_write) - kvmppc_complete_mmio_load(vcpu, run); + kvmppc_complete_mmio_load(vcpu); #ifdef CONFIG_VSX if (vcpu->arch.mmio_vsx_copy_nums > 0) { vcpu->arch.mmio_vsx_copy_nums--; @@ -1782,7 +1788,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) } if (vcpu->arch.mmio_vsx_copy_nums > 0) { - r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run); + r = kvmppc_emulate_mmio_vsx_loadstore(vcpu); if (r == RESUME_HOST) { vcpu->mmio_needed = 1; goto out; @@ -1796,7 +1802,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) } if (vcpu->arch.mmio_vmx_copy_nums > 0) { - r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run); + r = kvmppc_emulate_mmio_vmx_loadstore(vcpu); if (r == RESUME_HOST) { vcpu->mmio_needed = 1; goto out; @@ -1829,7 +1835,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (run->immediate_exit) r = -EINTR; else - r = kvmppc_vcpu_run(run, vcpu); + r = kvmppc_vcpu_run(vcpu); kvm_sigset_deactivate(vcpu); diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 8a1e3b0047f1..830a126e095d 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -89,7 +89,7 @@ {H_CREATE_RPT, "H_CREATE_RPT"}, \ {H_REMOVE_RPT, "H_REMOVE_RPT"}, \ {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \ - {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \ + {H_DISABLE_AND_GET, "H_DISABLE_AND_GET"}, \ {H_ERROR_DATA, "H_ERROR_DATA"}, \ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ @@ -472,9 +472,9 @@ TRACE_EVENT(kvmppc_run_vcpu_enter, ); TRACE_EVENT(kvmppc_run_vcpu_exit, - TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run), + TP_PROTO(struct kvm_vcpu *vcpu), - TP_ARGS(vcpu, run), + TP_ARGS(vcpu), TP_STRUCT__entry( __field(int, vcpu_id) @@ -484,7 +484,7 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->exit = run->exit_reason; + __entry->exit = vcpu->run->exit_reason; __entry->ret = vcpu->arch.ret; ), diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b8de3be10eb4..d66a645503eb 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o @@ -41,7 +41,10 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ obj64-y += copypage_64.o 
copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o memcpy_mcsafe_64.o +ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o +endif + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ test_emulate_step_exec_instr.o diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3345f039a876..8c3934ea6220 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -12,18 +12,23 @@ #include <linux/slab.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/page.h> #include <asm/code-patching.h> #include <asm/setup.h> +#include <asm/inst.h> -static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, - unsigned int *patch_addr) +static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, + struct ppc_inst *patch_addr) { int err = 0; - __put_user_asm(instr, patch_addr, err, "stw"); + if (!ppc_inst_prefixed(instr)) { + __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw"); + } else { + __put_user_asm(ppc_inst_as_u64(instr), patch_addr, err, "std"); + } + if (err) return err; @@ -33,7 +38,7 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, return 0; } -int raw_patch_instruction(unsigned int *addr, unsigned int instr) +int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { return __patch_instruction(addr, instr, addr); } @@ -88,7 +93,7 @@ static int map_patch_area(void *addr, unsigned long text_poke_addr) unsigned long pfn; int err; - if (is_vmalloc_addr(addr)) + if (is_vmalloc_or_module_addr(addr)) pfn = vmalloc_to_pfn(addr); else pfn = __pa_symbol(addr) >> PAGE_SHIFT; @@ -107,13 +112,18 @@ static inline int unmap_patch_area(unsigned long addr) pte_t *ptep; pmd_t *pmdp; pud_t *pudp; + p4d_t *p4dp; pgd_t *pgdp; pgdp = pgd_offset_k(addr); if (unlikely(!pgdp)) return -EINVAL; - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + if (unlikely(!p4dp)) + return -EINVAL; + + pudp = pud_offset(p4dp, addr); if (unlikely(!pudp)) return -EINVAL; @@ -136,10 +146,10 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { int err; - unsigned int *patch_addr = NULL; + struct ppc_inst *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -160,8 +170,7 @@ static int do_patch_instruction(unsigned int *addr, unsigned int instr) goto out; } - patch_addr = (unsigned int *)(text_poke_addr) + - ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); + patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK)); __patch_instruction(addr, instr, patch_addr); @@ -176,14 +185,14 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) { /* Make sure we aren't patching a freed init section */ if (init_mem_is_free && init_section_contains(addr, 4)) { @@ -194,9 +203,12 @@ int patch_instruction(unsigned int *addr, unsigned int instr) } NOKPROBE_SYMBOL(patch_instruction); -int 
patch_branch(unsigned int *addr, unsigned long target, int flags) +int patch_branch(struct ppc_inst *addr, unsigned long target, int flags) { - return patch_instruction(addr, create_branch(addr, target, flags)); + struct ppc_inst instr; + + create_branch(&instr, addr, target, flags); + return patch_instruction(addr, instr); } bool is_offset_in_branch_range(long offset) @@ -225,14 +237,14 @@ bool is_offset_in_branch_range(long offset) * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() */ -bool is_conditional_branch(unsigned int instr) +bool is_conditional_branch(struct ppc_inst instr) { - unsigned int opcode = instr >> 26; + unsigned int opcode = ppc_inst_primary_opcode(instr); if (opcode == 16) /* bc, bca, bcl, bcla */ return true; if (opcode == 19) { - switch ((instr >> 1) & 0x3ff) { + switch ((ppc_inst_val(instr) >> 1) & 0x3ff) { case 16: /* bclr, bclrl */ case 528: /* bcctr, bcctrl */ case 560: /* bctar, bctarl */ @@ -243,30 +255,30 @@ bool is_conditional_branch(unsigned int instr) } NOKPROBE_SYMBOL(is_conditional_branch); -unsigned int create_branch(const unsigned int *addr, - unsigned long target, int flags) +int create_branch(struct ppc_inst *instr, + const struct ppc_inst *addr, + unsigned long target, int flags) { - unsigned int instruction; long offset; + *instr = ppc_inst(0); offset = target; if (! (flags & BRANCH_ABSOLUTE)) offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ if (!is_offset_in_branch_range(offset)) - return 0; + return 1; /* Mask out the flags and target, so they don't step on each other. */ - instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC); + *instr = ppc_inst(0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC)); - return instruction; + return 0; } -unsigned int create_cond_branch(const unsigned int *addr, - unsigned long target, int flags) +int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, + unsigned long target, int flags) { - unsigned int instruction; long offset; offset = target; @@ -275,104 +287,107 @@ unsigned int create_cond_branch(const unsigned int *addr, /* Check we can represent the target in the instruction format */ if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) - return 0; + return 1; /* Mask out the flags and target, so they don't step on each other. 
*/ - instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC); + *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC)); - return instruction; + return 0; } -static unsigned int branch_opcode(unsigned int instr) +static unsigned int branch_opcode(struct ppc_inst instr) { - return (instr >> 26) & 0x3F; + return ppc_inst_primary_opcode(instr) & 0x3F; } -static int instr_is_branch_iform(unsigned int instr) +static int instr_is_branch_iform(struct ppc_inst instr) { return branch_opcode(instr) == 18; } -static int instr_is_branch_bform(unsigned int instr) +static int instr_is_branch_bform(struct ppc_inst instr) { return branch_opcode(instr) == 16; } -int instr_is_relative_branch(unsigned int instr) +int instr_is_relative_branch(struct ppc_inst instr) { - if (instr & BRANCH_ABSOLUTE) + if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) return 0; return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } -int instr_is_relative_link_branch(unsigned int instr) +int instr_is_relative_link_branch(struct ppc_inst instr) { - return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK); + return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } -static unsigned long branch_iform_target(const unsigned int *instr) +static unsigned long branch_iform_target(const struct ppc_inst *instr) { signed long imm; - imm = *instr & 0x3FFFFFC; + imm = ppc_inst_val(*instr) & 0x3FFFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x2000000) imm -= 0x4000000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -static unsigned long branch_bform_target(const unsigned int *instr) +static unsigned long branch_bform_target(const struct ppc_inst *instr) { signed long imm; - imm = *instr & 0xFFFC; + imm = ppc_inst_val(*instr) & 0xFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x8000) imm -= 0x10000; - if ((*instr & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -unsigned long branch_target(const unsigned int *instr) +unsigned long branch_target(const struct ppc_inst *instr) { - if (instr_is_branch_iform(*instr)) + if (instr_is_branch_iform(ppc_inst_read(instr))) return branch_iform_target(instr); - else if (instr_is_branch_bform(*instr)) + else if (instr_is_branch_bform(ppc_inst_read(instr))) return branch_bform_target(instr); return 0; } -int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr) +int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr) { - if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr)) + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) return branch_target(instr) == addr; return 0; } -unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) +int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, + const struct ppc_inst *src) { unsigned long target; - target = branch_target(src); - if (instr_is_branch_iform(*src)) - return create_branch(dest, target, *src); - else if (instr_is_branch_bform(*src)) - return create_cond_branch(dest, target, *src); + if (instr_is_branch_iform(ppc_inst_read(src))) + return create_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); + else if (instr_is_branch_bform(ppc_inst_read(src))) + return 
create_cond_branch(instr, dest, target, + ppc_inst_val(ppc_inst_read(src))); - return 0; + return 1; } #ifdef CONFIG_PPC_BOOK3E_64 @@ -387,7 +402,7 @@ void __patch_exception(int exc, unsigned long addr) * instruction of the exception, not the first one */ - patch_branch(ibase + (exc / 4) + 1, addr, 0); + patch_branch((struct ppc_inst *)(ibase + (exc / 4) + 1), addr, 0); } #endif @@ -403,165 +418,171 @@ static void __init test_trampoline(void) static void __init test_branch_iform(void) { - unsigned int instr; + int err; + struct ppc_inst instr; unsigned long addr; addr = (unsigned long)&instr; /* The simplest case, branch to self, no flags */ - check(instr_is_branch_iform(0x48000000)); + check(instr_is_branch_iform(ppc_inst(0x48000000))); /* All bits of target set, and flags */ - check(instr_is_branch_iform(0x4bffffff)); + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_iform(0xcbffffff)); + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_iform(0x7bffffff)); + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); /* Simplest case, branch to self with link */ - check(instr_is_branch_iform(0x48000001)); + check(instr_is_branch_iform(ppc_inst(0x48000001))); /* All bits of targets set */ - check(instr_is_branch_iform(0x4bfffffd)); + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); /* Some bits of targets set */ - check(instr_is_branch_iform(0x4bff00fd)); + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); /* Must be a valid branch to start with */ - check(!instr_is_branch_iform(0x7bfffffd)); + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); /* Absolute branch to 0x100 */ - instr = 0x48000103; + instr = ppc_inst(0x48000103); check(instr_is_branch_to_addr(&instr, 0x100)); /* Absolute branch to 0x420fc */ - instr = 0x480420ff; + instr = ppc_inst(0x480420ff); check(instr_is_branch_to_addr(&instr, 0x420fc)); /* Maximum positive relative branch, + 20MB - 4B */ - instr = 0x49fffffc; + instr = ppc_inst(0x49fffffc); check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); /* Smallest negative relative branch, - 4B */ - instr = 0x4bfffffc; + instr = ppc_inst(0x4bfffffc); check(instr_is_branch_to_addr(&instr, addr - 4)); /* Largest negative relative branch, - 32 MB */ - instr = 0x4a000000; + instr = ppc_inst(0x4a000000); check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); /* Branch to self, with link */ - instr = create_branch(&instr, addr, BRANCH_SET_LINK); + err = create_branch(&instr, &instr, addr, BRANCH_SET_LINK); check(instr_is_branch_to_addr(&instr, addr)); /* Branch to self - 0x100, with link */ - instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK); + err = create_branch(&instr, &instr, addr - 0x100, BRANCH_SET_LINK); check(instr_is_branch_to_addr(&instr, addr - 0x100)); /* Branch to self + 0x100, no link */ - instr = create_branch(&instr, addr + 0x100, 0); + err = create_branch(&instr, &instr, addr + 0x100, 0); check(instr_is_branch_to_addr(&instr, addr + 0x100)); /* Maximum relative negative offset, - 32 MB */ - instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK); + err = create_branch(&instr, &instr, addr - 0x2000000, BRANCH_SET_LINK); check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); /* Out of range relative negative offset, - 32 MB + 4*/ - instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK); - check(instr == 0); + err = create_branch(&instr, &instr, addr - 0x2000004, BRANCH_SET_LINK); 
+ check(err); /* Out of range relative positive offset, + 32 MB */ - instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK); - check(instr == 0); + err = create_branch(&instr, &instr, addr + 0x2000000, BRANCH_SET_LINK); + check(err); /* Unaligned target */ - instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK); - check(instr == 0); + err = create_branch(&instr, &instr, addr + 3, BRANCH_SET_LINK); + check(err); /* Check flags are masked correctly */ - instr = create_branch(&instr, addr, 0xFFFFFFFC); + err = create_branch(&instr, &instr, addr, 0xFFFFFFFC); check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x48000000); + check(ppc_inst_equal(instr, ppc_inst(0x48000000))); } static void __init test_create_function_call(void) { - unsigned int *iptr; + struct ppc_inst *iptr; unsigned long dest; + struct ppc_inst instr; /* Check we can create a function call */ - iptr = (unsigned int *)ppc_function_entry(test_trampoline); + iptr = (struct ppc_inst *)ppc_function_entry(test_trampoline); dest = ppc_function_entry(test_create_function_call); - patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK)); + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); + patch_instruction(iptr, instr); check(instr_is_branch_to_addr(iptr, dest)); } static void __init test_branch_bform(void) { + int err; unsigned long addr; - unsigned int *iptr, instr, flags; + struct ppc_inst *iptr, instr; + unsigned int flags; iptr = &instr; addr = (unsigned long)iptr; /* The simplest case, branch to self, no flags */ - check(instr_is_branch_bform(0x40000000)); + check(instr_is_branch_bform(ppc_inst(0x40000000))); /* All bits of target set, and flags */ - check(instr_is_branch_bform(0x43ffffff)); + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_bform(0xc3ffffff)); + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_bform(0x7bffffff)); + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); /* Absolute conditional branch to 0x100 */ - instr = 0x43ff0103; + instr = ppc_inst(0x43ff0103); check(instr_is_branch_to_addr(&instr, 0x100)); /* Absolute conditional branch to 0x20fc */ - instr = 0x43ff20ff; + instr = ppc_inst(0x43ff20ff); check(instr_is_branch_to_addr(&instr, 0x20fc)); /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = 0x43ff7ffc; + instr = ppc_inst(0x43ff7ffc); check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); /* Smallest negative relative conditional branch, - 4B */ - instr = 0x43fffffc; + instr = ppc_inst(0x43fffffc); check(instr_is_branch_to_addr(&instr, addr - 4)); /* Largest negative relative conditional branch, - 32 KB */ - instr = 0x43ff8000; + instr = ppc_inst(0x43ff8000); check(instr_is_branch_to_addr(&instr, addr - 0x8000)); /* All condition code bits set & link */ flags = 0x3ff000 | BRANCH_SET_LINK; /* Branch to self */ - instr = create_cond_branch(iptr, addr, flags); + err = create_cond_branch(&instr, iptr, addr, flags); check(instr_is_branch_to_addr(&instr, addr)); /* Branch to self - 0x100 */ - instr = create_cond_branch(iptr, addr - 0x100, flags); + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); check(instr_is_branch_to_addr(&instr, addr - 0x100)); /* Branch to self + 0x100 */ - instr = create_cond_branch(iptr, addr + 0x100, flags); + err = create_cond_branch(&instr, iptr, addr + 0x100, flags); check(instr_is_branch_to_addr(&instr, addr + 0x100)); /* Maximum relative negative offset, 
- 32 KB */ - instr = create_cond_branch(iptr, addr - 0x8000, flags); + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); check(instr_is_branch_to_addr(&instr, addr - 0x8000)); /* Out of range relative negative offset, - 32 KB + 4*/ - instr = create_cond_branch(iptr, addr - 0x8004, flags); - check(instr == 0); + err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); + check(err); /* Out of range relative positive offset, + 32 KB */ - instr = create_cond_branch(iptr, addr + 0x8000, flags); - check(instr == 0); + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); + check(err); /* Unaligned target */ - instr = create_cond_branch(iptr, addr + 3, flags); - check(instr == 0); + err = create_cond_branch(&instr, iptr, addr + 3, flags); + check(err); /* Check flags are masked correctly */ - instr = create_cond_branch(iptr, addr, 0xFFFFFFFC); + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); check(instr_is_branch_to_addr(&instr, addr)); - check(instr == 0x43FF0000); + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); } static void __init test_translate_branch(void) { unsigned long addr; - unsigned int *p, *q; + void *p, *q; + struct ppc_inst instr; void *buf; buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); @@ -574,8 +595,9 @@ static void __init test_translate_branch(void) addr = (unsigned long)p; patch_branch(p, addr, 0); check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, translate_branch(q, p)); + q = p + 4; + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(q, addr)); /* Maximum negative case, move b . to addr + 32 MB */ @@ -583,27 +605,30 @@ static void __init test_translate_branch(void) addr = (unsigned long)p; patch_branch(p, addr, 0); q = buf + 0x2000000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x4a000000); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); /* Maximum positive case, move x to x - 32 MB + 4 */ p = buf + 0x2000000; addr = (unsigned long)p; patch_branch(p, addr, 0); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x49fffffc); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); /* Jump to x + 16 MB moved to x + 20 MB */ p = buf; addr = 0x1000000 + (unsigned long)buf; patch_branch(p, addr, BRANCH_SET_LINK); q = buf + 0x1400000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); @@ -612,7 +637,8 @@ static void __init test_translate_branch(void) addr = 0x2000000 + (unsigned long)buf; patch_branch(p, addr, 0); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); @@ -622,47 +648,57 @@ static void __init test_translate_branch(void) /* Simple case, branch to self moved a little */ p = buf; addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0)); + create_cond_branch(&instr, p, addr, 0); + patch_instruction(p, instr); check(instr_is_branch_to_addr(p, addr)); - q = p + 1; - patch_instruction(q, 
translate_branch(q, p)); + q = buf + 4; + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(q, addr)); /* Maximum negative case, move b . to addr + 32 KB */ p = buf; addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + patch_instruction(p, instr); q = buf + 0x8000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff8000); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); /* Maximum positive case, move x to x - 32 KB + 4 */ p = buf + 0x8000; addr = (unsigned long)p; - patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC)); + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + patch_instruction(p, instr); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); - check(*q == 0x43ff7ffc); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); /* Jump to x + 12 KB moved to x + 20 KB */ p = buf; addr = 0x3000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK)); + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); + patch_instruction(p, instr); q = buf + 0x5000; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); /* Jump to x + 8 KB moved to x - 8 KB + 4 */ p = buf + 0x2000; addr = 0x4000 + (unsigned long)buf; - patch_instruction(p, create_cond_branch(p, addr, 0)); + create_cond_branch(&instr, p, addr, 0); + patch_instruction(p, instr); q = buf + 4; - patch_instruction(q, translate_branch(q, p)); + translate_branch(&instr, q, p); + patch_instruction(q, instr); check(instr_is_branch_to_addr(p, addr)); check(instr_is_branch_to_addr(q, addr)); @@ -670,6 +706,26 @@ static void __init test_translate_branch(void) vfree(buf); } +#ifdef CONFIG_PPC64 +static void __init test_prefixed_patching(void) +{ + extern unsigned int code_patching_test1[]; + extern unsigned int code_patching_test1_expected[]; + extern unsigned int end_code_patching_test1[]; + + __patch_instruction((struct ppc_inst *)code_patching_test1, + ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), + (struct ppc_inst *)code_patching_test1); + + check(!memcmp(code_patching_test1, + code_patching_test1_expected, + sizeof(unsigned int) * + (end_code_patching_test1 - code_patching_test1))); +} +#else +static inline void test_prefixed_patching(void) {} +#endif + static int __init test_code_patching(void) { printk(KERN_DEBUG "Running code patching self-tests ...\n"); @@ -678,6 +734,7 @@ static int __init test_code_patching(void) test_branch_bform(); test_create_function_call(); test_translate_branch(); + test_prefixed_patching(); return 0; } diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S index b12168c2447a..480172fbd024 100644 --- a/arch/powerpc/lib/feature-fixups-test.S +++ b/arch/powerpc/lib/feature-fixups-test.S @@ -7,6 +7,7 @@ #include <asm/ppc_asm.h> #include <asm/synch.h> #include <asm/asm-compat.h> +#include <asm/ppc-opcode.h> .text @@ -791,3 +792,71 @@ globl(lwsync_fixup_test_expected_SYNC) 1: or 1,1,1 sync +globl(ftr_fixup_prefix1) + or 1,1,1 + 
.long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix1) + +globl(ftr_fixup_prefix1_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix1_expected) + or 1,1,1 + nop + nop + or 2,2,2 + +globl(ftr_fixup_prefix2) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 +globl(end_ftr_fixup_prefix2) + +globl(ftr_fixup_prefix2_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + +globl(ftr_fixup_prefix2_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + +globl(ftr_fixup_prefix2_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + or 2,2,2 + +globl(ftr_fixup_prefix3) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 +globl(end_ftr_fixup_prefix3) + +globl(ftr_fixup_prefix3_orig) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000000 + or 2,2,2 + or 3,3,3 + +globl(ftr_fixup_prefix3_alt) + .long OP_PREFIX << 26 + .long 0x0000001 + nop + +globl(ftr_fixup_prefix3_expected) + or 1,1,1 + .long OP_PREFIX << 26 + .long 0x0000001 + nop + or 3,3,3 diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4ba634b89ce5..4c0a7ee9fa00 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -21,6 +21,7 @@ #include <asm/setup.h> #include <asm/security_features.h> #include <asm/firmware.h> +#include <asm/inst.h> struct fixup_entry { unsigned long mask; @@ -31,30 +32,31 @@ struct fixup_entry { long alt_end_off; }; -static unsigned int *calc_addr(struct fixup_entry *fcur, long offset) +static struct ppc_inst *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. 
*/ - return (unsigned int *)((unsigned long)fcur + offset); + return (struct ppc_inst *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(unsigned int *src, unsigned int *dest, - unsigned int *alt_start, unsigned int *alt_end) +static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, + struct ppc_inst *alt_start, struct ppc_inst *alt_end) { - unsigned int instr; + int err; + struct ppc_inst instr; - instr = *src; + instr = ppc_inst_read(src); if (instr_is_relative_branch(*src)) { - unsigned int *target = (unsigned int *)branch_target(src); + struct ppc_inst *target = (struct ppc_inst *)branch_target(src); /* Branch within the section doesn't need translating */ if (target < alt_start || target > alt_end) { - instr = translate_branch(dest, src); - if (!instr) + err = translate_branch(&instr, dest, src); + if (err) return 1; } } @@ -66,7 +68,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) { - unsigned int *start, *end, *alt_start, *alt_end, *src, *dest; + struct ppc_inst *start, *end, *alt_start, *alt_end, *src, *dest, nop; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -82,13 +84,15 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) src = alt_start; dest = start; - for (; src < alt_end; src++, dest++) { + for (; src < alt_end; src = ppc_inst_next(src, src), + dest = ppc_inst_next(dest, dest)) { if (patch_alt_instruction(src, dest, alt_start, alt_end)) return 1; } - for (; dest < end; dest++) - raw_patch_instruction(dest, PPC_INST_NOP); + nop = ppc_inst(PPC_INST_NOP); + for (; dest < end; dest = ppc_inst_next(dest, &nop)) + raw_patch_instruction(dest, nop); return 0; } @@ -145,15 +149,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instrs[0]); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); if (types & STF_BARRIER_FALLBACK) - patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, + patch_branch((struct ppc_inst *)(dest + 1), + (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); else - patch_instruction(dest + 1, instrs[1]); + patch_instruction((struct ppc_inst *)(dest + 1), + ppc_inst(instrs[1])); - patch_instruction(dest + 2, instrs[2]); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); } printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, @@ -206,12 +212,12 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); - patch_instruction(dest + 3, instrs[3]); - patch_instruction(dest + 4, instrs[4]); - patch_instruction(dest + 5, instrs[5]); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + patch_instruction((struct ppc_inst *)(dest + 4), ppc_inst(instrs[4])); + patch_instruction((struct ppc_inst *)(dest + 5), ppc_inst(instrs[5])); } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? 
"no" : @@ -259,9 +265,9 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instrs[0]); - patch_instruction(dest + 1, instrs[1]); - patch_instruction(dest + 2, instrs[2]); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); } printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, @@ -294,7 +300,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instr)); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -337,8 +343,8 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction(dest, instr[0]); - patch_instruction(dest + 1, instr[1]); + patch_instruction((struct ppc_inst *)dest, ppc_inst(instr[0])); + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instr[1])); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -352,7 +358,7 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction(start, PPC_INST_NOP); + patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_INST_NOP)); } } @@ -371,7 +377,7 @@ void do_btb_flush_fixups(void) void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - unsigned int *dest; + struct ppc_inst *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -381,27 +387,27 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - raw_patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC)); } } static void do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - int *src, *dest; - unsigned long length; + struct ppc_inst inst, *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (int *)(KERNELBASE + PHYSICAL_START); - dest = (int *)KERNELBASE; - length = (__end_interrupts - _stext) / sizeof(int); + src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START); + dest = (struct ppc_inst *)KERNELBASE; + end = (void *)src + (__end_interrupts - _stext); - while (length--) { - raw_patch_instruction(dest, *src); - src++; - dest++; + while (src < end) { + inst = ppc_inst_read(src); + raw_patch_instruction(dest, inst); + src = ppc_inst_next(src, src); + dest = ppc_inst_next(dest, dest); } #endif } @@ -684,6 +690,78 @@ static void test_lwsync_macros(void) } } +#ifdef CONFIG_PPC64 +static void __init test_prefix_patching(void) +{ + extern unsigned int ftr_fixup_prefix1[]; + extern unsigned int end_ftr_fixup_prefix1[]; + extern unsigned int ftr_fixup_prefix1_orig[]; + extern unsigned int ftr_fixup_prefix1_expected[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3); + fixup.alt_start_off = fixup.alt_end_off = 0; + + /* Sanity check */ + 
check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0); + check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0); +} + +static void __init test_prefix_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix2[]; + extern unsigned int end_ftr_fixup_prefix2[]; + extern unsigned int ftr_fixup_prefix2_orig[]; + extern unsigned int ftr_fixup_prefix2_expected[]; + extern unsigned int ftr_fixup_prefix2_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0); + check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0); +} + +static void __init test_prefix_word_alt_patching(void) +{ + extern unsigned int ftr_fixup_prefix3[]; + extern unsigned int end_ftr_fixup_prefix3[]; + extern unsigned int ftr_fixup_prefix3_orig[]; + extern unsigned int ftr_fixup_prefix3_expected[]; + extern unsigned int ftr_fixup_prefix3_alt[]; + int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3); + + fixup.value = fixup.mask = 8; + fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3); + /* Sanity check */ + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0); + + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0); + patch_feature_section(0, &fixup); + check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0); +} +#else +static inline void test_prefix_patching(void) {} +static inline void test_prefix_alt_patching(void) {} +static inline void test_prefix_word_alt_patching(void) {} +#endif /* CONFIG_PPC64 */ + static int __init test_feature_fixups(void) { printk(KERN_DEBUG "Running feature fixup self-tests ...\n"); @@ -698,6 +776,9 @@ static int __init test_feature_fixups(void) test_cpu_macros(); test_fw_macros(); test_lwsync_macros(); + test_prefix_patching(); + test_prefix_alt_patching(); + test_prefix_word_alt_patching(); return 0; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c new file mode 100644 index 000000000000..9cc17eb62462 --- /dev/null +++ b/arch/powerpc/lib/inst.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020, IBM Corporation. 
+ */ + +#include <linux/uaccess.h> +#include <asm/disassemble.h> +#include <asm/inst.h> +#include <asm/ppc-opcode.h> + +#ifdef CONFIG_PPC64 +int probe_user_read_inst(struct ppc_inst *inst, + struct ppc_inst __user *nip) +{ + unsigned int val, suffix; + int err; + + err = copy_from_user_nofault(&val, nip, sizeof(val)); + if (err) + return err; + if (get_op(val) == OP_PREFIX) { + err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} + +int probe_kernel_read_inst(struct ppc_inst *inst, + struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = copy_from_kernel_nofault(&val, src, sizeof(val)); + if (err) + return err; + if (get_op(val) == OP_PREFIX) { + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} +#else /* !CONFIG_PPC64 */ +int probe_user_read_inst(struct ppc_inst *inst, + struct ppc_inst __user *nip) +{ + unsigned int val; + int err; + + err = copy_from_user_nofault(&val, nip, sizeof(val)); + if (!err) + *inst = ppc_inst(val); + + return err; +} + +int probe_kernel_read_inst(struct ppc_inst *inst, + struct ppc_inst *src) +{ + unsigned int val; + int err; + + err = copy_from_kernel_nofault(&val, src, sizeof(val)); + if (!err) + *inst = ppc_inst(val); + + return err; +} +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 6440d5943c00..04165b7a163f 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -27,14 +27,14 @@ void splpar_spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (lock->slock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } EXPORT_SYMBOL_GPL(splpar_spin_yield); @@ -53,13 +53,13 @@ void splpar_rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (rw->lock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } #endif diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 0666a8d29596..1550e0d2513a 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -9,20 +9,56 @@ #include <asm/cacheflush.h> +static inline void __clean_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void __flush_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = 
l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void clean_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __clean_pmem_range(start, stop); +} + +static inline void flush_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __flush_pmem_range(start, stop); +} + /* * CONFIG_ARCH_HAS_PMEM_API symbols */ void arch_wb_cache_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + flush_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); @@ -35,19 +71,17 @@ long __copy_from_user_flushcache(void *dest, const void __user *src, unsigned long copied, start = (unsigned long) dest; copied = __copy_from_user(dest, src, size); - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); return copied; } -void *memcpy_flushcache(void *dest, const void *src, size_t size) +void memcpy_flushcache(void *dest, const void *src, size_t size) { unsigned long start = (unsigned long) dest; memcpy(dest, src, size); - flush_dcache_range(start, start + size); - - return dest; + clean_pmem_range(start, start + size); } EXPORT_SYMBOL(memcpy_flushcache); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5f3a7bd9d90d..caee8cc77e19 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -13,8 +13,10 @@ #include <linux/uaccess.h> #include <asm/cpu_has_feature.h> #include <asm/cputable.h> +#include <asm/disassemble.h> extern char system_call_common[]; +extern char system_call_vectored_emulate[]; #ifdef CONFIG_PPC64 /* Bits in SRR1 that are copied from MSR */ @@ -188,6 +190,44 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr, } /* + * Calculate effective address for a MLS:D-form / 8LS:D-form + * prefixed instruction + */ +static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, + unsigned int suffix, + const struct pt_regs *regs) +{ + int ra, prefix_r; + unsigned int dd; + unsigned long ea, d0, d1, d; + + prefix_r = GET_PREFIX_R(instr); + ra = GET_PREFIX_RA(suffix); + + d0 = instr & 0x3ffff; + d1 = suffix & 0xffff; + d = (d0 << 16) | d1; + + /* + * sign extend a 34 bit number + */ + dd = (unsigned int)(d >> 2); + ea = (signed int)dd; + ea = (ea << 2) | (d & 0x3); + + if (!prefix_r && ra) + ea += regs->gpr[ra]; + else if (!prefix_r && !ra) + ; /* Leave ea as is */ + else if (prefix_r && !ra) + ea += regs->nip; + else if (prefix_r && ra) + ; /* Invalid form. Should already be checked for by caller! */ + + return ea; +} + +/* * Return the largest power of 2, not greater than sizeof(unsigned long), * such that x is a multiple of it. */ @@ -1163,54 +1203,64 @@ static nokprobe_inline int trap_compare(long v1, long v2) * otherwise. 
*/ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - unsigned int instr) + struct ppc_inst instr) { +#ifdef CONFIG_PPC64 + unsigned int suffixopcode, prefixtype, prefix_r; +#endif unsigned int opcode, ra, rb, rc, rd, spr, u; unsigned long int imm; unsigned long int val, val2; unsigned int mb, me, sh; + unsigned int word, suffix; long ival; + word = ppc_inst_val(instr); + suffix = ppc_inst_suffix(instr); + op->type = COMPUTE; - opcode = instr >> 26; + opcode = ppc_inst_primary_opcode(instr); switch (opcode) { case 16: /* bc */ op->type = BRANCH; - imm = (signed short)(instr & 0xfffc); - if ((instr & 2) == 0) + imm = (signed short)(word & 0xfffc); + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ - if ((instr & 0xfe2) == 2) + if ((word & 0xfe2) == 2) op->type = SYSCALL; + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && + (word & 0xfe3) == 1) + op->type = SYSCALL_VECTORED_0; else op->type = UNKNOWN; return 0; #endif case 18: /* b */ op->type = BRANCH | BRTAKEN; - imm = instr & 0x03fffffc; + imm = word & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; - if ((instr & 2) == 0) + if ((word & 2) == 0) imm += regs->nip; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; return 1; case 19: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 0: /* mcrf */ op->type = COMPUTE + SETCC; - rd = 7 - ((instr >> 23) & 0x7); - ra = 7 - ((instr >> 18) & 0x7); + rd = 7 - ((word >> 23) & 0x7); + ra = 7 - ((word >> 18) & 0x7); rd *= 4; ra *= 4; val = (regs->ccr >> ra) & 0xf; @@ -1220,11 +1270,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 16: /* bclr */ case 528: /* bcctr */ op->type = BRANCH; - imm = (instr & 0x400)? regs->ctr: regs->link; + imm = (word & 0x400)? 
regs->ctr: regs->link; op->val = truncate_if_32bit(regs->msr, imm); - if (instr & 1) + if (word & 1) op->type |= SETLK; - if (branch_taken(instr, regs, op)) + if (branch_taken(word, regs, op)) op->type |= BRTAKEN; return 1; @@ -1247,23 +1297,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 417: /* crorc */ case 449: /* cror */ op->type = COMPUTE + SETCC; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rd = (instr >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rd = (word >> 21) & 0x1f; ra = (regs->ccr >> (31 - ra)) & 1; rb = (regs->ccr >> (31 - rb)) & 1; - val = (instr >> (6 + ra * 2 + rb)) & 1; + val = (word >> (6 + ra * 2 + rb)) & 1; op->ccval = (regs->ccr & ~(1UL << (31 - rd))) | (val << (31 - rd)); return 1; } break; case 31: - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 598: /* sync */ op->type = BARRIER + BARRIER_SYNC; #ifdef __powerpc64__ - switch ((instr >> 21) & 3) { + switch ((word >> 21) & 3) { case 1: /* lwsync */ op->type = BARRIER + BARRIER_LWSYNC; break; @@ -1285,20 +1335,40 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (!FULL_REGS(regs)) return -1; - rd = (instr >> 21) & 0x1f; - ra = (instr >> 16) & 0x1f; - rb = (instr >> 11) & 0x1f; - rc = (instr >> 6) & 0x1f; + rd = (word >> 21) & 0x1f; + ra = (word >> 16) & 0x1f; + rb = (word >> 11) & 0x1f; + rc = (word >> 6) & 0x1f; switch (opcode) { #ifdef __powerpc64__ + case 1: + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 2: + if (prefix_r && ra) + return 0; + switch (suffixopcode) { + case 14: /* paddi */ + op->type = COMPUTE | PREFIXED; + op->val = mlsd_8lsd_ea(word, suffix, regs); + goto compute_done; + } + } + break; case 2: /* tdi */ - if (rd & trap_compare(regs->gpr[ra], (short) instr)) + if (rd & trap_compare(regs->gpr[ra], (short) word)) goto trap; return 1; #endif case 3: /* twi */ - if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + if (rd & trap_compare((int)regs->gpr[ra], (short) word)) goto trap; return 1; @@ -1307,7 +1377,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -1; - switch (instr & 0x3f) { + switch (word & 0x3f) { case 48: /* maddhd */ asm volatile(PPC_MADDHD(%0, %1, %2, %3) : "=r" (op->val) : "r" (regs->gpr[ra]), @@ -1335,16 +1405,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 7: /* mulli */ - op->val = regs->gpr[ra] * (short) instr; + op->val = regs->gpr[ra] * (short) word; goto compute_done; case 8: /* subfic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1); return 1; case 10: /* cmpli */ - imm = (unsigned short) instr; + imm = (unsigned short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1354,7 +1424,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 11: /* cmpi */ - imm = (short) instr; + imm = (short) word; val = regs->gpr[ra]; #ifdef __powerpc64__ if ((rd & 1) == 0) @@ -1364,35 +1434,35 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 12: /* addic */ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); return 1; case 13: /* addic. 
*/ - imm = (short) instr; + imm = (short) word; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); set_cr0(regs, op); return 1; case 14: /* addi */ - imm = (short) instr; + imm = (short) word; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 15: /* addis */ - imm = ((short) instr) << 16; + imm = ((short) word) << 16; if (ra) imm += regs->gpr[ra]; op->val = imm; goto compute_done; case 19: - if (((instr >> 1) & 0x1f) == 2) { + if (((word >> 1) & 0x1f) == 2) { /* addpcis */ - imm = (short) (instr & 0xffc1); /* d0 + d2 fields */ - imm |= (instr >> 15) & 0x3e; /* d1 field */ + imm = (short) (word & 0xffc1); /* d0 + d2 fields */ + imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; goto compute_done; } @@ -1400,65 +1470,65 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 20: /* rlwimi */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); imm = MASK32(mb, me); op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm); goto logical_done; case 21: /* rlwinm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 23: /* rlwnm */ - mb = (instr >> 6) & 0x1f; - me = (instr >> 1) & 0x1f; + mb = (word >> 6) & 0x1f; + me = (word >> 1) & 0x1f; rb = regs->gpr[rb] & 0x1f; val = DATA32(regs->gpr[rd]); op->val = ROTATE(val, rb) & MASK32(mb, me); goto logical_done; case 24: /* ori */ - op->val = regs->gpr[rd] | (unsigned short) instr; + op->val = regs->gpr[rd] | (unsigned short) word; goto logical_done_nocc; case 25: /* oris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] | (imm << 16); goto logical_done_nocc; case 26: /* xori */ - op->val = regs->gpr[rd] ^ (unsigned short) instr; + op->val = regs->gpr[rd] ^ (unsigned short) word; goto logical_done_nocc; case 27: /* xoris */ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] ^ (imm << 16); goto logical_done_nocc; case 28: /* andi. */ - op->val = regs->gpr[rd] & (unsigned short) instr; + op->val = regs->gpr[rd] & (unsigned short) word; set_cr0(regs, op); goto logical_done_nocc; case 29: /* andis. 
*/ - imm = (unsigned short) instr; + imm = (unsigned short) word; op->val = regs->gpr[rd] & (imm << 16); set_cr0(regs, op); goto logical_done_nocc; #ifdef __powerpc64__ case 30: /* rld* */ - mb = ((instr >> 6) & 0x1f) | (instr & 0x20); + mb = ((word >> 6) & 0x1f) | (word & 0x20); val = regs->gpr[rd]; - if ((instr & 0x10) == 0) { - sh = rb | ((instr & 2) << 4); + if ((word & 0x10) == 0) { + sh = rb | ((word & 2) << 4); val = ROTATE(val, sh); - switch ((instr >> 2) & 3) { + switch ((word >> 2) & 3) { case 0: /* rldicl */ val &= MASK64_L(mb); break; @@ -1478,7 +1548,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } else { sh = regs->gpr[rb] & 0x3f; val = ROTATE(val, sh); - switch ((instr >> 1) & 7) { + switch ((word >> 1) & 7) { case 0: /* rldcl */ op->val = val & MASK64_L(mb); goto logical_done; @@ -1493,8 +1563,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 31: /* isel occupies 32 minor opcodes */ - if (((instr >> 1) & 0x1f) == 15) { - mb = (instr >> 6) & 0x1f; /* bc field */ + if (((word >> 1) & 0x1f) == 15) { + mb = (word >> 6) & 0x1f; /* bc field */ val = (regs->ccr >> (31 - mb)) & 1; val2 = (ra) ? regs->gpr[ra] : 0; @@ -1502,7 +1572,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - switch ((instr >> 1) & 0x3ff) { + switch ((word >> 1) & 0x3ff) { case 4: /* tw */ if (rd == 0x1f || (rd & trap_compare((int)regs->gpr[ra], @@ -1536,17 +1606,17 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->reg = rd; /* only MSR_EE and MSR_RI get changed if bit 15 set */ /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL; op->val = imm; return 0; #endif case 19: /* mfcr */ imm = 0xffffffffUL; - if ((instr >> 20) & 1) { + if ((word >> 20) & 1) { imm = 0xf0000000UL; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) break; imm >>= 4; } @@ -1560,7 +1630,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, val = regs->gpr[rd]; op->ccval = regs->ccr; for (sh = 0; sh < 8; ++sh) { - if (instr & (0x80000 >> sh)) + if (word & (0x80000 >> sh)) op->ccval = (op->ccval & ~imm) | (val & imm); imm >>= 4; @@ -1568,7 +1638,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 339: /* mfspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MFSPR; op->reg = rd; op->spr = spr; @@ -1578,7 +1648,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; case 467: /* mtspr */ - spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); + spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0); op->type = MTSPR; op->val = regs->gpr[rd]; op->spr = spr; @@ -1736,7 +1806,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = (int) regs->gpr[ra] / (int) regs->gpr[rb]; goto arith_done; - +#ifdef __powerpc64__ + case 425: /* divde[.] */ + asm volatile(PPC_DIVDE(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; + case 393: /* divdeu[.] 
*/ + asm volatile(PPC_DIVDEU(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; +#endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -1; @@ -1948,7 +2029,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 826: /* sradi with sh_5 = 0 */ case 827: /* sradi with sh_5 = 1 */ op->type = COMPUTE + SETREG + SETXER; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); ival = (signed long int) regs->gpr[rd]; op->val = ival >> sh; op->xerval = regs->xer; @@ -1964,7 +2045,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -1; op->type = COMPUTE + SETREG; - sh = rb | ((instr & 2) << 4); + sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; if (sh) op->val = ROTATE(val, sh) & MASK64(0, 63 - sh); @@ -1979,34 +2060,34 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, */ case 54: /* dcbst */ op->type = MKOP(CACHEOP, DCBST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 86: /* dcbf */ op->type = MKOP(CACHEOP, DCBF, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 246: /* dcbtst */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 278: /* dcbt */ op->type = MKOP(CACHEOP, DCBTST, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); op->reg = rd; return 0; case 982: /* icbi */ op->type = MKOP(CACHEOP, ICBI, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; case 1014: /* dcbz */ op->type = MKOP(CACHEOP, DCBZ, 0); - op->ea = xform_ea(instr, regs); + op->ea = xform_ea(word, regs); return 0; } break; @@ -2019,14 +2100,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->update_reg = ra; op->reg = rd; op->val = regs->gpr[rd]; - u = (instr >> 20) & UPDATE; + u = (word >> 20) & UPDATE; op->vsx_flags = 0; switch (opcode) { case 31: - u = instr & UPDATE; - op->ea = xform_ea(instr, regs); - switch ((instr >> 1) & 0x3ff) { + u = word & UPDATE; + op->ea = xform_ea(word, regs); + switch ((word >> 1) & 0x3ff) { case 20: /* lwarx */ op->type = MKOP(LARX, 0, 4); break; @@ -2271,25 +2352,25 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 12: /* lxsiwzx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; break; case 76: /* lxsiwax */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, SIGNEXT, 4); op->element_size = 8; break; case 140: /* stxsiwx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; break; case 268: /* lxvx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2298,33 +2379,33 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(LOAD_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? 
VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } case 332: /* lxvdsx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_SPLAT; break; case 364: /* lxvwsx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC; break; case 396: /* stxvx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; op->vsx_flags = VSX_CHECK_VEC; @@ -2333,118 +2414,118 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; if (nb > 16) nb = 16; op->type = MKOP(STORE_VSX, 0, nb); op->element_size = 16; - op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) | + op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) | VSX_CHECK_VEC; break; } case 524: /* lxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 588: /* lxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; break; case 652: /* stxsspx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; op->vsx_flags = VSX_FPCONV; break; case 716: /* stxsdx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; break; case 780: /* lxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 4; break; case 781: /* lxsibzx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 812: /* lxvh8x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 813: /* lxsihzx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 844: /* lxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 8; break; case 876: /* lxvb16x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; op->vsx_flags = VSX_CHECK_VEC; break; case 908: /* stxvw4x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 4; break; case 909: /* stxsibx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 940: /* stxvh8x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; op->vsx_flags = VSX_CHECK_VEC; break; case 941: /* stxsihx */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | 
((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 972: /* stxvd2x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 8; break; case 1004: /* stxvb16x */ - op->reg = rd | ((instr & 1) << 5); + op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 1; op->vsx_flags = VSX_CHECK_VEC; @@ -2457,80 +2538,80 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 32: /* lwz */ case 33: /* lwzu */ op->type = MKOP(LOAD, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 34: /* lbz */ case 35: /* lbzu */ op->type = MKOP(LOAD, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 36: /* stw */ case 37: /* stwu */ op->type = MKOP(STORE, u, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 38: /* stb */ case 39: /* stbu */ op->type = MKOP(STORE, u, 1); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 40: /* lhz */ case 41: /* lhzu */ op->type = MKOP(LOAD, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 42: /* lha */ case 43: /* lhau */ op->type = MKOP(LOAD, SIGNEXT | u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 44: /* sth */ case 45: /* sthu */ op->type = MKOP(STORE, u, 2); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 46: /* lmw */ if (ra >= rd) break; /* invalid form, ra in range to load */ op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 47: /* stmw */ op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ op->type = MKOP(LOAD_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 50: /* lfd */ case 51: /* lfdu */ op->type = MKOP(LOAD_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 52: /* stfs */ case 53: /* stfsu */ op->type = MKOP(STORE_FP, u | FPCONV, 4); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; case 54: /* stfd */ case 55: /* stfdu */ op->type = MKOP(STORE_FP, u, 8); - op->ea = dform_ea(instr, regs); + op->ea = dform_ea(word, regs); break; #endif @@ -2538,14 +2619,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 56: /* lq */ if (!((rd & 1) || (rd == ra))) op->type = MKOP(LOAD, 0, 16); - op->ea = dqform_ea(instr, regs); + op->ea = dqform_ea(word, regs); break; #endif #ifdef CONFIG_VSX case 57: /* lfdp, lxsd, lxssp */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* lfdp */ if (rd & 1) break; /* reg must be even */ @@ -2569,8 +2650,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 58: /* ld[u], lwa */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* ld */ op->type = MKOP(LOAD, 0, 8); break; @@ -2586,16 +2667,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 61: /* stfdp, lxv, stxsd, stxssp, stxv */ - switch (instr & 7) { + switch (word & 7) { case 0: /* 
stfdp with LSB of DS field = 0 */ case 4: /* stfdp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->type = MKOP(STORE_FP, 0, 16); break; case 1: /* lxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2604,7 +2685,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); op->element_size = 8; @@ -2613,7 +2694,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ - op->ea = dsform_ea(instr, regs); + op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); op->element_size = 8; @@ -2621,8 +2702,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ - op->ea = dqform_ea(instr, regs); - if (instr & 8) + op->ea = dqform_ea(word, regs); + if (word & 8) op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2634,8 +2715,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 62: /* std[u] */ - op->ea = dsform_ea(instr, regs); - switch (instr & 3) { + op->ea = dsform_ea(word, regs); + switch (word & 3) { case 0: /* std */ op->type = MKOP(STORE, 0, 8); break; @@ -2648,6 +2729,124 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } break; + case 1: /* Prefixed instructions */ + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); + op->update_reg = ra; + rd = (suffix >> 21) & 0x1f; + op->reg = rd; + op->val = regs->gpr[rd]; + + suffixopcode = get_op(suffix); + prefixtype = (word >> 24) & 0x3; + switch (prefixtype) { + case 0: /* Type 00 Eight-Byte Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 41: /* plwa */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4); + break; + case 42: /* plxsd */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 43: /* plxssp */ + op->reg = rd + 32; + op->type = MKOP(LOAD_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 46: /* pstxsd */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 8); + op->element_size = 8; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 47: /* pstxssp */ + op->reg = rd + 32; + op->type = MKOP(STORE_VSX, PREFIXED, 4); + op->element_size = 8; + op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC; + break; + case 51: /* plxv1 */ + op->reg += 32; + fallthrough; + case 50: /* plxv0 */ + op->type = MKOP(LOAD_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 55: /* pstxv1 */ + op->reg = rd + 32; + fallthrough; + case 54: /* pstxv0 */ + op->type = MKOP(STORE_VSX, PREFIXED, 16); + op->element_size = 16; + op->vsx_flags = VSX_CHECK_VEC; + break; + case 56: /* plq */ + op->type = MKOP(LOAD, PREFIXED, 16); + break; + case 57: /* pld */ + op->type = MKOP(LOAD, PREFIXED, 8); + break; + case 60: /* stq */ + op->type = MKOP(STORE, PREFIXED, 16); + break; + case 61: /* pstd */ + op->type = 
MKOP(STORE, PREFIXED, 8); + break; + } + break; + case 1: /* Type 01 Eight-Byte Register-to-Register */ + break; + case 2: /* Type 10 Modified Load/Store */ + if (prefix_r && ra) + break; + op->ea = mlsd_8lsd_ea(word, suffix, regs); + switch (suffixopcode) { + case 32: /* plwz */ + op->type = MKOP(LOAD, PREFIXED, 4); + break; + case 34: /* plbz */ + op->type = MKOP(LOAD, PREFIXED, 1); + break; + case 36: /* pstw */ + op->type = MKOP(STORE, PREFIXED, 4); + break; + case 38: /* pstb */ + op->type = MKOP(STORE, PREFIXED, 1); + break; + case 40: /* plhz */ + op->type = MKOP(LOAD, PREFIXED, 2); + break; + case 42: /* plha */ + op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2); + break; + case 44: /* psth */ + op->type = MKOP(STORE, PREFIXED, 2); + break; + case 48: /* plfs */ + op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4); + break; + case 50: /* plfd */ + op->type = MKOP(LOAD_FP, PREFIXED, 8); + break; + case 52: /* pstfs */ + op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4); + break; + case 54: /* pstfd */ + op->type = MKOP(STORE_FP, PREFIXED, 8); + break; + } + break; + case 3: /* Type 11 Modified Register-to-Register */ + break; + } #endif /* __powerpc64__ */ } @@ -2663,7 +2862,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; logical_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); logical_done_nocc: op->reg = ra; @@ -2671,7 +2870,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; arith_done: - if (instr & 1) + if (word & 1) set_cr0(regs, op); compute_done: op->reg = rd; @@ -2756,7 +2955,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) { unsigned long next_pc; - next_pc = truncate_if_32bit(regs->msr, regs->nip + 4); + next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type)); switch (GETTYPE(op->type)) { case COMPUTE: if (op->type & SETREG) @@ -3101,7 +3300,7 @@ NOKPROBE_SYMBOL(emulate_loadstore); * or -1 if the instruction is one that should not be stepped, * such as an rfid, or a mtmsrd that would clear MSR_RI. 
*/ -int emulate_step(struct pt_regs *regs, unsigned int instr) +int emulate_step(struct pt_regs *regs, struct ppc_inst instr) { struct instruction_op op; int r, err, type; @@ -3194,6 +3393,18 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) regs->msr = MSR_KERNEL; return 1; +#ifdef CONFIG_PPC_BOOK3S_64 + case SYSCALL_VECTORED_0: /* scv 0 */ + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_vectored_emulate; + regs->msr = MSR_KERNEL; + return 1; +#endif + case RFI: return -1; #endif @@ -3201,7 +3412,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) return 0; instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + regs->nip = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)); return 1; } NOKPROBE_SYMBOL(emulate_step); diff --git a/arch/powerpc/lib/test_code-patching.S b/arch/powerpc/lib/test_code-patching.S new file mode 100644 index 000000000000..a9be6107844e --- /dev/null +++ b/arch/powerpc/lib/test_code-patching.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 IBM Corporation + */ +#include <asm/ppc-opcode.h> + + .text + +#define globl(x) \ + .globl x; \ +x: + +globl(code_patching_test1) + nop + nop +globl(end_code_patching_test1) + +globl(code_patching_test1_expected) + .long OP_PREFIX << 26 + .long 0x0000000 diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 53df4146dd32..0a201b771477 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -8,58 +8,51 @@ #define pr_fmt(fmt) "emulate_step_test: " fmt #include <linux/ptrace.h> +#include <asm/cpu_has_feature.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/code-patching.h> - -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) -#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) - -/* - * Defined with TEST_ prefix so it does not conflict with other - * definitions. 
- */ -#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define TEST_ADDC(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADDC_DOT(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#include <asm/inst.h> #define MAX_SUBTESTS 16 #define IGNORE_GPR(n) (0x1UL << (n)) #define IGNORE_XER (0x1UL << 32) #define IGNORE_CCR (0x1UL << 33) +#define NEGATIVE_TEST (0x1UL << 63) + +#define TEST_PLD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLWZ(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_LWZ(r, base, i)) + +#define TEST_PSTD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PADDI(t, a, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_ADDI(t, a, i)) + static void __init init_pt_regs(struct pt_regs *regs) { @@ -104,7 +97,7 @@ static void __init test_ld(void) regs.gpr[3] = (unsigned long) &a; /* ld r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("ld", "PASS"); @@ -112,6 +105,29 @@ static void __init test_ld(void) show_result("ld", "FAIL"); } 
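Aside (not part of the patch): a minimal, self-contained userspace sketch of how the 34-bit MLS:D/8LS:D displacement used by the TEST_P* macros above is split between the prefix and suffix words and later reassembled and sign-extended by mlsd_8lsd_ea() earlier in this diff. The imm_hi()/imm_lo() helpers below are illustrative stand-ins, not the kernel's IMM_H/IMM_L macros, and the sign-extension is written differently from (but equivalent to) the kernel's version.

/*
 * Sketch only: prefix word carries the upper 18 bits of the 34-bit
 * displacement, suffix word carries the lower 16 bits.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t imm_hi(int64_t imm)
{
	return ((uint64_t)imm >> 16) & 0x3ffff;	/* bits 16..33 -> prefix word */
}

static uint32_t imm_lo(int64_t imm)
{
	return (uint64_t)imm & 0xffff;		/* bits 0..15 -> suffix word */
}

static int64_t ea_from_prefix(uint32_t prefix, uint32_t suffix)
{
	uint64_t d = ((uint64_t)(prefix & 0x3ffff) << 16) | (suffix & 0xffff);

	/*
	 * Sign-extend the 34-bit field: move bit 33 up to bit 63, then
	 * arithmetic-shift back down (relies on the usual behaviour of
	 * right-shifting negative values, as the kernel code also does).
	 */
	return (int64_t)(d << 30) >> 30;
}

int main(void)
{
	/* round-trips for the extreme values exercised by the paddi tests */
	assert(ea_from_prefix(imm_hi(-8), imm_lo(-8)) == -8);
	assert(ea_from_prefix(imm_hi(0x1ffffffffLL), imm_lo(0x1ffffffffLL)) == 0x1ffffffffLL);	/* largest positive 34-bit value */
	assert(ea_from_prefix(imm_hi(-0x200000000LL), imm_lo(-0x200000000LL)) == -0x200000000LL);	/* most negative 34-bit value */
	return 0;
}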
+static void __init test_pld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* pld r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLD(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("pld", "PASS"); + else + show_result("pld", "FAIL"); +} + static void __init test_lwz(void) { struct pt_regs regs; @@ -122,7 +138,7 @@ static void __init test_lwz(void) regs.gpr[3] = (unsigned long) &a; /* lwz r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZ(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("lwz", "PASS"); @@ -130,6 +146,30 @@ static void __init test_lwz(void) show_result("lwz", "FAIL"); } +static void __init test_plwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* plwz r5, 0(r3), 0 */ + + stepped = emulate_step(®s, TEST_PLWZ(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("plwz", "PASS"); + else + show_result("plwz", "FAIL"); +} + static void __init test_lwzx(void) { struct pt_regs regs; @@ -142,7 +182,7 @@ static void __init test_lwzx(void) regs.gpr[5] = 0x8765; /* lwzx r5, r3, r4 */ - stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZX(5, 3, 4))); if (stepped == 1 && regs.gpr[5] == a[2]) show_result("lwzx", "PASS"); else @@ -160,13 +200,36 @@ static void __init test_std(void) regs.gpr[5] = 0x5678; /* std r5, 0(r3) */ - stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("std", "PASS"); else show_result("std", "FAIL"); } +static void __init test_pstd(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + regs.gpr[5] = 0x5678; + + /* pstd r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTD(5, 3, 0, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("pstd", "PASS"); + else + show_result("pstd", "FAIL"); +} + static void __init test_ldarx_stdcx(void) { struct pt_regs regs; @@ -185,7 +248,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x5678; /* ldarx r5, r3, r4, 0 */ - stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0))); /* * Don't touch 'a' here. Touching 'a' can do Load/store @@ -203,7 +266,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x9ABC; /* stdcx. 
r5, r3, r4 */ - stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STDCX(5, 3, 4))); /* * Two possible scenarios that indicates successful emulation @@ -243,7 +306,7 @@ static void __init test_lfsx_stfsx(void) regs.gpr[4] = 0; /* lfsx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFSX(10, 3, 4))); if (stepped == 1) show_result("lfsx", "PASS"); @@ -256,7 +319,7 @@ static void __init test_lfsx_stfsx(void) c.a = 678.91; /* stfsx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFSX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfsx", "PASS"); @@ -264,6 +327,53 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "FAIL"); } +static void __init test_plfs_pstfs(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfs ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfs frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFS(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfs", "PASS"); + else + show_result("plfs", "FAIL"); + + + /*** pstfs ***/ + + c.a = 678.91; + + /* pstfs frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFS(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfs", "PASS"); + else + show_result("pstfs", "FAIL"); +} + static void __init test_lfdx_stfdx(void) { struct pt_regs regs; @@ -286,7 +396,7 @@ static void __init test_lfdx_stfdx(void) regs.gpr[4] = 0; /* lfdx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFDX(10, 3, 4))); if (stepped == 1) show_result("lfdx", "PASS"); @@ -299,13 +409,60 @@ static void __init test_lfdx_stfdx(void) c.a = 987654.32; /* stfdx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFDX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfdx", "PASS"); else show_result("stfdx", "FAIL"); } + +static void __init test_plfd_pstfd(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfd ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfd frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFD(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfd", "PASS"); + else + show_result("plfd", "FAIL"); + + + /*** pstfd ***/ + + c.a = 987654.32; + + /* pstfd frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFD(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfd", "PASS"); + else + show_result("pstfd", "FAIL"); +} #else static void __init test_lfsx_stfsx(void) { @@ -313,11 +470,23 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); } +static void __init test_plfs_pstfs(void) +{ + show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)"); +} + static void __init test_lfdx_stfdx(void) { show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); show_result("stfdx", "SKIP 
(CONFIG_PPC_FPU is not set)"); } + +static void __init test_plfd_pstfd(void) +{ + show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)"); +} #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC @@ -345,7 +514,7 @@ static void __init test_lvx_stvx(void) regs.gpr[4] = 0; /* lvx vrt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LVX(10, 3, 4))); if (stepped == 1) show_result("lvx", "PASS"); @@ -361,7 +530,7 @@ static void __init test_lvx_stvx(void) c.b[3] = 498532; /* stvx vrs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STVX(10, 3, 4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) @@ -402,7 +571,7 @@ static void __init test_lxvd2x_stxvd2x(void) regs.gpr[4] = 0; /* lxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4))); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { show_result("lxvd2x", "PASS"); @@ -422,7 +591,7 @@ static void __init test_lxvd2x_stxvd2x(void) c.b[3] = 4; /* stxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3] && @@ -446,33 +615,44 @@ static void __init test_lxvd2x_stxvd2x(void) static void __init run_tests_load_store(void) { test_ld(); + test_pld(); test_lwz(); + test_plwz(); test_lwzx(); test_std(); + test_pstd(); test_ldarx_stdcx(); test_lfsx_stfsx(); + test_plfs_pstfs(); test_lfdx_stfdx(); + test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); } struct compute_test { char *mnemonic; + unsigned long cpu_feature; struct { char *descr; unsigned long flags; - unsigned int instr; + struct ppc_inst instr; struct pt_regs regs; } subtests[MAX_SUBTESTS + 1]; }; +/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */ +#define SI_MIN BIT(33) +#define SI_MAX (BIT(33) - 1) +#define SI_UMAX (BIT(34) - 1) + static struct compute_test compute_tests[] = { { .mnemonic = "nop", .subtests = { { .descr = "R0 = LONG_MAX", - .instr = PPC_INST_NOP, + .instr = ppc_inst(PPC_INST_NOP), .regs = { .gpr[0] = LONG_MAX, } @@ -484,7 +664,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -492,7 +672,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -500,7 +680,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -508,7 +688,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -516,7 +696,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD(20, 
21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -524,7 +704,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -532,7 +712,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -540,7 +720,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -548,7 +728,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -556,7 +736,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -570,7 +750,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -578,7 +758,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -587,7 +767,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -595,7 +775,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -603,7 +783,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -611,7 +791,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -619,7 +799,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -627,7 +807,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -635,7 +815,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { 
.gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -643,7 +823,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -656,7 +836,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -664,7 +844,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -672,7 +852,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -680,7 +860,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -688,7 +868,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -696,7 +876,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -704,7 +884,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -712,7 +892,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -720,7 +900,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -728,7 +908,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -736,7 +916,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, @@ -750,7 +930,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -758,7 +938,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, 
@@ -767,7 +947,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -775,7 +955,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -783,7 +963,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -791,7 +971,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -799,7 +979,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -807,7 +987,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -815,7 +995,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -823,7 +1003,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -831,47 +1011,335 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, } } } + }, + { + .mnemonic = "divde", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divde.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divdeu", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = 
ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "divdeu.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "paddi", + .cpu_feature = CPU_FTR_ARCH_31, + .subtests = { + { + .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_UMAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 0, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + } + }, + { + .descr = "RA = 0, SI 
= SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + .gpr[22] = 0x0, + } + }, + { + .descr = "RA is r0, SI = 0, R = 1", + .instr = TEST_PADDI(21, 0, 0, 1), + .regs = { + .gpr[21] = 0, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 1", + .instr = TEST_PADDI(21, 0, SI_MIN, 1), + .regs = { + .gpr[21] = 0, + } + }, + /* Invalid instruction form with R = 1 and RA != 0 */ + { + .descr = "RA = R22(0), SI = 0, R = 1", + .instr = TEST_PADDI(21, 22, 0, 1), + .flags = NEGATIVE_TEST, + .regs = { + .gpr[21] = 0, + .gpr[22] = 0, + } + } + } } }; static int __init emulate_compute_instr(struct pt_regs *regs, - unsigned int instr) + struct ppc_inst instr, + bool negative) { + int analysed; struct instruction_op op; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; - if (analyse_instr(&op, regs, instr) != 1 || - GETTYPE(op.type) != COMPUTE) { - pr_info("emulation failed, instruction = 0x%08x\n", instr); + regs->nip = patch_site_addr(&patch__exec_instr); + + analysed = analyse_instr(&op, regs, instr); + if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { + if (negative) + return -EFAULT; + pr_info("emulation failed, instruction = %s\n", ppc_inst_as_str(instr)); return -EFAULT; } - - emulate_update_regs(regs, &op); + if (analysed == 1 && negative) + pr_info("negative test failed, instruction = %s\n", ppc_inst_as_str(instr)); + if (!negative) + emulate_update_regs(regs, &op); return 0; } static int __init execute_compute_instr(struct pt_regs *regs, - unsigned int instr) + struct ppc_inst instr) { extern int exec_instr(struct pt_regs *regs); - extern s32 patch__exec_instr; - if (!regs || !instr) + if (!regs || !ppc_inst_val(instr)) return -EINVAL; /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = 0x%08x\n", instr); + pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr)); return -EFAULT; } @@ -891,16 +1359,23 @@ static void __init run_tests_compute(void) unsigned long flags; struct compute_test *test; struct pt_regs *regs, exp, got; - unsigned int i, j, k, instr; - bool ignore_gpr, ignore_xer, ignore_ccr, passed; + unsigned int i, j, k; + struct ppc_inst instr; + bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { test = &compute_tests[i]; + if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) { + show_result(test->mnemonic, "SKIP (!CPU_FTR)"); + continue; + } + for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) { instr = test->subtests[j].instr; flags = test->subtests[j].flags; regs = &test->subtests[j].regs; + negative = flags & NEGATIVE_TEST; ignore_xer = flags & IGNORE_XER; ignore_ccr = flags & IGNORE_CCR; passed = true; @@ -915,8 +1390,12 @@ static void __init run_tests_compute(void) exp.msr = MSR_KERNEL; got.msr = MSR_KERNEL; - if (emulate_compute_instr(&got, instr) || - execute_compute_instr(&exp, instr)) { + rc = emulate_compute_instr(&got, instr, negative) != 0; + if (negative) { + /* skip executing instruction */ + passed = rc; + goto print; + } else if (rc || execute_compute_instr(&exp, instr)) { passed = false; goto print; } diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 1580f34f4f4f..9ef941d958d8 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -80,7 +80,9 @@ 
_GLOBAL(exec_instr) REST_NVGPRS(r31) /* Placeholder for the test instruction */ + .balign 64 1: nop + nop patch_site 1b patch__exec_instr /* diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 877d880890fe..1690d369688b 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -14,9 +14,9 @@ * hash table, so this file is not used on them.) */ +#include <linux/pgtable.h> #include <asm/reg.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/cputable.h> #include <asm/ppc_asm.h> #include <asm/thread_info.h> @@ -62,7 +62,7 @@ _GLOBAL(hash_page) isync #endif /* Get PTE (linux-style) and check access */ - lis r0,KERNELBASE@h /* check if kernel address */ + lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ @@ -81,7 +81,7 @@ _GLOBAL(hash_page) rlwinm. r8,r8,0,0,20 /* extract pt base address */ #endif #ifdef CONFIG_SMP - beq- hash_page_out /* return if no mapping */ + beq- .Lhash_page_out /* return if no mapping */ #else /* XXX it seems like the 601 will give a machine fault on the rfi if its alignment is wrong (bottom 4 bits of address are @@ -109,11 +109,11 @@ _GLOBAL(hash_page) #if (PTE_FLAGS_OFFSET != 0) addi r8,r8,PTE_FLAGS_OFFSET #endif -retry: +.Lretry: lwarx r6,0,r8 /* get linux-style pte, flag word */ andc. r5,r3,r6 /* check access & ~permission */ #ifdef CONFIG_SMP - bne- hash_page_out /* return if access not permitted */ + bne- .Lhash_page_out /* return if access not permitted */ #else bnelr- #endif @@ -128,7 +128,7 @@ retry: #endif /* CONFIG_SMP */ #endif /* CONFIG_PTE_64BIT */ stwcx. r5,0,r8 /* attempt to update PTE */ - bne- retry /* retry if someone got there first */ + bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ #ifndef CONFIG_VMAP_STACK @@ -156,13 +156,14 @@ retry: #endif #ifdef CONFIG_SMP -hash_page_out: +.Lhash_page_out: eieio lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) blr #endif /* CONFIG_SMP */ +_ASM_NOKPROBE_SYMBOL(hash_page) /* * Add an entry for a particular page to the hash table. @@ -267,6 +268,7 @@ _GLOBAL(add_hash_page) lwz r0,4(r1) mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(add_hash_page) /* * This routine adds a hardware PTE to the hash table. 
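The exec_instr change above aligns the patch site to 64 bytes and widens it to two nops so that an 8-byte prefixed instruction can be patched in without crossing a 64-byte boundary, which prefixed instructions are not permitted to do. A minimal standalone check of that boundary condition, purely illustrative:

#include <stdio.h>
#include <stdint.h>

/* A prefixed (8-byte) instruction must fit within one 64-byte window,
 * hence the .balign 64 and the second nop at the patch site. */
static int crosses_64b_boundary(uint64_t addr, unsigned int len)
{
	return (addr & 63) + len > 64;
}

int main(void)
{
	printf("%d\n", crosses_64b_boundary(0x1000, 8));  /* 0: aligned, fits        */
	printf("%d\n", crosses_64b_boundary(0x103c, 8));  /* 1: straddles 0x1040     */
	return 0;
}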
@@ -360,7 +362,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */ CMPPTE 0,r6,r5 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_slot + beq+ .Lfound_slot patch_site 0f, patch__hash_page_B /* Search the secondary PTEG for a matching PTE */ @@ -372,7 +374,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 2: LDPTEu r6,HPTE_SIZE(r4) CMPPTE 0,r6,r5 bdnzf 2,2b - beq+ found_slot + beq+ .Lfound_slot xori r5,r5,PTE_H /* clear H bit again */ /* Search the primary PTEG for an empty slot */ @@ -381,7 +383,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */ TST_V(r6) /* test valid bit */ bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_empty + beq+ .Lfound_empty /* update counter of times that the primary PTEG is full */ lis r4, (primary_pteg_full - PAGE_OFFSET)@ha @@ -399,7 +401,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 2: LDPTEu r6,HPTE_SIZE(r4) TST_V(r6) bdnzf 2,2b - beq+ found_empty + beq+ .Lfound_empty xori r5,r5,PTE_H /* clear H bit again */ /* @@ -437,9 +439,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) #ifndef CONFIG_SMP /* Store PTE in PTEG */ -found_empty: +.Lfound_empty: STPTE r5,0(r4) -found_slot: +.Lfound_slot: STPTE r8,HPTE_SIZE/2(r4) #else /* CONFIG_SMP */ @@ -460,8 +462,8 @@ found_slot: * We do however have to make sure that the PTE is never in an invalid * state with the V bit set. */ -found_empty: -found_slot: +.Lfound_empty: +.Lfound_slot: CLR_V(r5,r0) /* clear V (valid) bit in PTE */ STPTE r5,0(r4) sync @@ -474,6 +476,7 @@ found_slot: sync /* make sure pte updates get to memory */ blr +_ASM_NOKPROBE_SYMBOL(create_hpte) .section .bss .align 2 @@ -630,6 +633,7 @@ _GLOBAL(flush_hash_pages) isync blr EXPORT_SYMBOL(flush_hash_pages) +_ASM_NOKPROBE_SYMBOL(flush_hash_pages) /* * Flush an entry from the TLB @@ -667,6 +671,7 @@ _GLOBAL(_tlbie) sync #endif /* CONFIG_SMP */ blr +_ASM_NOKPROBE_SYMBOL(_tlbie) /* * Flush the entire TLB. 603/603e only @@ -708,3 +713,4 @@ _GLOBAL(_tlbia) isync #endif /* CONFIG_SMP */ blr +_ASM_NOKPROBE_SYMBOL(_tlbia) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 39ba53ca5bb5..d426eaf76bb0 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -170,6 +170,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pr_debug("RAM mapped without BATs\n"); return base; } + if (debug_pagealloc_enabled()) { + if (base >= border) + return base; + if (top >= border) + top = border; + } if (!strict_kernel_rwx_enabled() || base >= border || top <= border) return __mmu_mapin_ram(base, top); @@ -181,12 +187,31 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return __mmu_mapin_ram(border, top); } +static bool is_module_segment(unsigned long addr) +{ + if (!IS_ENABLED(CONFIG_MODULES)) + return false; +#ifdef MODULES_VADDR + if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) + return false; + if (addr > ALIGN(MODULES_END, SZ_256M) - 1) + return false; +#else + if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) + return false; + if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) + return false; +#endif + return true; +} + void mmu_mark_initmem_nx(void) { int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 
8 : 4; int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) @@ -201,9 +226,10 @@ void mmu_mark_initmem_nx(void) size = block_size(base, top); size = max(size, 128UL << 10); if ((top - base) > size) { - if (strict_kernel_rwx_enabled()) - pr_warn("Kernel _etext not properly aligned\n"); size <<= 1; + if (strict_kernel_rwx_enabled() && base + size > border) + pr_warn("Some RW data is getting mapped X. " + "Adjust CONFIG_DATA_SHIFT to avoid that.\n"); } setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; @@ -215,9 +241,9 @@ void mmu_mark_initmem_nx(void) for (i = TASK_SIZE >> 28; i < 16; i++) { /* Do not set NX on VM space for modules */ - if (IS_ENABLED(CONFIG_MODULES) && - (VMALLOC_START & 0xf0000000) == i << 28) - break; + if (is_module_segment(i << 28)) + continue; + mtsrin(mfsrin(i << 28) | 0x10000000, i << 28); } } @@ -312,7 +338,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) if (!Hash) return; - pmd = pmd_ptr(mm, ea); + pmd = pmd_off(mm, ea); if (!pmd_none(*pmd)) add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index dc9039a170aa..b6c7427daa6f 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -90,7 +90,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, if (start >= end) return; end = (end - 1) | ~PAGE_MASK; - pmd = pmd_ptr(mm, start); + pmd = pmd_off(mm, start); for (;;) { pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; if (pmd_end > end) @@ -129,7 +129,7 @@ void flush_tlb_mm(struct mm_struct *mm) /* * It is safe to go down the mm's list of vmas when called - * from dup_mmap, holding mmap_sem. It would also be safe from + * from dup_mmap, holding mmap_lock. It would also be safe from * unmap_region or exit_mmap, but not from vmtruncate on SMP - * but it seems dup_mmap is the only SMP case which gets here. */ @@ -148,7 +148,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) return; } mm = (vmaddr < TASK_SIZE)? 
vma->vm_mm: &init_mm; - pmd = pmd_ptr(mm, vmaddr); + pmd = pmd_off(mm, vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index eefa89c6117b..964467b3a776 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -10,8 +10,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/machdep.h> diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index d2d8237ea9d5..cf20e5229ce1 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -14,11 +14,11 @@ #include <linux/processor.h> #include <linux/threads.h> #include <linux/smp.h> +#include <linux/pgtable.h> #include <asm/machdep.h> #include <asm/mmu.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/trace.h> #include <asm/tlb.h> #include <asm/cputable.h> diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 64733b9cb20a..fd9c7f91b092 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -9,8 +9,6 @@ #include <linux/mm_types.h> #include <linux/mm.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/sections.h> #include <asm/mmu.h> #include <asm/tlb.h> @@ -148,6 +146,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -155,7 +154,8 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; pmdp = pmd_alloc(&init_mm, pudp, ea); @@ -236,7 +236,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres * to hugepage, we first clear the pmd, then invalidate all * the PTE entries. The assumption here is that any low level * page fault will see a none pmd and take the slow path that - * will wait on mmap_sem. But we could very well be in a + * will wait on mmap_lock. But we could very well be in a * hash_page with local ptep pointer value. Such a hash page * can result in adding new HPTE entries for normal subpages. * That means we could be modifying the page content as we @@ -250,7 +250,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres * Now invalidate the hpte entries in the range * covered by pmd. This make sure we take a * fault and will find the pmd as none, which will - * result in a major fault which takes mmap_sem and + * result in a major fault which takes mmap_lock and * hence wait for collapse to complete. Without this * the __collapse_huge_page_copy can result in copying * the old content. @@ -363,17 +363,6 @@ pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, * hash fault look at them. */ memset(pgtable, 0, PTE_FRAG_SIZE); - /* - * Serialize against find_current_mm_pte variants which does lock-less - * lookup in page tables with local interrupts disabled. For huge pages - * it casts pmd_t to pte_t. 
Since format of pte_t is different from - * pmd_t we want to prevent transit from pmd pointing to page table - * to pmd pointing to huge page (and back) while interrupts are disabled. - * We clear pmd to possibly replace it with page table pointer in - * different code paths. So make sure we wait for the parallel - * find_curren_mm_pte to finish. - */ - serialize_against_pte_lookup(mm); return old_pmd; } diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c index 4a70d8dd39cd..eb0bccaf221e 100644 --- a/arch/powerpc/mm/book3s64/hash_tlb.c +++ b/arch/powerpc/mm/book3s64/hash_tlb.c @@ -21,7 +21,6 @@ #include <linux/mm.h> #include <linux/percpu.h> #include <linux/hardirq.h> -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/bug.h> @@ -176,7 +175,6 @@ void hash__tlb_flush(struct mmu_gather *tlb) * from the hash table (and the TLB). But keeps * the linux PTEs intact. * - * @mm : mm_struct of the target address space (generally init_mm) * @start : starting address * @end : ending address (not included in the flush) * @@ -189,17 +187,14 @@ void hash__tlb_flush(struct mmu_gather *tlb) * Because of that usage pattern, it is implemented for small size rather * than speed. */ -void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end) +void __flush_hash_table_range(unsigned long start, unsigned long end) { - bool is_thp; int hugepage_shift; unsigned long flags; - start = _ALIGN_DOWN(start, PAGE_SIZE); - end = _ALIGN_UP(end, PAGE_SIZE); + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); - BUG_ON(!mm->pgd); /* * Note: Normally, we should only ever use a batch within a @@ -212,21 +207,15 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, local_irq_save(flags); arch_enter_lazy_mmu_mode(); for (; start < end; start += PAGE_SIZE) { - pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp, - &hugepage_shift); + pte_t *ptep = find_init_mm_pte(start, &hugepage_shift); unsigned long pte; if (ptep == NULL) continue; pte = pte_val(*ptep); - if (is_thp) - trace_hugepage_invalidate(start, pte); if (!(pte & H_PAGE_HASHPTE)) continue; - if (unlikely(is_thp)) - hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); - else - hpte_need_flush(mm, start, ptep, pte, hugepage_shift); + hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift); } arch_leave_lazy_mmu_mode(); local_irq_restore(flags); @@ -238,7 +227,7 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) pte_t *start_pte; unsigned long flags; - addr = _ALIGN_DOWN(addr, PMD_SIZE); + addr = ALIGN_DOWN(addr, PMD_SIZE); /* * Note: Normally, we should only ever use a batch within a * PTE locked section. This violates the rule, but will work diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 8ed2411c3f39..c663e7ba801f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -35,10 +35,10 @@ #include <linux/pkeys.h> #include <linux/hugetlb.h> #include <linux/cpu.h> +#include <linux/pgtable.h> #include <asm/debugfs.h> #include <asm/processor.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/page.h> @@ -66,6 +66,9 @@ #include <mm/mmu_decl.h> +#include "internal.h" + + #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) #else @@ -593,7 +596,7 @@ static void __init htab_scan_page_sizes(void) } #ifdef CONFIG_HUGETLB_PAGE - if (!hugetlb_disabled) { + if (!hugetlb_disabled && !early_radix_enabled() ) { /* Reserve 16G huge page memory sections for huge pages */ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); } @@ -660,11 +663,10 @@ static void __init htab_init_page_sizes(void) * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default */ - if (IS_ENABLED(STRICT_KERNEL_RWX) && + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && (unsigned long)_stext % 0x1000000) { if (mmu_psize_defs[MMU_PAGE_16M].shift) - pr_warn("Kernel not 16M aligned, " - "disabling 16M linear map alignment"); + pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n"); aligned = false; } @@ -785,7 +787,7 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int resize_hpt_for_hotplug(unsigned long new_mem_size) +static int resize_hpt_for_hotplug(unsigned long new_mem_size) { unsigned target_hpt_shift; @@ -819,6 +821,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, return -1; } + resize_hpt_for_hotplug(memblock_phys_mem_size()); + rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(prot), mmu_linear_psize, mmu_kernel_ssize); @@ -836,6 +840,10 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); WARN_ON(rc < 0); + + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) + pr_warn("Hash collision while resizing HPT\n"); + return rc; } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -870,6 +878,9 @@ static void __init htab_initialize(void) printk(KERN_INFO "Using 1TB segments\n"); } + if (stress_slb_enabled) + static_branch_enable(&stress_slb_key); + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. @@ -1105,6 +1116,11 @@ void hash__early_init_mmu_secondary(void) if (cpu_has_feature(CPU_FTR_ARCH_206) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); + +#ifdef CONFIG_PPC_MEM_KEYS + if (mmu_has_feature(MMU_FTR_PKEY)) + mtspr(SPRN_UAMOR, default_uamor); +#endif } #endif /* CONFIG_SMP */ @@ -1350,8 +1366,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, goto bail; } - /* Add _PAGE_PRESENT to the required access perm */ - access |= _PAGE_PRESENT; + /* + * Add _PAGE_PRESENT to the required access perm. If there are parallel + * updates to the pte that can possibly clear _PAGE_PTE, catch that too. + * + * We can safely use the return pte address in rest of the function + * because we do set H_PAGE_BUSY which prevents further updates to pte + * from generic code. + */ + access |= _PAGE_PRESENT | _PAGE_PTE; /* * Pre-check access permissions (will be re-checked atomically @@ -1539,16 +1562,14 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) } #endif -static void hash_preload(struct mm_struct *mm, unsigned long ea, +static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, bool is_exec, unsigned long trap) { - int hugepage_shift; unsigned long vsid; pgd_t *pgdir; - pte_t *ptep; - unsigned long flags; int rc, ssize, update_flags = 0; unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? 
_PAGE_EXEC : 0); + unsigned long flags; BUG_ON(get_region_id(ea) != USER_REGION_ID); @@ -1568,32 +1589,42 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea, vsid = get_user_vsid(&mm->context, ea, ssize); if (!vsid) return; - /* - * Hash doesn't like irqs. Walking linux page table with irq disabled - * saves us from holding multiple locks. - */ - local_irq_save(flags); - /* - * THP pages use update_mmu_cache_pmd. We don't do - * hash preload there. Hence can ignore THP here - */ - ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift); - if (!ptep) - goto out_exit; - - WARN_ON(hugepage_shift); #ifdef CONFIG_PPC_64K_PAGES /* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on * a 64K kernel), then we don't preload, hash_page() will take * care of it once we actually try to access the page. * That way we don't have to duplicate all of the logic for segment * page size demotion here + * Called with PTL held, hence can be sure the value won't change in + * between. */ if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep)) - goto out_exit; + return; #endif /* CONFIG_PPC_64K_PAGES */ + /* + * __hash_page_* must run with interrupts off, as it sets the + * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any + * time and may take a hash fault reading the user stack, see + * read_user_stack_slow() in the powerpc/perf code. + * + * If that takes a hash fault on the same page as we lock here, it + * will bail out when seeing H_PAGE_BUSY set, and retry the access + * leading to an infinite loop. + * + * Disabling interrupts here does not prevent perf interrupts, but it + * will prevent them taking hash faults (see the NMI test in + * do_hash_page), then read_user_stack's copy_from_user_nofault will + * fail and perf will fall back to read_user_stack_slow(), which + * walks the Linux page tables. + * + * Interrupts must also be off for the duration of the + * mm_is_thread_local test and update, to prevent preempt running the + * mm on another CPU (XXX: this may be racy vs kthread_use_mm). + */ + local_irq_save(flags); + /* Is that local to this CPU ? */ if (mm_is_thread_local(mm)) update_flags |= HPTE_LOCAL_UPDATE; @@ -1616,7 +1647,7 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea, mm_ctx_user_psize(&mm->context), mm_ctx_user_psize(&mm->context), pte_val(*ptep)); -out_exit: + local_irq_restore(flags); } @@ -1638,10 +1669,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, unsigned long trap; bool is_exec; - if (radix_enabled()) { - prefetch((void *)address); + if (radix_enabled()) return; - } /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(*ptep) || address >= TASK_SIZE) @@ -1668,33 +1697,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, return; } - hash_preload(vma->vm_mm, address, is_exec, trap); + hash_preload(vma->vm_mm, ptep, address, is_exec, trap); } -#ifdef CONFIG_PPC_MEM_KEYS -/* - * Return the protection key associated with the given address and the - * mm_struct. 
- */ -u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) -{ - pte_t *ptep; - u16 pkey = 0; - unsigned long flags; - - if (!mm || !mm->pgd) - return 0; - - local_irq_save(flags); - ptep = find_linux_pte(mm->pgd, address, NULL, NULL); - if (ptep) - pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep))); - local_irq_restore(flags); - - return pkey; -} -#endif /* CONFIG_PPC_MEM_KEYS */ - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline void tm_flush_hash_page(int local) { @@ -1736,10 +1741,6 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, return gslot; } -/* - * WARNING: This is called from hash_low_64.S, if you change this prototype, - * do not forget to update the assembly call site ! - */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, unsigned long flags) { diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h new file mode 100644 index 000000000000..7eda0d30d765 --- /dev/null +++ b/arch/powerpc/mm/book3s64/internal.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H +#define ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H + +#include <linux/jump_label.h> + +extern bool stress_slb_enabled; + +DECLARE_STATIC_KEY_FALSE(stress_slb_key); + +static inline bool stress_slb(void) +{ + return static_branch_unlikely(&stress_slb_key); +} + +#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index fa05bbd1f682..563faa10bb66 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -96,7 +96,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, goto unlock_exit; } - down_read(&mm->mmap_sem); + mmap_read_lock(mm); chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) / sizeof(struct vm_area_struct *); chunk = min(chunk, entries); @@ -114,7 +114,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, pinned += ret; break; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); if (pinned != entries) { if (!ret) ret = -EFAULT; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index e0bb69c616e4..e18ae50a275c 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -15,6 +15,7 @@ #include <asm/powernv.h> #include <asm/firmware.h> #include <asm/ultravisor.h> +#include <asm/kexec.h> #include <mm/mmu_decl.h> #include <trace/events/thp.h> @@ -109,15 +110,25 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return __pmd(old_pmd); +} + +pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, int full) +{ + pmd_t pmd; + VM_BUG_ON(addr & ~HPAGE_PMD_MASK); + VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)) || !pmd_present(*pmdp)); + pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); /* - * This ensures that generic code that rely on IRQ disabling - * to prevent a parallel THP split work as expected. - * - * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires - * a special case check in pmd_access_permitted. + * if it not a fullmm flush, then we can possibly end up converting + * this PMD pte entry to a regular level 0 PTE by a parallel page fault. 
+ * Make sure we flush the tlb in this case. */ - serialize_against_pte_lookup(vma->vm_mm); - return __pmd(old_pmd); + if (!full) + flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE); + return pmd; } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) @@ -146,19 +157,6 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) pmdv &= _HPAGE_CHG_MASK; return pmd_set_protbits(__pmd(pmdv), newprot); } - -/* - * This is called at the end of handling a user page fault, when the - * fault has been handled by updating a HUGE PMD entry in the linux page tables. - * We use it to preload an HPTE into the hash table corresponding to - * the updated linux HUGE PMD entry. - */ -void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd) -{ - if (radix_enabled()) - prefetch((void *)addr); -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* For use by kexec */ @@ -168,6 +166,8 @@ void mmu_cleanup_all(void) radix__mmu_cleanup_all(); else if (mmu_hash_ops.hpte_clear_all) mmu_hash_ops.hpte_clear_all(); + + reset_sprs(); } #ifdef CONFIG_MEMORY_HOTPLUG @@ -342,6 +342,9 @@ void pmd_fragment_free(unsigned long *pmd) { struct page *page = virt_to_page(pmd); + if (PageReserved(page)) + return free_reserved_page(page); + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); if (atomic_dec_and_test(&page->pt_frag_refcount)) { pgtable_pmd_page_dtor(page); @@ -359,7 +362,7 @@ static inline void pgtable_free(void *table, int index) pmd_fragment_free(table); break; case PUD_INDEX: - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table); + __pud_free(table); break; #if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE) /* 16M hugepd directory at pud level */ diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 1199fc2bfaec..b1d091a97611 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -10,58 +10,97 @@ #include <asm/mmu.h> #include <asm/setup.h> #include <linux/pkeys.h> -#include <linux/of_device.h> +#include <linux/of_fdt.h> -DEFINE_STATIC_KEY_TRUE(pkey_disabled); -int pkeys_total; /* Total pkeys as per device tree */ -u32 initial_allocation_mask; /* Bits set for the initially allocated keys */ -u32 reserved_allocation_mask; /* Bits set for reserved keys */ -static bool pkey_execute_disable_supported; -static bool pkeys_devtree_defined; /* property exported by device tree */ -static u64 pkey_amr_mask; /* Bits in AMR not to be touched */ -static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ -static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ +int num_pkey; /* Max number of pkeys supported */ +/* + * Keys marked in the reservation list cannot be allocated by userspace + */ +u32 reserved_allocation_mask __ro_after_init; + +/* Bits set for the initially allocated keys */ +static u32 initial_allocation_mask __ro_after_init; + +/* + * Even if we allocate keys with sys_pkey_alloc(), we need to make sure + * other thread still find the access denied using the same keys. + */ +static u64 default_amr = ~0x0UL; +static u64 default_iamr = 0x5555555555555555UL; +u64 default_uamor __ro_after_init; +/* + * Key used to implement PROT_EXEC mmap. Denies READ/WRITE + * We pick key 2 because 0 is special key and 1 is reserved as per ISA. 
+ */ static int execute_only_key = 2; +static bool pkey_execute_disable_supported; + #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL #define AMR_WR_BIT 0x2UL #define IAMR_EX_BIT 0x1UL -#define PKEY_REG_BITS (sizeof(u64)*8) +#define PKEY_REG_BITS (sizeof(u64) * 8) #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) -static void scan_pkey_feature(void) +static int __init dt_scan_storage_keys(unsigned long node, + const char *uname, int depth, + void *data) { - u32 vals[2]; - struct device_node *cpu; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *prop; + int *pkeys_total = (int *) data; - cpu = of_find_node_by_type(NULL, "cpu"); - if (!cpu) - return; + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; - if (of_property_read_u32_array(cpu, - "ibm,processor-storage-keys", vals, 2)) - return; + prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL); + if (!prop) + return 0; + *pkeys_total = be32_to_cpu(prop[0]); + return 1; +} + +static int scan_pkey_feature(void) +{ + int ret; + int pkeys_total = 0; /* - * Since any pkey can be used for data or execute, we will just treat - * all keys as equal and track them as one entity. + * Pkey is not supported with Radix translation. */ - pkeys_total = vals[0]; - pkeys_devtree_defined = true; -} + if (early_radix_enabled()) + return 0; -static inline bool pkey_mmu_enabled(void) -{ - if (firmware_has_feature(FW_FEATURE_LPAR)) - return pkeys_total; - else - return cpu_has_feature(CPU_FTR_PKEY); + ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total); + if (ret == 0) { + /* + * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device + * tree. We make this exception since some version of skiboot forgot to + * expose this property on power8/9. + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + unsigned long pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || + PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9) + pkeys_total = 32; + } + } + + /* + * Adjust the upper limit, based on the number of bits supported by + * arch-neutral code. + */ + pkeys_total = min_t(int, pkeys_total, + ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); + return pkeys_total; } -static int pkey_initialize(void) +void __init pkey_early_init_devtree(void) { - int os_reserved, i; + int pkeys_total, i; /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral @@ -79,32 +118,21 @@ static int pkey_initialize(void) __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); - /* scan the device tree for pkey feature */ - scan_pkey_feature(); - /* - * Let's assume 32 pkeys on P8 bare metal, if its not defined by device - * tree. We make this exception since skiboot forgot to expose this - * property on power8. + * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 */ - if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && - cpu_has_feature(CPU_FTRS_POWER8)) - pkeys_total = 32; + if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) + return; - /* - * Adjust the upper limit, based on the number of bits supported by - * arch-neutral code. 
- */ - pkeys_total = min_t(int, pkeys_total, - ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1)); + /* scan the device tree for pkey feature */ + pkeys_total = scan_pkey_feature(); + if (!pkeys_total) + goto out; - if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) - static_branch_enable(&pkey_disabled); - else - static_branch_disable(&pkey_disabled); + /* Allow all keys to be modified by default */ + default_uamor = ~0x0UL; - if (static_branch_likely(&pkey_disabled)) - return 0; + cur_cpu_spec->mmu_features |= MMU_FTR_PKEY; /* * The device tree cannot be relied to indicate support for @@ -118,53 +146,86 @@ static int pkey_initialize(void) #ifdef CONFIG_PPC_4K_PAGES /* * The OS can manage only 8 pkeys due to its inability to represent them - * in the Linux 4K PTE. + * in the Linux 4K PTE. Mark all other keys reserved. */ - os_reserved = pkeys_total - 8; + num_pkey = min(8, pkeys_total); #else - os_reserved = 0; + num_pkey = pkeys_total; #endif - /* Bits are in LE format. */ - reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key); - - /* register mask is in BE format */ - pkey_amr_mask = ~0x0ul; - pkey_amr_mask &= ~(0x3ul << pkeyshift(0)); - - pkey_iamr_mask = ~0x0ul; - pkey_iamr_mask &= ~(0x3ul << pkeyshift(0)); - pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key)); - - pkey_uamor_mask = ~0x0ul; - pkey_uamor_mask &= ~(0x3ul << pkeyshift(0)); - pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key)); - - /* mark the rest of the keys as reserved and hence unavailable */ - for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) { - reserved_allocation_mask |= (0x1 << i); - pkey_uamor_mask &= ~(0x3ul << pkeyshift(i)); - } - initial_allocation_mask = reserved_allocation_mask | (0x1 << 0); - if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) { + if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) { /* * Insufficient number of keys to support * execute only key. Mark it unavailable. - * Any AMR, UAMOR, IAMR bit set for - * this key is irrelevant since this key - * can never be allocated. */ execute_only_key = -1; + } else { + /* + * Mark the execute_only_pkey as not available for + * user allocation via pkey_alloc. + */ + reserved_allocation_mask |= (0x1 << execute_only_key); + + /* + * Deny READ/WRITE for execute_only_key. + * Allow execute in IAMR. + */ + default_amr |= (0x3ul << pkeyshift(execute_only_key)); + default_iamr &= ~(0x1ul << pkeyshift(execute_only_key)); + + /* + * Clear the uamor bits for this key. + */ + default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); } - return 0; -} + /* + * Allow access for only key 0. And prevent any other modification. + */ + default_amr &= ~(0x3ul << pkeyshift(0)); + default_iamr &= ~(0x1ul << pkeyshift(0)); + default_uamor &= ~(0x3ul << pkeyshift(0)); + /* + * key 0 is special in that we want to consider it an allocated + * key which is preallocated. We don't allow changing AMR bits + * w.r.t key 0. But one can pkey_free(key0) + */ + initial_allocation_mask |= (0x1 << 0); + + /* + * key 1 is recommended not to be used. PowerISA(3.0) page 1015, + * programming note. + */ + reserved_allocation_mask |= (0x1 << 1); + default_uamor &= ~(0x3ul << pkeyshift(1)); -arch_initcall(pkey_initialize); + /* + * Prevent the usage of OS reserved keys. Update UAMOR + * for those keys. Also mark the rest of the bits in the + * 32 bit mask as reserved. 
+ */ + for (i = num_pkey; i < 32 ; i++) { + reserved_allocation_mask |= (0x1 << i); + default_uamor &= ~(0x3ul << pkeyshift(i)); + } + /* + * Prevent the allocation of reserved keys too. + */ + initial_allocation_mask |= reserved_allocation_mask; + + pr_info("Enabling pkeys with max key count %d\n", num_pkey); +out: + /* + * Setup uamor on boot cpu + */ + mtspr(SPRN_UAMOR, default_uamor); + + return; +} void pkey_mm_init(struct mm_struct *mm) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; mm_pkey_allocation_map(mm) = initial_allocation_mask; mm->context.execute_only_pkey = execute_only_key; @@ -196,30 +257,6 @@ static inline void write_iamr(u64 value) mtspr(SPRN_IAMR, value); } -static inline u64 read_uamor(void) -{ - return mfspr(SPRN_UAMOR); -} - -static inline void write_uamor(u64 value) -{ - mtspr(SPRN_UAMOR, value); -} - -static bool is_pkey_enabled(int pkey) -{ - u64 uamor = read_uamor(); - u64 pkey_bits = 0x3ul << pkeyshift(pkey); - u64 uamor_pkey_bits = (uamor & pkey_bits); - - /* - * Both the bits in UAMOR corresponding to the key should be set or - * reset. - */ - WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits)); - return !!(uamor_pkey_bits); -} - static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); @@ -245,8 +282,18 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, { u64 new_amr_bits = 0x0ul; u64 new_iamr_bits = 0x0ul; + u64 pkey_bits, uamor_pkey_bits; + + /* + * Check whether the key is disabled by UAMOR. + */ + pkey_bits = 0x3ul << pkeyshift(pkey); + uamor_pkey_bits = (default_uamor & pkey_bits); - if (!is_pkey_enabled(pkey)) + /* + * Both the bits in UAMOR corresponding to the key should be set + */ + if (uamor_pkey_bits != pkey_bits) return -EINVAL; if (init_val & PKEY_DISABLE_EXECUTE) { @@ -268,7 +315,7 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, void thread_pkey_regs_save(struct thread_struct *thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; /* @@ -276,38 +323,33 @@ void thread_pkey_regs_save(struct thread_struct *thread) */ thread->amr = read_amr(); thread->iamr = read_iamr(); - thread->uamor = read_uamor(); } void thread_pkey_regs_restore(struct thread_struct *new_thread, struct thread_struct *old_thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; if (old_thread->amr != new_thread->amr) write_amr(new_thread->amr); if (old_thread->iamr != new_thread->iamr) write_iamr(new_thread->iamr); - if (old_thread->uamor != new_thread->uamor) - write_uamor(new_thread->uamor); } void thread_pkey_regs_init(struct thread_struct *thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; - thread->amr = pkey_amr_mask; - thread->iamr = pkey_iamr_mask; - thread->uamor = pkey_uamor_mask; + thread->amr = default_amr; + thread->iamr = default_iamr; - write_uamor(pkey_uamor_mask); - write_amr(pkey_amr_mask); - write_iamr(pkey_iamr_mask); + write_amr(default_amr); + write_iamr(default_iamr); } -int __execute_only_pkey(struct mm_struct *mm) +int execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; } @@ -353,21 +395,20 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) int pkey_shift; u64 amr; - if (!is_pkey_enabled(pkey)) - return true; - pkey_shift = pkeyshift(pkey); - if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) - return true; 
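For orientation while reading the pkeys rework above and below: AMR and IAMR pack two bits per key starting from the most-significant end of the register, which is what pkeyshift() computes, with AMR_RD_BIT and AMR_WR_BIT selecting the read- and write-disable bits of a key. A standalone sketch of that decoding, reusing the same constants as the kernel code but with a made-up AMR value:

#include <stdio.h>
#include <stdint.h>

/* Mirror of the constants defined above, for illustration only. */
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
#define AMR_WR_BIT 0x2UL
#define PKEY_REG_BITS (sizeof(uint64_t) * 8)
#define pkeyshift(pkey) (PKEY_REG_BITS - (((pkey) + 1) * AMR_BITS_PER_PKEY))

int main(void)
{
	/* Sample AMR: deny read and write for key 2 only. */
	uint64_t amr = 0x3UL << pkeyshift(2);
	int pkey;

	for (pkey = 0; pkey < 4; pkey++) {
		int shift = pkeyshift(pkey);

		printf("key %d: read %s, write %s\n", pkey,
		       (amr & (AMR_RD_BIT << shift)) ? "denied" : "allowed",
		       (amr & (AMR_WR_BIT << shift)) ? "denied" : "allowed");
	}
	return 0;
}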
+ if (execute) + return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + + amr = read_amr(); + if (write) + return !(amr & (AMR_WR_BIT << pkey_shift)); - amr = read_amr(); /* Delay reading amr until absolutely needed */ - return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || - (write && !(amr & (AMR_WR_BIT << pkey_shift)))); + return !(amr & (AMR_RD_BIT << pkey_shift)); } bool arch_pte_access_permitted(u64 pte, bool write, bool execute) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return true; return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); @@ -384,7 +425,7 @@ bool arch_pte_access_permitted(u64 pte, bool write, bool execute) bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return true; /* * Do not enforce our key-permissions on a foreign vma. @@ -397,7 +438,7 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; /* Duplicate the oldmm pkey state in mm: */ diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index cab06331c0c0..cb91071eef52 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -2,8 +2,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/security.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/machdep.h> #include <asm/mman.h> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 8f9edf07063a..d5f0c10d752a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -15,9 +15,8 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/string_helpers.h> -#include <linux/stop_machine.h> +#include <linux/memory.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/mmu_context.h> #include <asm/dma.h> @@ -35,6 +34,7 @@ unsigned int mmu_pid_bits; unsigned int mmu_base_pid; +unsigned int radix_mem_block_size __ro_after_init; static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) @@ -57,6 +57,13 @@ static __ref void *early_alloc_pgtable(unsigned long size, int nid, return ptr; } +/* + * When allocating pud or pmd pointers, we allocate a complete page + * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This + * is to ensure that the page obtained from the memblock allocator + * can be completely used as page table page and can be freed + * correctly when the page table entries are removed. 
+ */ static int early_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int map_page_size, @@ -65,24 +72,26 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, { unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; pgdp = pgd_offset_k(ea); - if (pgd_none(*pgdp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, - region_start, region_end); - pgd_populate(&init_mm, pgdp, pudp); + p4dp = p4d_offset(pgdp, ea); + if (p4d_none(*p4dp)) { + pudp = early_alloc_pgtable(PAGE_SIZE, nid, + region_start, region_end); + p4d_populate(&init_mm, p4dp, pudp); } - pudp = pud_offset(pgdp, ea); + pudp = pud_offset(p4dp, ea); if (map_page_size == PUD_SIZE) { ptep = (pte_t *)pudp; goto set_the_pte; } if (pud_none(*pudp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid, - region_start, region_end); + pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start, + region_end); pud_populate(&init_mm, pudp, pmdp); } pmdp = pmd_offset(pudp, ea); @@ -115,6 +124,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, { unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -137,7 +147,8 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, * boot. */ pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; if (map_page_size == PUD_SIZE) { @@ -174,6 +185,7 @@ void radix__change_memory_range(unsigned long start, unsigned long end, { unsigned long idx; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -186,7 +198,8 @@ void radix__change_memory_range(unsigned long start, unsigned long end, for (idx = start; idx < end; idx += PAGE_SIZE) { pgdp = pgd_offset_k(idx); - pudp = pud_alloc(&init_mm, pgdp, idx); + p4dp = p4d_offset(pgdp, idx); + pudp = pud_alloc(&init_mm, p4dp, idx); if (!pudp) continue; if (pud_is_leaf(*pudp)) { @@ -254,6 +267,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end) static int __meminit create_physical_mapping(unsigned long start, unsigned long end, + unsigned long max_mapping_size, int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; @@ -261,12 +275,14 @@ static int __meminit create_physical_mapping(unsigned long start, pgprot_t prot; int psize; - start = _ALIGN_UP(start, PAGE_SIZE); + start = ALIGN(start, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { unsigned long gap, previous_size; int rc; gap = next_boundary(addr, end) - addr; + if (gap > max_mapping_size) + gap = max_mapping_size; previous_size = mapping_size; prev_exec = exec; @@ -317,8 +333,9 @@ static void __init radix_init_pgtable(void) /* We don't support slb for radix */ mmu_slb_size = 0; + /* - * Create the linear mapping, using standard page size for now + * Create the linear mapping */ for_each_memblock(memory, reg) { /* @@ -334,6 +351,7 @@ static void __init radix_init_pgtable(void) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, + radix_mem_block_size, -1, PAGE_KERNEL)); } @@ -474,6 +492,57 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, return 1; } +#ifdef CONFIG_MEMORY_HOTPLUG +static int __init probe_memory_block_size(unsigned long node, const char *uname, int + depth, void *data) +{ + unsigned long *mem_block_size = (unsigned long *)data; + const __be64 *prop; + int len; + + if (depth != 1) + return 0; + + if (strcmp(uname, 
"ibm,dynamic-reconfiguration-memory")) + return 0; + + prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); + if (!prop || len < sizeof(__be64)) + /* + * Nothing in the device tree + */ + *mem_block_size = MIN_MEMORY_BLOCK_SIZE; + else + *mem_block_size = be64_to_cpup(prop); + return 1; +} + +static unsigned long radix_memory_block_size(void) +{ + unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE; + + /* + * OPAL firmware feature is set by now. Hence we are ok + * to test OPAL feature. + */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + mem_block_size = 1UL * 1024 * 1024 * 1024; + else + of_scan_flat_dt(probe_memory_block_size, &mem_block_size); + + return mem_block_size; +} + +#else /* CONFIG_MEMORY_HOTPLUG */ + +static unsigned long radix_memory_block_size(void) +{ + return 1UL * 1024 * 1024 * 1024; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ + + void __init radix__early_init_devtree(void) { int rc; @@ -482,17 +551,27 @@ void __init radix__early_init_devtree(void) * Try to find the available page sizes in the device-tree */ rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL); - if (rc != 0) /* Found */ - goto found; + if (!rc) { + /* + * No page size details found in device tree. + * Let's assume we have page 4k and 64k support + */ + mmu_psize_defs[MMU_PAGE_4K].shift = 12; + mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; + + mmu_psize_defs[MMU_PAGE_64K].shift = 16; + mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; + } + /* - * let's assume we have page 4k and 64k support + * Max mapping size used when mapping pages. We don't use + * ppc_md.memory_block_size() here because this get called + * early and we don't have machine probe called yet. Also + * the pseries implementation only check for ibm,lmb-size. + * All hypervisor supporting radix do expose that device + * tree node. */ - mmu_psize_defs[MMU_PAGE_4K].shift = 12; - mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; - - mmu_psize_defs[MMU_PAGE_64K].shift = 16; - mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; -found: + radix_mem_block_size = radix_memory_block_size(); return; } @@ -514,8 +593,10 @@ void setup_kuep(bool disabled) if (disabled || !early_radix_enabled()) return; - if (smp_processor_id() == boot_cpuid) + if (smp_processor_id() == boot_cpuid) { pr_info("Activating Kernel Userspace Execution Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; + } /* * Radix always uses key0 of the IAMR to determine if an access is @@ -539,6 +620,10 @@ void setup_kuap(bool disabled) /* Make sure userspace can't change the AMR */ mtspr(SPRN_UAMOR, 0); + + /* + * Set the default kernel AMR values on all cpus. + */ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); isync(); } @@ -649,21 +734,6 @@ void radix__mmu_cleanup_all(void) } } -void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* - * We don't currently support the first MEMBLOCK not mapping 0 - * physical on those processors - */ - BUG_ON(first_memblock_base != 0); - - /* - * Radix mode is not limited by RMA / VRMA addressing. 
- */ - ppc64_rma_size = ULONG_MAX; -} - #ifdef CONFIG_MEMORY_HOTPLUG static void free_pte_table(pte_t *pte_start, pmd_t *pmd) { @@ -695,30 +765,19 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) pud_clear(pud); } -struct change_mapping_params { - pte_t *pte; - unsigned long start; - unsigned long end; - unsigned long aligned_start; - unsigned long aligned_end; -}; - -static int __meminit stop_machine_change_mapping(void *data) +static void free_pud_table(pud_t *pud_start, p4d_t *p4d) { - struct change_mapping_params *params = - (struct change_mapping_params *)data; + pud_t *pud; + int i; - if (!data) - return -1; + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (!pud_none(*pud)) + return; + } - spin_unlock(&init_mm.page_table_lock); - pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(__pa(params->aligned_start), - __pa(params->start), -1, PAGE_KERNEL); - create_physical_mapping(__pa(params->end), __pa(params->aligned_end), - -1, PAGE_KERNEL); - spin_lock(&init_mm.page_table_lock); - return 0; + pud_free(&init_mm, pud_start); + p4d_clear(p4d); } static void remove_pte_table(pte_t *pte_start, unsigned long addr, @@ -749,53 +808,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } } -/* - * clear the pte and potentially split the mapping helper - */ -static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end, - unsigned long size, pte_t *pte) -{ - unsigned long mask = ~(size - 1); - unsigned long aligned_start = addr & mask; - unsigned long aligned_end = addr + size; - struct change_mapping_params params; - bool split_region = false; - - if ((end - addr) < size) { - /* - * We're going to clear the PTE, but not flushed - * the mapping, time to remap and flush. The - * effects if visible outside the processor or - * if we are running in code close to the - * mapping we cleared, we are in trouble. 
- */ - if (overlaps_kernel_text(aligned_start, addr) || - overlaps_kernel_text(end, aligned_end)) { - /* - * Hack, just return, don't pte_clear - */ - WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " - "text, not splitting\n", addr, end); - return; - } - split_region = true; - } - - if (split_region) { - params.pte = pte; - params.start = addr; - params.end = end; - params.aligned_start = addr & ~(size - 1); - params.aligned_end = min_t(unsigned long, aligned_end, - (unsigned long)__va(memblock_end_of_DRAM())); - stop_machine(stop_machine_change_mapping, ¶ms, NULL); - return; - } - - pte_clear(&init_mm, addr, pte); -} - -static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, +static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { unsigned long next; @@ -810,7 +823,12 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; if (pmd_is_leaf(*pmd)) { - split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + pte_clear(&init_mm, addr, (pte_t *)pmd); continue; } @@ -820,7 +838,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, } } -static void remove_pud_table(pud_t *pud_start, unsigned long addr, +static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end) { unsigned long next; @@ -835,7 +853,12 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, continue; if (pud_is_leaf(*pud)) { - split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); + if (!IS_ALIGNED(addr, PUD_SIZE) || + !IS_ALIGNED(next, PUD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + pte_clear(&init_mm, addr, (pte_t *)pud); continue; } @@ -850,6 +873,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) unsigned long addr, next; pud_t *pud_base; pgd_t *pgd; + p4d_t *p4d; spin_lock(&init_mm.page_table_lock); @@ -857,16 +881,24 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); - if (!pgd_present(*pgd)) + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) continue; - if (pgd_is_leaf(*pgd)) { - split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); + if (p4d_is_leaf(*p4d)) { + if (!IS_ALIGNED(addr, P4D_SIZE) || + !IS_ALIGNED(next, P4D_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pgd); continue; } - pud_base = (pud_t *)pgd_page_vaddr(*pgd); + pud_base = (pud_t *)p4d_page_vaddr(*p4d); remove_pud_table(pud_base, addr, next); + free_pud_table(pud_base, p4d); } spin_unlock(&init_mm.page_table_lock); @@ -882,7 +914,8 @@ int __meminit radix__create_section_mapping(unsigned long start, return -1; } - return create_physical_mapping(__pa(start), __pa(end), nid, prot); + return create_physical_mapping(__pa(start), __pa(end), + radix_mem_block_size, nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) @@ -962,7 +995,13 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre pmd = *pmdp; pmd_clear(pmdp); - /*FIXME!! Verify whether we need this kick below */ + /* + * pmdp collapse_flush need to ensure that there are no parallel gup + * walk after this call. This is needed so that we can have stable + * page ref count when collapsing a page. 
We don't allow a collapse page + * if we have gup taken on the page. We can ensure that by sending IPI + * because gup walk happens with IRQ disabled. + */ serialize_against_pte_lookup(vma->vm_mm); radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); @@ -1023,17 +1062,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0); old_pmd = __pmd(old); - /* - * Serialize against find_current_mm_pte which does lock-less - * lookup in page tables with local interrupts disabled. For huge pages - * it casts pmd_t to pte_t. Since format of pte_t is different from - * pmd_t we want to prevent transit from pmd pointing to page table - * to pmd pointing to huge page (and back) while interrupts are disabled. - * We clear pmd to possibly replace it with page table pointer in - * different code paths. So make sure we wait for the parallel - * find_current_mm_pte to finish. - */ - serialize_against_pte_lookup(mm); return old_pmd; } diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 758ade2c2b6e..0d233763441f 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -16,6 +16,7 @@ #include <asm/tlbflush.h> #include <asm/trace.h> #include <asm/cputhreads.h> +#include <asm/plpar_wrappers.h> #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -694,7 +695,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm) goto local; } - if (cputlb_use_tlbie()) { + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, + H_RPTI_PAGE_ALL, 0, -1UL); + } else if (cputlb_use_tlbie()) { if (mm_needs_flush_escalation(mm)) _tlbie_pid(pid, RIC_FLUSH_ALL); else @@ -727,7 +735,16 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) goto local; } } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, + H_RPTI_PAGE_ALL, 0, -1UL); + } else if (cputlb_use_tlbie()) _tlbie_pid(pid, RIC_FLUSH_ALL); else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); @@ -760,7 +777,19 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, exit_flush_lazy_tlbs(mm); goto local; } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt, pg_sizes, size; + + tgt = H_RPTI_TARGET_CMMU; + pg_sizes = psize_to_rpti_pgsize(psize); + size = 1UL << mmu_psize_to_shift(psize); + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, + pg_sizes, vmaddr, + vmaddr + size); + } else if (cputlb_use_tlbie()) _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); @@ -810,7 +839,14 @@ static inline void _tlbiel_kernel_broadcast(void) */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; + unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + + pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, + start, end); + } else if (cputlb_use_tlbie()) _tlbie_pid(0, RIC_FLUSH_ALL); else 
_tlbiel_kernel_broadcast(); @@ -864,7 +900,17 @@ is_local: nr_pages > tlb_local_single_page_flush_ceiling); } - if (full) { + if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes, + start, end); + } else if (full) { if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { @@ -884,9 +930,7 @@ is_local: if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { hstart = (start + PMD_SIZE - 1) & PMD_MASK; hend = end & PMD_MASK; - if (hstart == hend) - hflush = false; - else + if (hstart < hend) hflush = true; } @@ -1048,7 +1092,17 @@ is_local: nr_pages > tlb_local_single_page_flush_ceiling); } - if (full) { + if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long type = H_RPTI_TYPE_TLB; + unsigned long pg_sizes = psize_to_rpti_pgsize(psize); + + if (also_pwc) + type |= H_RPTI_TYPE_PWC; + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); + } else if (full) { if (local) { _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } else { @@ -1113,7 +1167,19 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) exit_flush_lazy_tlbs(mm); goto local; } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt, type, pg_sizes; + + tgt = H_RPTI_TARGET_CMMU; + type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, pg_sizes, + addr, end); + } else if (cputlb_use_tlbie()) _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); else _tlbiel_va_range_multicast(mm, diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 716204aee3da..156c38f89511 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -10,7 +10,6 @@ */ #include <asm/asm-prototypes.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/paca.h> @@ -21,10 +20,14 @@ #include <linux/compiler.h> #include <linux/context_tracking.h> #include <linux/mm_types.h> +#include <linux/pgtable.h> #include <asm/udbg.h> #include <asm/code-patching.h> +#include "internal.h" + + enum slb_index { LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ KSTACK_INDEX = 1, /* Kernel stack map */ @@ -54,6 +57,17 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } +bool stress_slb_enabled __initdata; + +static int __init parse_stress_slb(char *p) +{ + stress_slb_enabled = true; + return 0; +} +early_param("stress_slb", parse_stress_slb); + +__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key); + static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM @@ -68,7 +82,7 @@ static void assert_slb_presence(bool present, unsigned long ea) * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware * ignores all other bits from 0-27, so just clear them all. 
*/ - ea &= ~((1UL << 28) - 1); + ea &= ~((1UL << SID_SHIFT) - 1); asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); @@ -153,14 +167,42 @@ void slb_flush_all_realmode(void) asm volatile("slbmte %0,%0; slbia" : : "r" (0)); } +static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside) +{ + struct slb_shadow *p = get_slb_shadow(); + unsigned long ksp_esid_data, ksp_vsid_data; + u32 ih; + + /* + * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside + * information created with Class=0 entries, which we use for kernel + * SLB entries (the SLB entries themselves are still invalidated). + * + * Older processors will ignore this optimisation. Over-invalidation + * is fine because we never rely on lookaside information existing. + */ + if (preserve_kernel_lookaside) + ih = 1; + else + ih = 0; + + ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid); + ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid); + + asm volatile(PPC_SLBIA(%0)" \n" + "slbmte %1, %2 \n" + :: "i" (ih), + "r" (ksp_vsid_data), + "r" (ksp_esid_data) + : "memory"); +} + /* * This flushes non-bolted entries, it can be run in virtual mode. Must * be called with interrupts disabled. */ void slb_flush_and_restore_bolted(void) { - struct slb_shadow *p = get_slb_shadow(); - BUILD_BUG_ON(SLB_NUM_BOLTED != 2); WARN_ON(!irqs_disabled()); @@ -171,13 +213,10 @@ void slb_flush_and_restore_bolted(void) */ hard_irq_disable(); - asm volatile("isync\n" - "slbia\n" - "slbmte %0, %1\n" - "isync\n" - :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), - "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) - : "memory"); + isync(); + __slb_flush_and_restore_bolted(false); + isync(); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -400,6 +439,30 @@ void preload_new_slb_context(unsigned long start, unsigned long sp) local_irq_enable(); } +static void slb_cache_slbie_kernel(unsigned int index) +{ + unsigned long slbie_data = get_paca()->slb_cache[index]; + unsigned long ksp = get_paca()->kstack; + + slbie_data <<= SID_SHIFT; + slbie_data |= 0xc000000000000000ULL; + if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data) + return; + slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT; + + asm volatile("slbie %0" : : "r" (slbie_data)); +} + +static void slb_cache_slbie_user(unsigned int index) +{ + unsigned long slbie_data = get_paca()->slb_cache[index]; + + slbie_data <<= SID_SHIFT; + slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT; + slbie_data |= SLBIE_C; /* user slbs have C=1 */ + + asm volatile("slbie %0" : : "r" (slbie_data)); +} /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) @@ -414,8 +477,14 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * which would update the slb_cache/slb_cache_ptr fields in the PACA. */ hard_irq_disable(); - asm volatile("isync" : : : "memory"); - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + isync(); + if (stress_slb()) { + __slb_flush_and_restore_bolted(false); + isync(); + get_paca()->slb_cache_ptr = 0; + get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; + + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { /* * SLBIA IH=3 invalidates all Class=1 SLBEs and their * associated lookaside structures, which matches what @@ -423,47 +492,29 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * cache. 
*/ asm volatile(PPC_SLBIA(3)); + } else { unsigned long offset = get_paca()->slb_cache_ptr; if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { - unsigned long slbie_data = 0; - - for (i = 0; i < offset; i++) { - unsigned long ea; - - ea = (unsigned long) - get_paca()->slb_cache[i] << SID_SHIFT; - /* - * Could assert_slb_presence(true) here, but - * hypervisor or machine check could have come - * in and removed the entry at this point. - */ - - slbie_data = ea; - slbie_data |= user_segment_size(slbie_data) - << SLBIE_SSIZE_SHIFT; - slbie_data |= SLBIE_C; /* user slbs have C=1 */ - asm volatile("slbie %0" : : "r" (slbie_data)); - } + /* + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. + */ + + for (i = 0; i < offset; i++) + slb_cache_slbie_user(i); /* Workaround POWER5 < DD2.1 issue */ if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1) - asm volatile("slbie %0" : : "r" (slbie_data)); + slb_cache_slbie_user(0); } else { - struct slb_shadow *p = get_slb_shadow(); - unsigned long ksp_esid_data = - be64_to_cpu(p->save_area[KSTACK_INDEX].esid); - unsigned long ksp_vsid_data = - be64_to_cpu(p->save_area[KSTACK_INDEX].vsid); - - asm volatile(PPC_SLBIA(1) "\n" - "slbmte %0,%1\n" - "isync" - :: "r"(ksp_vsid_data), - "r"(ksp_esid_data)); + /* Flush but retain kernel lookaside information */ + __slb_flush_and_restore_bolted(true); + isync(); get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; } @@ -503,7 +554,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * address accesses by the kernel (user mode won't happen until * rfid, which is safe). */ - asm volatile("isync" : : : "memory"); + isync(); } void slb_set_size(u16 size) @@ -571,6 +622,9 @@ static void slb_cache_update(unsigned long esid_data) if (cpu_has_feature(CPU_FTR_ARCH_300)) return; /* ISAv3.0B and later does not use slb_cache */ + if (stress_slb()) + return; + /* * Now update slb cache entries */ @@ -580,7 +634,7 @@ static void slb_cache_update(unsigned long esid_data) * We have space in slb cache for optimized switch_slb(). * Top 36 bits from esid_data as per ISA */ - local_paca->slb_cache[slb_cache_index++] = esid_data >> 28; + local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT; local_paca->slb_cache_ptr++; } else { /* @@ -671,6 +725,28 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * accesses user memory before it returns to userspace with rfid. */ assert_slb_presence(false, ea); + if (stress_slb()) { + int slb_cache_index = local_paca->slb_cache_ptr; + + /* + * stress_slb() does not use slb cache, repurpose as a + * cache of inserted (non-bolted) kernel SLB entries. All + * non-bolted kernel entries are flushed on any user fault, + * or if there are already 3 non-bolted kernel entries. 
+ */ + BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3); + if (!kernel || slb_cache_index == 3) { + int i; + + for (i = 0; i < slb_cache_index; i++) + slb_cache_slbie_kernel(i); + slb_cache_index = 0; + } + + if (kernel) + local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT; + local_paca->slb_cache_ptr = slb_cache_index; + } asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c index 2ef24a53f4c9..60c6ea16a972 100644 --- a/arch/powerpc/mm/book3s64/subpage_prot.c +++ b/arch/powerpc/mm/book3s64/subpage_prot.c @@ -11,7 +11,7 @@ #include <linux/hugetlb.h> #include <linux/syscalls.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <linux/uaccess.h> /* @@ -54,15 +54,17 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, int npages) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; spinlock_t *ptl; pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd)) + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) return; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); if (pud_none(*pud)) return; pmd = pmd_offset(pud, addr); @@ -92,7 +94,7 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) size_t nw; unsigned long next, limit; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); spt = mm_ctx_subpage_prot(&mm->context); if (!spt) @@ -127,7 +129,7 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) } err_out: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -217,13 +219,13 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, if (!access_ok(map, (len >> PAGE_SHIFT) * sizeof(u32))) return -EFAULT; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); spt = mm_ctx_subpage_prot(&mm->context); if (!spt) { /* * Allocate subpage prot table if not already done. - * Do this with mmap_sem held + * Do this with mmap_lock held */ spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); if (!spt) { @@ -267,11 +269,11 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, if (addr + (nw << PAGE_SHIFT) > next) nw = (next - addr) >> PAGE_SHIFT; - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); if (__copy_from_user(spp, map, nw * sizeof(u32))) return -EFAULT; map += nw; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); /* now flush any existing HPTEs for the range */ hpte_flush_range(mm, addr, nw); @@ -280,6 +282,6 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, spt->maxaddr = limit; err = 0; out: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return err; } diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index beb060b96632..8acd00178956 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -33,7 +33,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, if (mm->pgd == NULL) return -EFAULT; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); ret = -EFAULT; vma = find_vma(mm, ea); if (!vma) @@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? 
FAULT_FLAG_WRITE : 0, NULL); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; @@ -76,13 +76,8 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, BUG(); } - if (*flt & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - out_unlock: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); return ret; } EXPORT_SYMBOL_GPL(copro_handle_mm_fault); diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a..b2eeea39684c 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -14,6 +14,8 @@ #include <asm/prom.h> #include <asm/drmem.h> +static int n_root_addr_cells, n_root_size_cells; + static struct drmem_lmb_info __drmem_info; struct drmem_lmb_info *drmem_info = &__drmem_info; @@ -189,12 +191,13 @@ int drmem_update_dt(void) return rc; } -static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, +static void read_drconf_v1_cell(struct drmem_lmb *lmb, const __be32 **prop) { const __be32 *p = *prop; - lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + lmb->base_addr = of_read_number(p, n_root_addr_cells); + p += n_root_addr_cells; lmb->drc_index = of_read_number(p++, 1); p++; /* skip reserved field */ @@ -205,29 +208,33 @@ static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, *prop = p; } -static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, - void (*func)(struct drmem_lmb *, const __be32 **)) +static int +__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { struct drmem_lmb lmb; u32 i, n_lmbs; + int ret = 0; n_lmbs = of_read_number(prop++, 1); - if (n_lmbs == 0) - return; - for (i = 0; i < n_lmbs; i++) { read_drconf_v1_cell(&lmb, &prop); - func(&lmb, &usm); + ret = func(&lmb, &usm, data); + if (ret) + break; } + + return ret; } -static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, +static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, const __be32 **prop) { const __be32 *p = *prop; dr_cell->seq_lmbs = of_read_number(p++, 1); - dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + dr_cell->base_addr = of_read_number(p, n_root_addr_cells); + p += n_root_addr_cells; dr_cell->drc_index = of_read_number(p++, 1); dr_cell->aa_index = of_read_number(p++, 1); dr_cell->flags = of_read_number(p++, 1); @@ -235,17 +242,16 @@ static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, *prop = p; } -static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, - void (*func)(struct drmem_lmb *, const __be32 **)) +static int +__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { struct of_drconf_cell_v2 dr_cell; struct drmem_lmb lmb; u32 i, j, lmb_sets; + int ret = 0; lmb_sets = of_read_number(prop++, 1); - if (lmb_sets == 0) - return; - for (i = 0; i < lmb_sets; i++) { read_drconf_v2_cell(&dr_cell, &prop); @@ -259,21 +265,29 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, lmb.aa_index = dr_cell.aa_index; lmb.flags = dr_cell.flags; - func(&lmb, &usm); + ret = func(&lmb, &usm, data); + if (ret) + break; } } + + return ret; } #ifdef CONFIG_PPC_PSERIES -void __init walk_drmem_lmbs_early(unsigned long node, - void (*func)(struct drmem_lmb *, const __be32 **)) +int __init walk_drmem_lmbs_early(unsigned long node, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { const __be32 *prop, *usm; 
- int len; + int len, ret = -ENODEV; prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); if (!prop || len < dt_root_size_cells * sizeof(__be32)) - return; + return ret; + + /* Get the address & size cells */ + n_root_addr_cells = dt_root_addr_cells; + n_root_size_cells = dt_root_size_cells; drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); @@ -281,20 +295,21 @@ void __init walk_drmem_lmbs_early(unsigned long node, prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len); if (prop) { - __walk_drmem_v1_lmbs(prop, usm, func); + ret = __walk_drmem_v1_lmbs(prop, usm, data, func); } else { prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2", &len); if (prop) - __walk_drmem_v2_lmbs(prop, usm, func); + ret = __walk_drmem_v2_lmbs(prop, usm, data, func); } memblock_dump_all(); + return ret; } #endif -static int __init init_drmem_lmb_size(struct device_node *dn) +static int init_drmem_lmb_size(struct device_node *dn) { const __be32 *prop; int len; @@ -303,12 +318,12 @@ static int __init init_drmem_lmb_size(struct device_node *dn) return 0; prop = of_get_property(dn, "ibm,lmb-size", &len); - if (!prop || len < dt_root_size_cells * sizeof(__be32)) { + if (!prop || len < n_root_size_cells * sizeof(__be32)) { pr_info("Could not determine LMB size\n"); return -1; } - drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); + drmem_info->lmb_size = of_read_number(prop, n_root_size_cells); return 0; } @@ -329,24 +344,36 @@ static const __be32 *of_get_usable_memory(struct device_node *dn) return prop; } -void __init walk_drmem_lmbs(struct device_node *dn, - void (*func)(struct drmem_lmb *, const __be32 **)) +int walk_drmem_lmbs(struct device_node *dn, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { const __be32 *prop, *usm; + int ret = -ENODEV; + + if (!of_root) + return ret; + + /* Get the address & size cells */ + of_node_get(of_root); + n_root_addr_cells = of_n_addr_cells(of_root); + n_root_size_cells = of_n_size_cells(of_root); + of_node_put(of_root); if (init_drmem_lmb_size(dn)) - return; + return ret; usm = of_get_usable_memory(dn); prop = of_get_property(dn, "ibm,dynamic-memory", NULL); if (prop) { - __walk_drmem_v1_lmbs(prop, usm, func); + ret = __walk_drmem_v1_lmbs(prop, usm, data, func); } else { prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL); if (prop) - __walk_drmem_v2_lmbs(prop, usm, func); + ret = __walk_drmem_v2_lmbs(prop, usm, data, func); } + + return ret; } static void __init init_drmem_v1_lmbs(const __be32 *prop) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 84af6c8eecf7..0add963a849b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -35,46 +35,14 @@ #include <asm/firmware.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/siginfo.h> #include <asm/debug.h> #include <asm/kup.h> +#include <asm/inst.h> + -/* - * Check whether the instruction inst is a store using - * an update addressing form which will update r1. 
- */ -static bool store_updates_sp(unsigned int inst) -{ - /* check for 1 in the rA field */ - if (((inst >> 16) & 0x1f) != 1) - return false; - /* check major opcode */ - switch (inst >> 26) { - case OP_STWU: - case OP_STBU: - case OP_STHU: - case OP_STFSU: - case OP_STFDU: - return true; - case OP_STD: /* std or stdu */ - return (inst & 3) == 1; - case OP_31: - /* check minor opcode */ - switch ((inst >> 1) & 0x3ff) { - case OP_31_XOP_STDUX: - case OP_31_XOP_STWUX: - case OP_31_XOP_STBUX: - case OP_31_XOP_STHUX: - case OP_31_XOP_STFSUX: - case OP_31_XOP_STFDUX: - return true; - } - } - return false; -} /* * do_page_fault error handling helpers */ @@ -108,7 +76,7 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); return __bad_area_nosemaphore(regs, address, si_code); } @@ -118,9 +86,34 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, - int pkey) +#ifdef CONFIG_PPC_MEM_KEYS +static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, + struct vm_area_struct *vma) { + struct mm_struct *mm = current->mm; + int pkey; + + /* + * We don't try to fetch the pkey from page table because reading + * page table without locking doesn't guarantee stable pte value. + * Hence the pkey value that we return to userspace can be different + * from the pkey that actually caused access error. + * + * It does *not* guarantee that the VMA we find here + * was the one that we faulted on. + * + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); + * 2. T1 : set AMR to deny access to pkey=4, touches, page + * 3. T1 : faults... + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); + * 5. T1 : enters fault handler, takes mmap_lock, etc... + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ + pkey = vma_pkey(vma); + + mmap_read_unlock(mm); + /* * If we are in kernel mode, bail out with a SEGV, this will * be caught by the assembly which will restore the non-volatile @@ -133,6 +126,7 @@ static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, return 0; } +#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -241,56 +235,23 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, - struct vm_area_struct *vma, unsigned int flags, - bool *must_retry) +#ifdef CONFIG_PPC_MEM_KEYS +static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, + struct vm_area_struct *vma) { /* - * N.B. The POWER/Open ABI allows programs to access up to - * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB - * below the stack pointer (r1) before decrementing it. - * The exec code can write slightly over 640kB to the stack - * before setting the user r1. Thus we allow the stack to - * expand to 1MB without further checks. + * Make sure to check the VMA so that we do not perform + * faults just to hit a pkey fault as soon as we fill in a + * page. 
Only called for current mm, hence foreign == 0 */ - if (address + 0x100000 < vma->vm_end) { - unsigned int __user *nip = (unsigned int __user *)regs->nip; - /* get user regs even if this fault is in kernel mode */ - struct pt_regs *uregs = current->thread.regs; - if (uregs == NULL) - return true; - - /* - * A user-mode access to an address a long way below - * the stack pointer is only valid if the instruction - * is one which would update the stack pointer to the - * address accessed if the instruction completed, - * i.e. either stwu rs,n(r1) or stwux rs,r1,rb - * (or the byte, halfword, float or double forms). - * - * If we don't check this then any write to the area - * between the last mapped region and the stack will - * expand the stack rather than segfaulting. - */ - if (address + 2048 >= uregs->gpr[1]) - return false; - - if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && - access_ok(nip, sizeof(*nip))) { - unsigned int inst; - - if (!probe_user_read(&inst, nip, sizeof(inst))) - return !store_updates_sp(inst); - *must_retry = true; - } + if (!arch_vma_access_permitted(vma, is_write, is_exec, 0)) return true; - } + return false; } +#endif -static bool access_error(bool is_write, bool is_exec, - struct vm_area_struct *vma) +static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { /* * Allow execution from readable areas if the MMU does not @@ -439,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; - bool must_retry = false; bool kprobe_fault = kprobe_page_fault(regs, 11); if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) @@ -483,14 +443,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (error_code & DSISR_KEYFAULT) - return bad_key_fault_exception(regs, address, - get_mm_addr_key(mm, address)); - /* - * We want to do this outside mmap_sem, because reading code around nip + * We want to do this outside mmap_lock, because reading code around nip * can result in fault, which will cause a deadlock when called with - * mmap_sem held + * mmap_lock held */ if (is_user) flags |= FAULT_FLAG_USER; @@ -502,7 +458,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an - * erroneous fault occurring in a code path which already holds mmap_sem + * erroneous fault occurring in a code path which already holds mmap_lock * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user * space from well defined areas of code, which are listed in the @@ -514,12 +470,12 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. 
*/ - if (unlikely(!down_read_trylock(&mm->mmap_sem))) { + if (unlikely(!mmap_read_trylock(mm))) { if (!is_user && !search_exception_tables(regs->nip)) return bad_area_nosemaphore(regs, address); retry: - down_read(&mm->mmap_sem); + mmap_read_lock(mm); } else { /* * The above down_read_trylock() might have succeeded in @@ -532,29 +488,21 @@ retry: vma = find_vma(mm, address); if (unlikely(!vma)) return bad_area(regs, address); - if (likely(vma->vm_start <= address)) - goto good_area; - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) - return bad_area(regs, address); - /* The stack is being expanded, check if it's valid */ - if (unlikely(bad_stack_expansion(regs, address, vma, flags, - &must_retry))) { - if (!must_retry) + if (unlikely(vma->vm_start > address)) { + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) return bad_area(regs, address); - up_read(&mm->mmap_sem); - if (fault_in_pages_readable((const char __user *)regs->nip, - sizeof(unsigned int))) - return bad_area_nosemaphore(regs, address); - goto retry; + if (unlikely(expand_stack(vma, address))) + return bad_area(regs, address); } - /* Try to expand it */ - if (unlikely(expand_stack(vma, address))) - return bad_area(regs, address); +#ifdef CONFIG_PPC_MEM_KEYS + if (unlikely(access_pkey_error(is_write, is_exec, + (error_code & DSISR_KEYFAULT), vma))) + return bad_access_pkey(regs, address, vma); +#endif /* CONFIG_PPC_MEM_KEYS */ -good_area: if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); @@ -563,22 +511,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); - -#ifdef CONFIG_PPC_MEM_KEYS - /* - * we skipped checking for access error due to key earlier. - * Check that using handle_mm_fault error return. - */ - if (unlikely(fault & VM_FAULT_SIGSEGV) && - !arch_vma_access_permitted(vma, is_write, is_exec, 0)) { - - int pkey = vma_pkey(vma); - - up_read(&mm->mmap_sem); - return bad_key_fault_exception(regs, address, pkey); - } -#endif /* CONFIG_PPC_MEM_KEYS */ + fault = handle_mm_fault(vma, address, flags, regs); major |= fault & VM_FAULT_MAJOR; @@ -586,7 +519,7 @@ good_area: return user_mode(regs) ? 0 : SIGBUS; /* - * Handle the retry right now, the mmap_sem has been released in that + * Handle the retry right now, the mmap_lock has been released in that * case. */ if (unlikely(fault & VM_FAULT_RETRY)) { @@ -596,7 +529,7 @@ good_area: } } - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); if (unlikely(fault & VM_FAULT_ERROR)) return mm_fault_error(regs, address, fault); @@ -604,14 +537,9 @@ good_area: /* * Major/minor page fault accounting. */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + if (major) cmo_account_page_fault(); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } + return 0; } NOKPROBE_SYMBOL(__do_page_fault); diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index 320c1672b2ae..624b4438aff9 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -24,22 +24,11 @@ #include <linux/highmem.h> #include <linux/module.h> -/* - * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap - * gives a more generic (and caching) interface. But kmap_atomic can - * be used in IRQ contexts, so in some (very limited) cases we need - * it. 
- */ -void *kmap_atomic_prot(struct page *page, pgprot_t prot) +void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) { unsigned long vaddr; int idx, type; - preempt_disable(); - pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); @@ -49,17 +38,14 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) return (void*) vaddr; } -EXPORT_SYMBOL(kmap_atomic_prot); +EXPORT_SYMBOL(kmap_atomic_high_prot); -void __kunmap_atomic(void *kvaddr) +void kunmap_atomic_high(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - if (vaddr < __fix_to_virt(FIX_KMAP_END)) { - pagefault_enable(); - preempt_enable(); + if (vaddr < __fix_to_virt(FIX_KMAP_END)) return; - } if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) { int type = kmap_atomic_idx(); @@ -77,7 +63,5 @@ void __kunmap_atomic(void *kvaddr) } kmap_atomic_idx_pop(); - pagefault_enable(); - preempt_enable(); } -EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(kunmap_atomic_high); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 33b3461d91e8..26292544630f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -19,7 +19,6 @@ #include <linux/swap.h> #include <linux/swapops.h> #include <linux/kmemleak.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/setup.h> @@ -30,7 +29,8 @@ bool hugetlb_disabled = false; #define hugepd_none(hpd) (hpd_val(hpd) == 0) -#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \ + __builtin_ffs(sizeof(void *))) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { @@ -53,24 +53,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); - new = NULL; - } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = NULL; - num_hugepd = 1; - new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; - new = NULL; } - if (!cachep && !new) { + if (!cachep) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - if (cachep) - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -101,10 +94,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - if (cachep) - kmem_cache_free(cachep, new); - else - pte_free(mm, new); + kmem_cache_free(cachep, new); } else { kmemleak_ignore(new); } @@ -119,6 +109,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pg; + p4d_t *p4; pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; @@ -128,20 +119,21 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); + p4 = p4d_offset(pg, addr); #ifdef CONFIG_PPC_BOOK3S_64 if (pshift == PGDIR_SHIFT) /* 16GB huge page */ - return (pte_t *) pg; + return (pte_t *) p4; else if (pshift > PUD_SHIFT) { /* * We need to use hugepd table */ ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t 
*)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift == PUD_SHIFT) @@ -166,10 +158,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #else if (pshift >= PGDIR_SHIFT) { ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t *)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift >= PUD_SHIFT) { @@ -188,6 +180,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (!hpdp) return NULL; + if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K) + return pte_alloc_map(mm, (pmd_t *)hpdp, addr); + BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, @@ -253,7 +248,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h) struct hugepd_freelist { struct rcu_head rcu; unsigned int index; - void *ptes[0]; + void *ptes[]; }; static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); @@ -330,13 +325,20 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); } +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) +{ + pgtable_t token = pmd_pgtable(*pmd); + + pmd_clear(pmd); + pte_free_tlb(tlb, token, addr); + mm_dec_nr_ptes(tlb->mm); +} + static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -352,11 +354,17 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { + if (pmd_none_or_clear_bad(pmd)) + continue; + /* * if it is not hugepd pointer, we should already find * it cleared. 
*/ - WARN_ON(!pmd_none_or_clear_bad(pmd)); + WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); + + hugetlb_free_pte_range(tlb, pmd, addr); + continue; } /* @@ -390,7 +398,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -400,7 +408,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, start = addr; do { - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); next = pud_addr_end(addr, end); if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) @@ -435,8 +443,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); - pgd_clear(pgd); + pud = pud_offset(p4d, start); + p4d_clear(p4d); pud_free_tlb(tlb, pud, start); mm_dec_nr_puds(tlb->mm); } @@ -449,6 +457,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; + p4d_t *p4d; unsigned long next; /* @@ -471,10 +480,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); + p4d = p4d_offset(pgd, addr); if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { - if (pgd_none_or_clear_bad(pgd)) + if (p4d_none_or_clear_bad(p4d)) continue; - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); } else { unsigned long more; /* @@ -487,7 +497,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, if (more > next) next = more; - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, addr, next, floor, ceiling); } } while (addr = next, addr != end); @@ -558,7 +568,7 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) return vma_kernel_pagesize(vma); } -static int __init add_huge_page_size(unsigned long long size) +bool __init arch_hugetlb_valid_size(unsigned long size) { int shift = __ffs(size); int mmu_psize; @@ -566,37 +576,27 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. 
*/ if (size <= PAGE_SIZE || !is_power_of_2(size)) - return -EINVAL; + return false; mmu_psize = check_and_get_huge_psize(shift); if (mmu_psize < 0) - return -EINVAL; + return false; BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); - /* Return if huge page size has already been setup */ - if (size_to_hstate(size)) - return 0; - - hugetlb_add_hstate(shift - PAGE_SHIFT); - - return 0; + return true; } -static int __init hugepage_setup_sz(char *str) +static int __init add_huge_page_size(unsigned long long size) { - unsigned long long size; - - size = memparse(str, &str); + int shift = __ffs(size); - if (add_huge_page_size(size) != 0) { - hugetlb_bad_size(); - pr_err("Invalid huge page size specified(%llu)\n", size); - } + if (!arch_hugetlb_valid_size((unsigned long)size)) + return -EINVAL; - return 1; + hugetlb_add_hstate(shift - PAGE_SHIFT); + return 0; } -__setup("hugepagesz=", hugepage_setup_sz); static int __init hugetlbpage_init(void) { @@ -684,3 +684,21 @@ void flush_dcache_icache_hugepage(struct page *page) } } } + +void __init gigantic_hugetlb_cma_reserve(void) +{ + unsigned long order = 0; + + if (radix_enabled()) + order = PUD_SHIFT - PAGE_SHIFT; + else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift) + /* + * For pseries we do use ibm,expected#pages for reserving 16G pages. + */ + order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; + + if (order) { + VM_WARN_ON(order < MAX_ORDER); + hugetlb_cma_reserve(order); + } +} diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 42ef7a6e6098..8e0d792ac296 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -17,8 +17,8 @@ #undef DEBUG #include <linux/string.h> +#include <linux/pgtable.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/kup.h> phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 872df48ae41b..02c7db4087cb 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -29,10 +29,8 @@ #include <linux/slab.h> #include <linux/hugetlb.h> -#include <asm/pgalloc.h> #include <asm/prom.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/smp.h> #include <asm/machdep.h> @@ -96,11 +94,13 @@ static void __init MMU_setup(void) if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } - if (debug_pagealloc_enabled()) { - __map_without_bats = 1; + if (IS_ENABLED(CONFIG_PPC_8xx)) + return; + + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; - } - if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx)) + + if (strict_kernel_rwx_enabled()) __map_without_ltlbs = 1; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4002ced3596f..8459056cce67 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -47,7 +47,6 @@ #include <asm/rtas.h> #include <asm/io.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <linux/uaccess.h> #include <asm/smp.h> @@ -203,7 +202,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; /* Align to the page size of the linear mapping. 
*/ - start = _ALIGN_DOWN(start, page_size); + start = ALIGN_DOWN(start, page_size); pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); @@ -226,12 +225,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, * fall back to system memory if the altmap allocation fail. */ if (altmap && !altmap_cross_boundary(altmap, start, page_size)) { - p = altmap_alloc_block_buf(page_size, altmap); + p = vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) pr_debug("altmap block allocation failed, falling back to system memory"); } if (!p) - p = vmemmap_alloc_block_buf(page_size, node); + p = vmemmap_alloc_block_buf(page_size, node, NULL); if (!p) return -ENOMEM; @@ -292,7 +291,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, unsigned long alt_start = ~0, alt_end = ~0; unsigned long base_pfn; - start = _ALIGN_DOWN(start, page_size); + start = ALIGN_DOWN(start, page_size); if (altmap) { alt_start = altmap->base_pfn; alt_end = altmap->base_pfn + altmap->reserve + @@ -407,13 +406,15 @@ static void __init early_check_vec5(void) } if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & OV5_FEAT(OV5_RADIX_GTSE))) { - pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); - } + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE; + } else + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; /* Do radix anyway - the hypervisor said we had to */ cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { /* Hypervisor only supports hash - disable radix */ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE; } } @@ -432,9 +433,16 @@ void __init mmu_early_init_devtree(void) if (!(mfmsr() & MSR_HV)) early_check_vec5(); - if (early_radix_enabled()) + if (early_radix_enabled()) { radix__early_init_devtree(); - else + /* + * We have finalized the translation we are going to use by now. + * Radix mode is not limited by RMA / VRMA addressing. + * Hence don't limit memblock allocations. + */ + ppc64_rma_size = ULONG_MAX; + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + } else hash__early_init_devtree(); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index 50a99d9684f7..ba5cbb0d66bd 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -4,56 +4,6 @@ #include <linux/slab.h> #include <linux/vmalloc.h> -/** - * Low level function to establish the page tables for an IO mapping - */ -void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot) -{ - int ret; - unsigned long va = (unsigned long)ea; - - /* We don't support the 4K PFN hack with ioremap */ - if (pgprot_val(prot) & H_PAGE_4K_PFN) - return NULL; - - if ((ea + size) >= (void *)IOREMAP_END) { - pr_warn("Outside the supported range\n"); - return NULL; - } - - WARN_ON(pa & ~PAGE_MASK); - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - if (slab_is_available()) { - ret = ioremap_page_range(va, va + size, pa, prot); - if (ret) - unmap_kernel_range(va, size); - } else { - ret = early_ioremap_range(va, pa, size, prot); - } - - if (ret) - return NULL; - - return (void __iomem *)ea; -} -EXPORT_SYMBOL(__ioremap_at); - -/** - * Low level function to tear down the page tables for an IO mapping. This is - * used for mappings that are manipulated manually, like partial unmapping of - * PCI IOs or ISA space. 
- */ -void __iounmap_at(void *ea, unsigned long size) -{ - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - unmap_kernel_range((unsigned long)ea, size); -} -EXPORT_SYMBOL(__iounmap_at); - void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c new file mode 100644 index 000000000000..2784224054f8 --- /dev/null +++ b/arch/powerpc/mm/kasan/8xx.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/memblock.h> +#include <linux/hugetlb.h> + +static int __init +kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block) +{ + pmd_t *pmd = pmd_off_k(k_start); + unsigned long k_cur, k_next; + + for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) { + pte_basic_t *new; + + k_next = pgd_addr_end(k_cur, k_end); + k_next = pgd_addr_end(k_next, k_end); + if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) + continue; + + new = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + if (!new) + return -ENOMEM; + + *new = pte_val(pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block)), PAGE_KERNEL))); + + hugepd_populate_kernel((hugepd_t *)pmd, (pte_t *)new, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmd + 1, (pte_t *)new, PAGE_SHIFT_8M); + } + return 0; +} + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur; + int ret; + void *block; + + block = memblock_alloc(k_end - k_start, SZ_8M); + if (!block) + return -ENOMEM; + + if (IS_ALIGNED(k_start, SZ_8M)) { + kasan_init_shadow_8M(k_start, ALIGN_DOWN(k_end, SZ_8M), block); + k_cur = ALIGN_DOWN(k_end, SZ_8M); + if (k_cur == k_end) + goto finish; + } else { + k_cur = k_start; + } + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + for (; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_off_k(k_cur); + void *va = block + k_cur - k_start; + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + if (k_cur < ALIGN_DOWN(k_end, SZ_512K)) + pte = pte_mkhuge(pte); + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } +finish: + flush_tlb_kernel_range(k_start, k_end); + return 0; +} diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 6577897673dd..bb1a5408b86b 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -3,3 +3,5 @@ KASAN_SANITIZE := n obj-$(CONFIG_PPC32) += kasan_init_32.o +obj-$(CONFIG_PPC_8xx) += 8xx.o +obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c new file mode 100644 index 000000000000..202bd260a009 --- /dev/null +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/memblock.h> +#include <mm/mmu_decl.h> + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur = k_start; + int k_size = k_end - k_start; + int k_size_base = 1 << (ffs(k_size) - 1); + int ret; + void *block; + + block = memblock_alloc(k_size, k_size_base); + + if (block && 
k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
+ int k_size_more = 1 << (ffs(k_size - k_size_base) - 1);
+
+ setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
+ if (k_size_more >= SZ_128K)
+ setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
+ k_size_more, PAGE_KERNEL);
+ if (v_block_mapped(k_start))
+ k_cur = k_start + k_size_base;
+ if (v_block_mapped(k_start + k_size_base))
+ k_cur = k_start + k_size_base + k_size_more;
+
+ update_bats();
+ }
+
+ if (!block)
+ block = memblock_alloc(k_size, PAGE_SIZE);
+ if (!block)
+ return -ENOMEM;
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ kasan_update_early_region(k_start, k_cur, __pte(0));
+
+ for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ void *va = block + k_cur - k_start;
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index cbcad369fcb2..fb294046e00e 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -5,9 +5,7 @@
#include <linux/kasan.h>
#include <linux/printk.h>
#include <linux/memblock.h>
-#include <linux/moduleloader.h>
#include <linux/sched/task.h>
-#include <linux/vmalloc.h>
#include <asm/pgalloc.h>
#include <asm/code-patching.h>
#include <mm/mmu_decl.h>
@@ -30,40 +28,31 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot)
__set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
}
-static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
{
pmd_t *pmd;
unsigned long k_cur, k_next;
- pte_t *new = NULL;
- pmd = pmd_ptr_k(k_start);
+ pmd = pmd_off_k(k_start);
for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+ pte_t *new;
+
k_next = pgd_addr_end(k_cur, k_end);
if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
continue;
- if (!new)
- new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
+ new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
if (!new)
return -ENOMEM;
kasan_populate_pte(new, PAGE_KERNEL);
-
- smp_wmb(); /* See comment in __pte_alloc */
-
- spin_lock(&init_mm.page_table_lock);
- /* Has another populated it ? */
- if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) {
- pmd_populate_kernel(&init_mm, pmd, new);
- new = NULL;
- }
- spin_unlock(&init_mm.page_table_lock);
+ pmd_populate_kernel(&init_mm, pmd, new);
}
return 0;
}
-static int __init kasan_init_region(void *start, size_t size)
+int __init __weak kasan_init_region(void *start, size_t size)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
@@ -76,66 +65,67 @@ static int __init kasan_init_region(void *start, size_t size)
return ret;
block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+ if (!block)
+ return -ENOMEM;
for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
- pmd_t *pmd = pmd_ptr_k(k_cur);
+ pmd_t *pmd = pmd_off_k(k_cur);
void *va = block + k_cur - k_start;
pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
- if (!va)
- return -ENOMEM;
-
__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
}
flush_tlb_kernel_range(k_start, k_end);
return 0;
}
-static void __init kasan_remap_early_shadow_ro(void)
+void __init
+kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
{
- pgprot_t prot = kasan_prot_ro();
- unsigned long k_start = KASAN_SHADOW_START;
- unsigned long k_end = KASAN_SHADOW_END;
unsigned long k_cur;
phys_addr_t pa = __pa(kasan_early_shadow_page);
- kasan_populate_pte(kasan_early_shadow_pte, prot);
-
- for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) {
- pmd_t *pmd = pmd_ptr_k(k_cur);
+ for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
pte_t *ptep = pte_offset_kernel(pmd, k_cur);
if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
continue;
- __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+ __set_pte_at(&init_mm, k_cur, ptep, pte, 0);
}
- flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ flush_tlb_kernel_range(k_start, k_end);
+}
+
+static void __init kasan_remap_early_shadow_ro(void)
+{
+ pgprot_t prot = kasan_prot_ro();
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+ kasan_populate_pte(kasan_early_shadow_pte, prot);
+
+ kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END,
+ pfn_pte(PHYS_PFN(pa), prot));
}
static void __init kasan_unmap_early_shadow_vmalloc(void)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
- unsigned long k_cur;
- phys_addr_t pa = __pa(kasan_early_shadow_page);
-
- for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
- pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
- pte_t *ptep = pte_offset_kernel(pmd, k_cur);
- if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
- continue;
+ kasan_update_early_region(k_start, k_end, __pte(0));
- __set_pte_at(&init_mm, k_cur, ptep, __pte(0), 0);
- }
- flush_tlb_kernel_range(k_start, k_end);
+#ifdef MODULES_VADDR
+ k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
+ k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
+ kasan_update_early_region(k_start, k_end, __pte(0));
+#endif
}
void __init kasan_mmu_init(void)
{
int ret;
- struct memblock_region *reg;
if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
@@ -144,10 +134,16 @@ void __init kasan_mmu_init(void)
if (ret)
panic("kasan: kasan_init_shadow_page_tables() failed");
}
+}
+
+void __init kasan_init(void)
+{
+ struct memblock_region *reg;
for_each_memblock(memory, reg) {
phys_addr_t base = reg->base;
phys_addr_t top = min(base + reg->size, total_lowmem);
+ int ret;
if (base >= top)
continue;
@@ -156,10 +152,7 @@ void __init kasan_mmu_init(void)
if (ret)
panic("kasan: kasan_init_region() failed");
}
-}
-void __init kasan_init(void)
-{
kasan_remap_early_shadow_ro();
clear_page(kasan_early_shadow_page);
@@ -196,7 +189,7 @@ void __init kasan_early_init(void)
unsigned long addr = KASAN_SHADOW_START;
unsigned long end = KASAN_SHADOW_END;
unsigned long next;
- pmd_t *pmd = pmd_ptr_k(addr);
+ pmd_t *pmd = pmd_off_k(addr);
BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 041ed7cfd341..42e25874f5a8 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -32,12 +32,11 @@
#include <linux/vmalloc.h>
#include <linux/memremap.h>
#include <linux/dma-direct.h>
+#include <linux/kprobes.h>
-#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
@@ -64,8 +63,6 @@ bool init_mem_is_free;
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
EXPORT_SYMBOL(kmap_pte);
-pgprot_t kmap_prot;
-EXPORT_SYMBOL(kmap_prot);
#endif
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -129,8 +126,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
unsigned long nr_pages = size >> PAGE_SHIFT;
int rc;
- resize_hpt_for_hotplug(memblock_phys_mem_size());
-
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size, nid, params->pgprot);
@@ -163,9 +158,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
* hit that section of memory
*/
vm_unmap_aliases();
-
- if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
- pr_warn("Hash collision while resizing HPT\n");
}
#endif
@@ -186,8 +178,6 @@ void __init mem_topology_setup(void)
void __init initmem_init(void)
{
- /* XXX need to clip this if using highmem?
*/ - sparse_memory_present_with_active_regions(0); sparse_init(); } @@ -245,7 +235,6 @@ void __init paging_init(void) pkmap_page_table = virt_to_kpte(PKMAP_BASE); kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); - kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n", @@ -271,7 +260,7 @@ void __init paging_init(void) max_zone_pfns[ZONE_HIGHMEM] = max_pfn; #endif - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); mark_nonram_nosave(); } @@ -468,6 +457,7 @@ static void flush_dcache_icache_phys(unsigned long physaddr) : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) : "ctr", "memory"); } +NOKPROBE_SYMBOL(flush_dcache_icache_phys) #endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) /* @@ -578,7 +568,7 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, flush_dcache_page(pg); } -void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len) { unsigned long maddr; @@ -587,7 +577,6 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, flush_icache_range(maddr, maddr + len); kunmap(page); } -EXPORT_SYMBOL(flush_icache_user_range); /* * System memory should not be in /proc/iomem but various tools expect it diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7097e07a209a..1b6d39e9baed 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -182,6 +182,10 @@ static inline void mmu_mark_initmem_nx(void) { } static inline void mmu_mark_rodata_ro(void) { } #endif +#ifdef CONFIG_PPC_8xx +void __init mmu_mapin_immr(void); +#endif + #ifdef CONFIG_PPC_DEBUG_WX void ptdump_check_wx(void); #else diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index 82862723ab42..95751c322f6c 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -32,11 +32,9 @@ #include <linux/highmem.h> #include <linux/memblock.h> -#include <asm/pgalloc.h> #include <asm/prom.h> #include <asm/io.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <linux/uaccess.h> #include <asm/smp.h> @@ -102,9 +100,9 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) while (s >= LARGE_PAGE_SIZE_16M) { pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; + unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_RW; - pmdp = pmd_ptr_k(v); + pmdp = pmd_off_k(v); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); @@ -117,9 +115,9 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) while (s >= LARGE_PAGE_SIZE_4M) { pmd_t *pmdp; - unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; + unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_RW; - pmdp = pmd_ptr_k(v); + pmdp = pmd_off_k(v); *pmdp = __pmd(val); v += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index d83a12c5bc7f..d2b37146ae6c 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -9,8 +9,10 @@ #include <linux/memblock.h> #include <linux/mmu_context.h> +#include <linux/hugetlb.h> #include <asm/fixmap.h> #include <asm/code-patching.h> +#include <asm/inst.h> #include <mm/mmu_decl.h> @@ -54,158 +56,148 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -#define LARGE_PAGE_SIZE_8M (1<<23) - -/* - * MMU_init_hw does the chip-specific 
initialization of the MMU hardware. - */ -void __init MMU_init_hw(void) +static pte_t __init *early_hugepd_alloc_kernel(hugepd_t *pmdp, unsigned long va) { - /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ - if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { - unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; - int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28; - unsigned long addr = 0; - unsigned long mem = total_lowmem; - - for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { - mtspr(SPRN_MD_CTR, ctr | (i << 8)); - mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); - mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); - addr += LARGE_PAGE_SIZE_8M; - mem -= LARGE_PAGE_SIZE_8M; - } + if (hpd_val(*pmdp) == 0) { + pte_t *ptep = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + + if (!ptep) + return NULL; + + hugepd_populate_kernel((hugepd_t *)pmdp, ptep, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmdp + 1, ptep, PAGE_SHIFT_8M); } + return hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); } -static void __init mmu_mapin_immr(void) +static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, + pgprot_t prot, int psize, bool new) { - unsigned long p = PHYS_IMMR_BASE; - unsigned long v = VIRT_IMMR_BASE; - int offset; + pmd_t *pmdp = pmd_off_k(va); + pte_t *ptep; + + if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M)) + return -EINVAL; + + if (new) { + if (WARN_ON(slab_is_available())) + return -EINVAL; + + if (psize == MMU_PAGE_512K) + ptep = early_pte_alloc_kernel(pmdp, va); + else + ptep = early_hugepd_alloc_kernel((hugepd_t *)pmdp, va); + } else { + if (psize == MMU_PAGE_512K) + ptep = pte_offset_kernel(pmdp, va); + else + ptep = hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); + } + + if (WARN_ON(!ptep)) + return -ENOMEM; - for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); + /* The PTE should never be already present */ + if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot))) + return -EINVAL; + + set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot))); + + return 0; } -static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped) +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. 
+ */ +void __init MMU_init_hw(void) { - modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); } -static void mmu_patch_addis(s32 *site, long simm) +static bool immr_is_mapped __initdata; + +void __init mmu_mapin_immr(void) { - unsigned int instr = *(unsigned int *)patch_site_addr(site); + if (immr_is_mapped) + return; + + immr_is_mapped = true; - instr &= 0xffff0000; - instr |= ((unsigned long)simm) >> 16; - patch_instruction_site(site, instr); + __early_map_kernel_hugepage(VIRT_IMMR_BASE, PHYS_IMMR_BASE, + PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } -static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) +static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, + pgprot_t prot, bool new) { - unsigned long s = offset; - unsigned long v = PAGE_OFFSET + s; - phys_addr_t p = memstart_addr + s; - - for (; s < top; s += PAGE_SIZE) { - map_kernel_page(v, p, prot); - v += PAGE_SIZE; - p += PAGE_SIZE; - } + unsigned long v = PAGE_OFFSET + offset; + unsigned long p = offset; + + WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K)); + + for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); + for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + + if (!new) + flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top); } unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { - unsigned long mapped; - - if (__map_without_ltlbs) { - mapped = 0; - mmu_mapin_immr(); - if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) - patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + unsigned long boundary = strict_boundary ? sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + WARN_ON(top < einittext8); + + mmu_mapin_immr(); + + if (__map_without_ltlbs) + return 0; + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); + if (debug_pagealloc_enabled()) { + top = boundary; } else { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - - mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8); - - /* - * Populate page tables to: - * - have them appear in /sys/kernel/debug/kernel_page_tables - * - allow the BDI to find the pages when they are not PINNED - */ - mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X); - mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL); - mmu_mapin_immr(); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); } - mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); - mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); + if (top > SZ_32M) + memblock_set_current_limit(top); - /* If the size of RAM is not an exact power of two, we may not - * have covered RAM in its entirety with 8 MiB - * pages. 
Consequently, restrict the top end of RAM currently - * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" - * coverage with normal-sized pages (or other reasons) do not - * attempt to allocate outside the allowed range. - */ - if (mapped) - memblock_set_current_limit(mapped); + block_mapped_ram = top; - block_mapped_ram = mapped; - - return mapped; + return top; } void mmu_mark_initmem_nx(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) - mmu_patch_addis(&patch__itlbmiss_linmem_top8, - -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); - unsigned long etext = __pa(_etext); - - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); - - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, einittext8, __pgprot(0)); - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL); - } else { - mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL); - } - } - _tlbil_all(); + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + + if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) { unsigned long sinittext = __pa(_sinittext); - unsigned long etext = __pa(_etext); - - if (CONFIG_DATA_SHIFT < 23) - mmu_patch_addis(&patch__dtlbmiss_romem_top8, - -__pa(((unsigned long)_sinittext) & - ~(LARGE_PAGE_SIZE_8M - 1))); - mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); - - _tlbil_all(); - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, sinittext, __pgprot(0)); - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX); - mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO); + mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); + if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) + mmu_pin_tlb(block_mapped_ram, true); } #endif @@ -218,7 +210,7 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, BUG_ON(first_memblock_base != 0); /* 8xx can only access 32MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000)); + memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } /* diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 4637fdd469cf..77884e24281d 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -73,6 +73,7 @@ static void __init *early_alloc_pgtable(unsigned long size) int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -80,7 +81,8 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; pmdp = pmd_alloc(&init_mm, 
pudp, ea); @@ -91,13 +93,12 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) return -ENOMEM; } else { pgdp = pgd_offset_k(ea); -#ifndef __PAGETABLE_PUD_FOLDED - if (pgd_none(*pgdp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - pgd_populate(&init_mm, pgdp, pudp); + p4dp = p4d_offset(pgdp, ea); + if (p4d_none(*p4dp)) { + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pmdp); } -#endif /* !__PAGETABLE_PUD_FOLDED */ - pudp = pud_offset(pgdp, ea); + pudp = pud_offset(p4dp, ea); if (pud_none(*pudp)) { pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); pud_populate(&init_mm, pudp, pmdp); diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c index b4eb06ceb189..0c294827d6e5 100644 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -37,11 +37,9 @@ #include <linux/highmem.h> #include <linux/memblock.h> -#include <asm/pgalloc.h> #include <asm/prom.h> #include <asm/io.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <linux/uaccess.h> #include <asm/smp.h> diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 4a75f2d9bf0e..4c74e8a5482b 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -14,7 +14,7 @@ #include <linux/memblock.h> #include <linux/libfdt.h> #include <linux/crash_core.h> -#include <asm/pgalloc.h> +#include <asm/cacheflush.h> #include <asm/prom.h> #include <asm/kdump.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 696f568253a0..14514585db98 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -34,6 +34,7 @@ #include <linux/of_fdt.h> #include <linux/hugetlb.h> +#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/code-patching.h> diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 1f110c3c48fb..bf24451f3e71 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -6,6 +6,7 @@ * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. */ +#include <linux/pgtable.h> #include <asm/processor.h> #include <asm/reg.h> #include <asm/page.h> @@ -13,7 +14,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/cputable.h> -#include <asm/pgtable.h> #include <asm/exception-64e.h> #include <asm/ppc-opcode.h> #include <asm/kvm_asm.h> @@ -71,7 +71,6 @@ START_BTB_FLUSH_SECTION END_BTB_FLUSH_SECTION std r7,EX_TLB_R7(r12) #endif - TLB_MISS_PROLOG_STATS .endm .macro tlb_epilog_bolted @@ -85,7 +84,6 @@ END_BTB_FLUSH_SECTION mtcr r14 ld r14,EX_TLB_R14(r12) ld r15,EX_TLB_R15(r12) - TLB_MISS_RESTORE_STATS ld r16,EX_TLB_R16(r12) mfspr r12,SPRN_SPRG_GEN_SCRATCH .endm @@ -128,7 +126,6 @@ END_BTB_FLUSH_SECTION ori r10,r10,_PAGE_PRESENT oris r11,r10,_PAGE_ACCESSED@h - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_bolted tlb_miss_common_bolted: @@ -209,7 +206,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) tlbwe tlb_miss_done_bolted: - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) tlb_epilog_bolted rfi @@ -229,11 +225,9 @@ tlb_miss_fault_bolted: andi. 
r10,r11,_PAGE_EXEC|_PAGE_BAP_SX bne itlb_miss_fault_bolted dtlb_miss_fault_bolted: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_data_storage_book3e itlb_miss_fault_bolted: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_instruction_storage_book3e @@ -243,7 +237,6 @@ itlb_miss_fault_bolted: rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 srdi r15,r16,60 /* get region */ - TLB_MISS_STATS_SAVE_INFO_BOLTED bne- itlb_miss_fault_bolted li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ @@ -276,7 +269,6 @@ itlb_miss_fault_bolted: srdi. r15,r16,60 /* get region */ ori r16,r16,1 - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_e6500 /* user/kernel test */ b tlb_miss_common_e6500 @@ -288,7 +280,6 @@ itlb_miss_fault_bolted: srdi. r15,r16,60 /* get region */ rldicr r16,r16,0,62 - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_e6500 /* user vs kernel check */ /* @@ -460,7 +451,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) .endm tlb_unlock_e6500 - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) tlb_epilog_bolted rfi @@ -519,11 +509,9 @@ tlb_miss_fault_e6500: andi. r16,r16,1 bne itlb_miss_fault_e6500 dtlb_miss_fault_e6500: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_data_storage_book3e itlb_miss_fault_e6500: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_instruction_storage_book3e #endif /* CONFIG_PPC_FSL_BOOK3E */ @@ -548,7 +536,6 @@ itlb_miss_fault_e6500: mfspr r16,SPRN_DEAR /* get faulting address */ srdi r15,r16,60 /* get region */ cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* The page tables are mapped virtually linear. At this point, though, @@ -600,7 +587,6 @@ itlb_miss_fault_e6500: /* We got a crappy address, just fault with whatever DEAR and ESR * are here */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -624,7 +610,6 @@ itlb_miss_fault_e6500: */ srdi r15,r16,60 /* get region */ cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -646,7 +631,6 @@ itlb_miss_fault_e6500: beq+ normal_tlb_miss /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -745,7 +729,6 @@ normal_tlb_miss_done: * level 0 and just going back to userland. 
They are only needed * if you are going to take an access fault */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) TLB_MISS_EPILOG_SUCCESS rfi @@ -757,11 +740,9 @@ normal_tlb_miss_access_fault: ld r15,EX_TLB_ESR(r12) mtspr SPRN_DEAR,r14 mtspr SPRN_ESR,r15 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -899,7 +880,6 @@ virt_page_table_tlb_miss_done: 1: END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Return to caller, normal case */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK); TLB_MISS_EPILOG_SUCCESS rfi @@ -935,18 +915,15 @@ virt_page_table_tlb_miss_fault: beq 1f mtspr SPRN_DEAR,r15 mtspr SPRN_ESR,r16 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT); TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT); - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e virt_page_table_tlb_miss_whacko_fault: /* The linear fault will restart everything so ESR and DEAR will * not have been clobbered, let's just fault with what we have */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT); TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -971,7 +948,6 @@ virt_page_table_tlb_miss_whacko_fault: mfspr r16,SPRN_DEAR /* get faulting address */ srdi r11,r16,60 /* get region */ cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -991,7 +967,6 @@ virt_page_table_tlb_miss_whacko_fault: /* We got a crappy address, just fault with whatever DEAR and ESR * are here */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -1015,7 +990,6 @@ virt_page_table_tlb_miss_whacko_fault: */ srdi r11,r16,60 /* get region */ cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -1033,7 +1007,6 @@ virt_page_table_tlb_miss_whacko_fault: beq+ htw_tlb_miss /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -1130,7 +1103,6 @@ htw_tlb_miss_done: * level 0 and just going back to userland. They are only needed * if you are going to take an access fault */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK) TLB_MISS_EPILOG_SUCCESS rfi @@ -1142,11 +1114,9 @@ htw_tlb_miss_fault: beq 1f mtspr SPRN_DEAR,r16 mtspr SPRN_ESR,r14 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT) - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e /* @@ -1221,7 +1191,6 @@ tlb_load_linear_done: * We do that because we can't resume a fault within a TLB * miss handler, due to MAS and TLB reservation being clobbered. */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR) TLB_MISS_EPILOG_ERROR rfi @@ -1233,13 +1202,3 @@ tlb_load_linear_fault: b exc_data_storage_book3e 1: TLB_MISS_EPILOG_ERROR_SPECIAL b exc_instruction_storage_book3e - - -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -.tlb_stat_inc: -1: ldarx r8,0,r9 - addi r8,r8,1 - stdcx. 
r8,0,r9 - bne- 1b - blr -#endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9fcf2d195830..1f61fa2148b5 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -221,7 +221,8 @@ static void initialize_distance_lookup_table(int nid, } } -/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa +/* + * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA * info is found. */ static int associativity_to_nid(const __be32 *associativity) @@ -235,7 +236,7 @@ static int associativity_to_nid(const __be32 *associativity) nid = of_read_number(&associativity[min_common_depth], 1); /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff || nid >= MAX_NUMNODES) + if (nid == 0xffff || nid >= nr_node_ids) nid = NUMA_NO_NODE; if (nid > 0 && @@ -448,7 +449,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) index = lmb->aa_index * aa.array_sz + min_common_depth - 1; nid = of_read_number(&aa.arrays[index], 1); - if (nid == 0xffff || nid >= MAX_NUMNODES) + if (nid == 0xffff || nid >= nr_node_ids) nid = default_nid; if (nid > 0) { @@ -644,8 +645,9 @@ static inline int __init read_usm_ranges(const __be32 **usm) * Extract NUMA information from the ibm,dynamic-reconfiguration-memory * node. This assumes n_mem_{addr,size}_cells have been set. */ -static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, - const __be32 **usm) +static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { unsigned int ranges, is_kexec_kdump = 0; unsigned long base, size, sz; @@ -657,7 +659,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, */ if ((lmb->flags & DRCONF_MEM_RESERVED) || !(lmb->flags & DRCONF_MEM_ASSIGNED)) - return; + return 0; if (*usm) is_kexec_kdump = 1; @@ -669,7 +671,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, if (is_kexec_kdump) { ranges = read_usm_ranges(usm); if (!ranges) /* there are no (base, size) duple */ - return; + return 0; } do { @@ -686,6 +688,8 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, if (sz) memblock_set_node(base, sz, &memblock.memory, nid); } while (--ranges); + + return 0; } static int __init parse_numa_properties(void) @@ -787,7 +791,7 @@ new_range: */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { - walk_drmem_lmbs(memory, numa_setup_drmem_lmb); + walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb); of_node_put(memory); } @@ -949,7 +953,6 @@ void __init initmem_init(void) get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); setup_node_data(nid, start_pfn, end_pfn); - sparse_memory_present_with_active_regions(nid); } sparse_init(); @@ -984,28 +987,6 @@ static int __init early_numa(char *p) } early_param("numa", early_numa); -/* - * The platform can inform us through one of several mechanisms - * (post-migration device tree updates, PRRN or VPHN) that the NUMA - * assignment of a resource has changed. This controls whether we act - * on that. Disabled by default. 
- */ -static bool topology_updates_enabled; - -static int __init early_topology_updates(char *p) -{ - if (!p) - return 0; - - if (!strcmp(p, "on")) { - pr_warn("Caution: enabling topology updates\n"); - topology_updates_enabled = true; - } - - return 0; -} -early_param("topology_updates", early_topology_updates); - #ifdef CONFIG_MEMORY_HOTPLUG /* * Find the node associated with a hot added memory section for @@ -1144,98 +1125,9 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR -struct topology_update_data { - struct topology_update_data *next; - unsigned int cpu; - int old_nid; - int new_nid; -}; - -#define TOPOLOGY_DEF_TIMER_SECS 60 - -static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; -static cpumask_t cpu_associativity_changes_mask; -static int vphn_enabled; -static int prrn_enabled; -static void reset_topology_timer(void); -static int topology_timer_secs = 1; static int topology_inited; /* - * Change polling interval for associativity changes. - */ -int timed_topology_update(int nsecs) -{ - if (vphn_enabled) { - if (nsecs > 0) - topology_timer_secs = nsecs; - else - topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS; - - reset_topology_timer(); - } - - return 0; -} - -/* - * Store the current values of the associativity change counters in the - * hypervisor. - */ -static void setup_cpu_associativity_change_counters(void) -{ - int cpu; - - /* The VPHN feature supports a maximum of 8 reference points */ - BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); - - for_each_possible_cpu(cpu) { - int i; - u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; - - for (i = 0; i < distance_ref_points_depth; i++) - counts[i] = hypervisor_counts[i]; - } -} - -/* - * The hypervisor maintains a set of 8 associativity change counters in - * the VPA of each cpu that correspond to the associativity levels in the - * ibm,associativity-reference-points property. When an associativity - * level changes, the corresponding counter is incremented. - * - * Set a bit in cpu_associativity_changes_mask for each cpu whose home - * node associativity levels have changed. - * - * Returns the number of cpus with unhandled associativity changes. - */ -static int update_cpu_associativity_changes_mask(void) -{ - int cpu; - cpumask_t *changes = &cpu_associativity_changes_mask; - - for_each_possible_cpu(cpu) { - int i, changed = 0; - u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; - - for (i = 0; i < distance_ref_points_depth; i++) { - if (hypervisor_counts[i] != counts[i]) { - counts[i] = hypervisor_counts[i]; - changed = 1; - } - } - if (changed) { - cpumask_or(changes, changes, cpu_sibling_mask(cpu)); - cpu = cpu_last_thread_sibling(cpu); - } - } - - return cpumask_weight(changes); -} - -/* * Retrieve the new associativity information for a virtual processor's * home node. */ @@ -1250,7 +1142,6 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_SUCCESS: dbg("VPHN hcall succeeded. Reset polling...\n"); - timed_topology_update(0); goto out; case H_FUNCTION: @@ -1269,8 +1160,6 @@ static long vphn_get_associativity(unsigned long cpu, , rc); break; } - - stop_topology_update(); out: return rc; } @@ -1314,380 +1203,8 @@ int find_and_online_cpu_nid(int cpu) return new_nid; } -/* - * Update the CPU maps and sysfs entries for a single CPU when its NUMA - * characteristics change. 
This function doesn't perform any locking and is - * only safe to call from stop_machine(). - */ -static int update_cpu_topology(void *data) -{ - struct topology_update_data *update; - unsigned long cpu; - - if (!data) - return -EINVAL; - - cpu = smp_processor_id(); - - for (update = data; update; update = update->next) { - int new_nid = update->new_nid; - if (cpu != update->cpu) - continue; - - unmap_cpu_from_node(cpu); - map_cpu_to_node(cpu, new_nid); - set_cpu_numa_node(cpu, new_nid); - set_cpu_numa_mem(cpu, local_memory_node(new_nid)); - vdso_getcpu_init(); - } - - return 0; -} - -static int update_lookup_table(void *data) -{ - struct topology_update_data *update; - - if (!data) - return -EINVAL; - - /* - * Upon topology update, the numa-cpu lookup table needs to be updated - * for all threads in the core, including offline CPUs, to ensure that - * future hotplug operations respect the cpu-to-node associativity - * properly. - */ - for (update = data; update; update = update->next) { - int nid, base, j; - - nid = update->new_nid; - base = cpu_first_thread_sibling(update->cpu); - - for (j = 0; j < threads_per_core; j++) { - update_numa_cpu_lookup_table(base + j, nid); - } - } - - return 0; -} - -/* - * Update the node maps and sysfs entries for each cpu whose home node - * has changed. Returns 1 when the topology has changed, and 0 otherwise. - * - * cpus_locked says whether we already hold cpu_hotplug_lock. - */ -int numa_update_cpu_topology(bool cpus_locked) -{ - unsigned int cpu, sibling, changed = 0; - struct topology_update_data *updates, *ud; - cpumask_t updated_cpus; - struct device *dev; - int weight, new_nid, i = 0; - - if (!prrn_enabled && !vphn_enabled && topology_inited) - return 0; - - weight = cpumask_weight(&cpu_associativity_changes_mask); - if (!weight) - return 0; - - updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL); - if (!updates) - return 0; - - cpumask_clear(&updated_cpus); - - for_each_cpu(cpu, &cpu_associativity_changes_mask) { - /* - * If siblings aren't flagged for changes, updates list - * will be too short. Skip on this update and set for next - * update. - */ - if (!cpumask_subset(cpu_sibling_mask(cpu), - &cpu_associativity_changes_mask)) { - pr_info("Sibling bits not set for associativity " - "change, cpu%d\n", cpu); - cpumask_or(&cpu_associativity_changes_mask, - &cpu_associativity_changes_mask, - cpu_sibling_mask(cpu)); - cpu = cpu_last_thread_sibling(cpu); - continue; - } - - new_nid = find_and_online_cpu_nid(cpu); - - if (new_nid == numa_cpu_lookup_table[cpu]) { - cpumask_andnot(&cpu_associativity_changes_mask, - &cpu_associativity_changes_mask, - cpu_sibling_mask(cpu)); - dbg("Assoc chg gives same node %d for cpu%d\n", - new_nid, cpu); - cpu = cpu_last_thread_sibling(cpu); - continue; - } - - for_each_cpu(sibling, cpu_sibling_mask(cpu)) { - ud = &updates[i++]; - ud->next = &updates[i]; - ud->cpu = sibling; - ud->new_nid = new_nid; - ud->old_nid = numa_cpu_lookup_table[sibling]; - cpumask_set_cpu(sibling, &updated_cpus); - } - cpu = cpu_last_thread_sibling(cpu); - } - - /* - * Prevent processing of 'updates' from overflowing array - * where last entry filled in a 'next' pointer. 
- */ - if (i) - updates[i-1].next = NULL; - - pr_debug("Topology update for the following CPUs:\n"); - if (cpumask_weight(&updated_cpus)) { - for (ud = &updates[0]; ud; ud = ud->next) { - pr_debug("cpu %d moving from node %d " - "to %d\n", ud->cpu, - ud->old_nid, ud->new_nid); - } - } - - /* - * In cases where we have nothing to update (because the updates list - * is too short or because the new topology is same as the old one), - * skip invoking update_cpu_topology() via stop-machine(). This is - * necessary (and not just a fast-path optimization) since stop-machine - * can end up electing a random CPU to run update_cpu_topology(), and - * thus trick us into setting up incorrect cpu-node mappings (since - * 'updates' is kzalloc()'ed). - * - * And for the similar reason, we will skip all the following updating. - */ - if (!cpumask_weight(&updated_cpus)) - goto out; - - if (cpus_locked) - stop_machine_cpuslocked(update_cpu_topology, &updates[0], - &updated_cpus); - else - stop_machine(update_cpu_topology, &updates[0], &updated_cpus); - - /* - * Update the numa-cpu lookup table with the new mappings, even for - * offline CPUs. It is best to perform this update from the stop- - * machine context. - */ - if (cpus_locked) - stop_machine_cpuslocked(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); - else - stop_machine(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); - - for (ud = &updates[0]; ud; ud = ud->next) { - unregister_cpu_under_node(ud->cpu, ud->old_nid); - register_cpu_under_node(ud->cpu, ud->new_nid); - - dev = get_cpu_device(ud->cpu); - if (dev) - kobject_uevent(&dev->kobj, KOBJ_CHANGE); - cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); - changed = 1; - } - -out: - kfree(updates); - return changed; -} - -int arch_update_cpu_topology(void) -{ - return numa_update_cpu_topology(true); -} - -static void topology_work_fn(struct work_struct *work) -{ - rebuild_sched_domains(); -} -static DECLARE_WORK(topology_work, topology_work_fn); - -static void topology_schedule_update(void) -{ - schedule_work(&topology_work); -} - -static void topology_timer_fn(struct timer_list *unused) -{ - if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) - topology_schedule_update(); - else if (vphn_enabled) { - if (update_cpu_associativity_changes_mask() > 0) - topology_schedule_update(); - reset_topology_timer(); - } -} -static struct timer_list topology_timer; - -static void reset_topology_timer(void) -{ - if (vphn_enabled) - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); -} - -#ifdef CONFIG_SMP - -static int dt_update_callback(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct of_reconfig_data *update = data; - int rc = NOTIFY_DONE; - - switch (action) { - case OF_RECONFIG_UPDATE_PROPERTY: - if (of_node_is_type(update->dn, "cpu") && - !of_prop_cmp(update->prop->name, "ibm,associativity")) { - u32 core_id; - of_property_read_u32(update->dn, "reg", &core_id); - rc = dlpar_cpu_readd(core_id); - rc = NOTIFY_OK; - } - break; - } - - return rc; -} - -static struct notifier_block dt_update_nb = { - .notifier_call = dt_update_callback, -}; - -#endif - -/* - * Start polling for associativity changes. 
- */ -int start_topology_update(void) -{ - int rc = 0; - - if (!topology_updates_enabled) - return 0; - - if (firmware_has_feature(FW_FEATURE_PRRN)) { - if (!prrn_enabled) { - prrn_enabled = 1; -#ifdef CONFIG_SMP - rc = of_reconfig_notifier_register(&dt_update_nb); -#endif - } - } - if (firmware_has_feature(FW_FEATURE_VPHN) && - lppaca_shared_proc(get_lppaca())) { - if (!vphn_enabled) { - vphn_enabled = 1; - setup_cpu_associativity_change_counters(); - timer_setup(&topology_timer, topology_timer_fn, - TIMER_DEFERRABLE); - reset_topology_timer(); - } - } - - pr_info("Starting topology update%s%s\n", - (prrn_enabled ? " prrn_enabled" : ""), - (vphn_enabled ? " vphn_enabled" : "")); - - return rc; -} - -/* - * Disable polling for VPHN associativity changes. - */ -int stop_topology_update(void) -{ - int rc = 0; - - if (!topology_updates_enabled) - return 0; - - if (prrn_enabled) { - prrn_enabled = 0; -#ifdef CONFIG_SMP - rc = of_reconfig_notifier_unregister(&dt_update_nb); -#endif - } - if (vphn_enabled) { - vphn_enabled = 0; - rc = del_timer_sync(&topology_timer); - } - - pr_info("Stopping topology update\n"); - - return rc; -} - -int prrn_is_enabled(void) -{ - return prrn_enabled; -} - -static int topology_read(struct seq_file *file, void *v) -{ - if (vphn_enabled || prrn_enabled) - seq_puts(file, "on\n"); - else - seq_puts(file, "off\n"); - - return 0; -} - -static int topology_open(struct inode *inode, struct file *file) -{ - return single_open(file, topology_read, NULL); -} - -static ssize_t topology_write(struct file *file, const char __user *buf, - size_t count, loff_t *off) -{ - char kbuf[4]; /* "on" or "off" plus null. */ - int read_len; - - read_len = count < 3 ? count : 3; - if (copy_from_user(kbuf, buf, read_len)) - return -EINVAL; - - kbuf[read_len] = '\0'; - - if (!strncmp(kbuf, "on", 2)) { - topology_updates_enabled = true; - start_topology_update(); - } else if (!strncmp(kbuf, "off", 3)) { - stop_topology_update(); - topology_updates_enabled = false; - } else - return -EINVAL; - - return count; -} - -static const struct proc_ops topology_proc_ops = { - .proc_read = seq_read, - .proc_write = topology_write, - .proc_open = topology_open, - .proc_release = single_release, -}; - static int topology_update_init(void) { - start_topology_update(); - - if (vphn_enabled) - topology_schedule_update(); - - if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops)) - return -ENOMEM; - topology_inited = 1; return 0; } diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c index ee4bd6d38602..97ae4935da79 100644 --- a/arch/powerpc/mm/pgtable-frag.c +++ b/arch/powerpc/mm/pgtable-frag.c @@ -110,6 +110,9 @@ void pte_fragment_free(unsigned long *table, int kernel) { struct page *page = virt_to_page(table); + if (PageReserved(page)) + return free_reserved_page(page); + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); if (atomic_dec_and_test(&page->pt_frag_refcount)) { if (!kernel) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index e3759b69f81b..9c0547d77af3 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -23,7 +23,6 @@ #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/hugetlb.h> -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/hugetlb.h> @@ -100,7 +99,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; } * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. 
*/ -static pte_t set_pte_filter(pte_t pte) +static inline pte_t set_pte_filter(pte_t pte) { struct page *pg; @@ -249,22 +248,49 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, #else /* - * Not used on non book3s64 platforms. But 8xx - * can possibly use tsize derived from hstate. + * Not used on non book3s64 platforms. + * 8xx compares it with mmu_virtual_psize to + * know if it is a huge page or not. */ - psize = 0; + psize = MMU_PAGE_COUNT; #endif __ptep_set_access_flags(vma, ptep, pte, addr, psize); } return changed; #endif } + +#if defined(CONFIG_PPC_8xx) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + pmd_t *pmd = pmd_off(mm, addr); + pte_basic_t val; + pte_basic_t *entry = &ptep->pte; + int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K; + int i; + + /* + * Make sure hardware valid bit is not set. We don't do + * tlb flush for this update. + */ + VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); + + pte = pte_mkpte(pte); + + pte = set_pte_filter(pte); + + val = pte_val(pte); + for (i = 0; i < num; i++, entry++, val += SZ_4K) + *entry = val; +} +#endif #endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_DEBUG_VM void assert_pte_locked(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -272,12 +298,14 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) return; pgd = mm->pgd + pgd_index(addr); BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, addr); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, addr); /* * khugepaged to collapse normal pages to hugepage, first set - * pmd to none to force page fault/gup to take mmap_sem. After + * pmd to none to force page fault/gup to take mmap_lock. After * pmd is set to none, we do a pte_clear which does this assertion * so if we find pmd none, return. */ @@ -312,12 +340,13 @@ EXPORT_SYMBOL_GPL(vmalloc_to_phys); pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hpage_shift) { - pgd_t pgd, *pgdp; + pgd_t *pgdp; + p4d_t p4d, *p4dp; pud_t pud, *pudp; pmd_t pmd, *pmdp; pte_t *ret_pte; hugepd_t *hpdp = NULL; - unsigned pdshift = PGDIR_SHIFT; + unsigned pdshift; if (hpage_shift) *hpage_shift = 0; @@ -325,24 +354,28 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, if (is_thp) *is_thp = false; - pgdp = pgdir + pgd_index(ea); - pgd = READ_ONCE(*pgdp); /* * Always operate on the local stack value. This make sure the * value don't get updated by a parallel THP split/collapse, * page fault or a page unmap. The return pte_t * is still not * stable. So should be checked there for above conditions. + * Top level is an exception because it is folded into p4d. 
*/ - if (pgd_none(pgd)) + pgdp = pgdir + pgd_index(ea); + p4dp = p4d_offset(pgdp, ea); + p4d = READ_ONCE(*p4dp); + pdshift = P4D_SHIFT; + + if (p4d_none(p4d)) return NULL; - if (pgd_is_leaf(pgd)) { - ret_pte = (pte_t *)pgdp; + if (p4d_is_leaf(p4d)) { + ret_pte = (pte_t *)p4dp; goto out; } - if (is_hugepd(__hugepd(pgd_val(pgd)))) { - hpdp = (hugepd_t *)&pgd; + if (is_hugepd(__hugepd(p4d_val(p4d)))) { + hpdp = (hugepd_t *)&p4d; goto out_huge; } @@ -352,7 +385,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, * irq disabled */ pdshift = PUD_SHIFT; - pudp = pud_offset(&pgd, ea); + pudp = pud_offset(&p4d, ea); pud = READ_ONCE(*pudp); if (pud_none(pud)) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f62de06e3d07..6eb4eab79385 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -24,16 +24,31 @@ #include <linux/memblock.h> #include <linux/slab.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> #include <asm/setup.h> #include <asm/sections.h> +#include <asm/early_ioremap.h> #include <mm/mmu_decl.h> extern char etext[], _stext[], _sinittext[], _einittext[]; +static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data; + +notrace void __init early_ioremap_init(void) +{ + unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE); + pte_t *ptep = (pte_t *)early_fixmap_pagetable; + pmd_t *pmdp = pmd_off_k(addr); + + for (; (s32)(FIXADDR_TOP - addr) > 0; + addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++) + pmd_populate_kernel(&init_mm, pmdp, ptep); + + early_ioremap_setup(); +} + static void __init *early_alloc_pgtable(unsigned long size) { void *ptr = memblock_alloc(size, size); @@ -45,7 +60,7 @@ static void __init *early_alloc_pgtable(unsigned long size) return ptr; } -static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) +pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) { if (pmd_none(*pmdp)) { pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE); @@ -63,7 +78,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) int err = -ENOMEM; /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_ptr_k(va); + pd = pmd_off_k(va); /* Use middle 10 bits of VA to index the second-level map */ if (likely(slab_is_available())) pg = pte_alloc_kernel(pd, va); @@ -169,7 +184,7 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_stext + 1)) + if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else change_page_attr(page, numpages, PAGE_KERNEL); @@ -181,7 +196,7 @@ void mark_rodata_ro(void) struct page *page; unsigned long numpages; - if (v_block_mapped((unsigned long)_sinittext)) { + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); return; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e78832dce7bb..cc6e2f94517f 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -31,11 +31,9 @@ #include <linux/slab.h> #include <linux/hugetlb.h> -#include <asm/pgalloc.h> #include <asm/page.h> #include <asm/prom.h> #include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/smp.h> #include <asm/machdep.h> @@ -101,13 +99,13 @@ EXPORT_SYMBOL(__pte_frag_size_shift); #ifndef __PAGETABLE_PUD_FOLDED /* 4 level page table */ -struct page *pgd_page(pgd_t pgd) +struct page *p4d_page(p4d_t p4d) { - if 
(pgd_is_leaf(pgd)) { - VM_WARN_ON(!pgd_huge(pgd)); - return pte_page(pgd_pte(pgd)); + if (p4d_is_leaf(p4d)) { + VM_WARN_ON(!p4d_huge(p4d)); + return pte_page(p4d_pte(p4d)); } - return virt_to_page(pgd_page_vaddr(pgd)); + return virt_to_page(p4d_page_vaddr(p4d)); } #endif diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 9e2d8e847d6e..8a797dcbf475 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -5,12 +5,17 @@ * */ #include <linux/kernel.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include "ptdump.h" static const struct flag_info flag_array[] = { { + .mask = _PAGE_HUGE, + .val = _PAGE_HUGE, + .set = "huge", + .clear = " ", + }, { .mask = _PAGE_SH, .val = 0, .set = "user", diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index d3a5d6b318d1..e29b338d499f 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -6,19 +6,21 @@ * This dumps the content of BATS */ +#include <linux/pgtable.h> #include <asm/debugfs.h> -#include <asm/pgtable.h> #include <asm/cpu_has_feature.h> +#include "ptdump.h" + static char *pp_601(int k, int pp) { if (pp == 0) - return k ? "NA" : "RWX"; + return k ? " " : "rwx"; if (pp == 1) - return k ? "ROX" : "RWX"; + return k ? "r x" : "rwx"; if (pp == 2) - return k ? "RWX" : "RWX"; - return k ? "ROX" : "ROX"; + return "rwx"; + return "r x"; } static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) @@ -42,15 +44,13 @@ static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) #else seq_printf(m, "0x%08x ", pbn); #endif + pt_dump_size(m, size); seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); seq_puts(m, "\n"); } @@ -88,6 +88,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool #else seq_printf(m, "0x%08x ", brpn); #endif + pt_dump_size(m, size); if (k == 1) seq_puts(m, "User "); @@ -97,20 +98,16 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool seq_puts(m, "Kernel/User "); if (lower & BPP_RX) - seq_puts(m, is_d ? "RO " : "EXEC "); + seq_puts(m, is_d ? "r " : " x "); else if (lower & BPP_RW) - seq_puts(m, is_d ? "RW " : "EXEC "); + seq_puts(m, is_d ? "rw " : " x "); else - seq_puts(m, is_d ? "NA " : "NX "); - - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); - if (lower & _PAGE_GUARDED) - seq_puts(m, "guarded "); + seq_puts(m, is_d ? " " : " "); + + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); + seq_puts(m, lower & _PAGE_GUARDED ? 
"g " : " "); seq_puts(m, "\n"); } diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c index 0dfca72cb9bd..14f73868db66 100644 --- a/arch/powerpc/mm/ptdump/book3s64.c +++ b/arch/powerpc/mm/ptdump/book3s64.c @@ -5,7 +5,7 @@ * */ #include <linux/kernel.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include "ptdump.h" diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index b6ed9578382f..ad6df9a2e7c8 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -15,13 +15,12 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/seq_file.h> -#include <asm/pgtable.h> #include <linux/const.h> #include <asm/page.h> -#include <asm/pgalloc.h> #include <asm/plpar_wrappers.h> #include <linux/memblock.h> #include <asm/firmware.h> +#include <asm/pgalloc.h> struct pg_state { struct seq_file *seq; @@ -259,7 +258,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc) continue; for (j = 0; j < 4; j++) { if (HPTE_V_COMPARE(ptes[j].v, want_v) && @@ -417,9 +416,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) } } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(p4d, 0); unsigned long addr; unsigned int i; @@ -431,6 +430,20 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + p4d_t *p4d = p4d_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { + addr = start + i * P4D_SIZE; + if (!p4d_none(*p4d)) + /* p4d exists */ + walk_pud(st, p4d, addr); + } +} + static void walk_pagetables(struct pg_state *st) { pgd_t *pgd = pgd_offset_k(0UL); @@ -445,7 +458,7 @@ static void walk_pagetables(struct pg_state *st) addr = KERN_VIRT_START + i * PGDIR_SIZE; if (!pgd_none(*pgd)) /* pgd exists */ - walk_pud(st, pgd, addr); + walk_p4d(st, pgd, addr); } } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index d92bb8ea229c..aca354fb670b 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -19,10 +19,9 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <asm/fixmap.h> -#include <asm/pgtable.h> #include <linux/const.h> #include <asm/page.h> -#include <asm/pgalloc.h> +#include <asm/hugetlb.h> #include <mm/mmu_decl.h> @@ -60,6 +59,7 @@ struct pg_state { unsigned long start_address; unsigned long start_pa; unsigned long last_pa; + unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -73,6 +73,10 @@ struct addr_marker { static struct addr_marker address_markers[] = { { 0, "Start of kernel VM" }, +#ifdef MODULES_VADDR + { 0, "modules start" }, + { 0, "modules end" }, +#endif { 0, "vmalloc() Area" }, { 0, "vmalloc() End" }, #ifdef CONFIG_PPC64 @@ -112,6 +116,19 @@ static struct addr_marker address_markers[] = { seq_putc(m, c); \ }) +void pt_dump_size(struct seq_file *m, unsigned long size) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + + /* Work out what appropriate unit to use */ + while (!(size & 1023) && unit[1]) { + size >>= 10; + unit++; 
+ } + pt_dump_seq_printf(m, "%9lu%c ", size, *unit); +} + static void dump_flag_info(struct pg_state *st, const struct flag_info *flag, u64 pte, int num) { @@ -146,8 +163,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info static void dump_addr(struct pg_state *st, unsigned long addr) { - static const char units[] = "KMGTPE"; - const char *unit = units; unsigned long delta; #ifdef CONFIG_PPC64 @@ -157,20 +172,14 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { + if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = PAGE_SIZE >> 10; + delta = st->page_size >> 10; } else { pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; } - /* Work out what appropriate unit to use */ - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit); - + pt_dump_size(st->seq, delta); } static void note_prot_wx(struct pg_state *st, unsigned long addr) @@ -189,20 +198,34 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } +static void note_page_update_state(struct pg_state *st, unsigned long addr, + unsigned int level, u64 val, unsigned long page_size) +{ + u64 flag = val & pg_level[level].mask; + u64 pa = val & PTE_RPN_MASK; + + st->level = level; + st->current_flags = flag; + st->start_address = addr; + st->start_pa = pa; + st->page_size = page_size; + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } +} + static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) + unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; /* At first no level is set */ if (!st->level) { - st->level = level; - st->current_flags = flag; - st->start_address = addr; - st->start_pa = pa; - st->last_pa = pa; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + note_page_update_state(st, addr, level, val, page_size); /* * Dump the section of virtual memory when: * - the PTE flags from one entry to the next differs. 
@@ -213,7 +236,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ } else if (flag != st->current_flags || level != st->level || addr >= st->marker[1].start_address || - (pa != st->last_pa + PAGE_SIZE && + (pa != st->last_pa + st->page_size && (pa != st->start_pa || st->start_pa != st->last_pa))) { /* Check the PTE flags */ @@ -234,18 +257,9 @@ static void note_page(struct pg_state *st, unsigned long addr, * Address indicates we have passed the end of the * current section of virtual memory */ - while (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } - st->start_address = addr; - st->start_pa = pa; - st->last_pa = pa; - st->current_flags = flag; - st->level = level; - } else { - st->last_pa = pa; + note_page_update_state(st, addr, level, val, page_size); } + st->last_pa = pa; } static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) @@ -256,11 +270,31 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) for (i = 0; i < PTRS_PER_PTE; i++, pte++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); } } +static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start, + int pdshift, int level) +{ +#ifdef CONFIG_ARCH_HAS_HUGEPD + unsigned int i; + int shift = hugepd_shift(*phpd); + int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1; + + if (start & ((1 << shift) - 1)) + return; + + for (i = 0; i < ptrs_per_hpd; i++) { + unsigned long addr = start + (i << shift); + pte_t *pte = hugepte_offset(*phpd, addr, pdshift); + + note_page(st, addr, level + 1, pte_val(*pte), 1 << shift); + } +#endif +} + static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) { pmd_t *pmd = pmd_offset(pud, 0); @@ -273,13 +307,13 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) /* pmd exists */ walk_pte(st, pmd, addr); else - note_page(st, addr, 3, pmd_val(*pmd)); + note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); } } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(p4d, 0); unsigned long addr; unsigned int i; @@ -289,7 +323,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) /* pud exists */ walk_pmd(st, pud, addr); else - note_page(st, addr, 2, pud_val(*pud)); + note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); } } @@ -304,11 +338,15 @@ static void walk_pagetables(struct pg_state *st) * the hash pagetable. 
*/ for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { - if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) - /* pgd exists */ - walk_pud(st, pgd, addr); + p4d_t *p4d = p4d_offset(pgd, 0); + + if (p4d_none(*p4d) || p4d_is_leaf(*p4d)) + note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE); + else if (is_hugepd(__hugepd(p4d_val(*p4d)))) + walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1); else - note_page(st, addr, 1, pgd_val(*pgd)); + /* p4d exists */ + walk_pud(st, p4d, addr); } } @@ -316,7 +354,15 @@ static void populate_markers(void) { int i = 0; +#ifdef CONFIG_PPC64 address_markers[i++].start_address = PAGE_OFFSET; +#else + address_markers[i++].start_address = TASK_SIZE; +#endif +#ifdef MODULES_VADDR + address_markers[i++].start_address = MODULES_VADDR; + address_markers[i++].start_address = MODULES_END; +#endif address_markers[i++].start_address = VMALLOC_START; address_markers[i++].start_address = VMALLOC_END; #ifdef CONFIG_PPC64 @@ -353,7 +399,7 @@ static int ptdump_show(struct seq_file *m, void *v) struct pg_state st = { .seq = m, .marker = address_markers, - .start_address = PAGE_OFFSET, + .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; #ifdef CONFIG_PPC64 @@ -363,7 +409,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0); + note_page(&st, 0, 0, 0, 0); return 0; } @@ -397,7 +443,7 @@ void ptdump_check_wx(void) .seq = NULL, .marker = address_markers, .check_wx = true, - .start_address = PAGE_OFFSET, + .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/mm/ptdump/ptdump.h b/arch/powerpc/mm/ptdump/ptdump.h index 5d513636de73..154efae96ae0 100644 --- a/arch/powerpc/mm/ptdump/ptdump.h +++ b/arch/powerpc/mm/ptdump/ptdump.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/types.h> +#include <linux/seq_file.h> struct flag_info { u64 mask; @@ -17,3 +18,5 @@ struct pgtable_level { }; extern struct pgtable_level pg_level[5]; + +void pt_dump_size(struct seq_file *m, unsigned long delta); diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f7ed2f187cb0..c005fe041c18 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -5,7 +5,7 @@ * */ #include <linux/kernel.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include "ptdump.h" @@ -31,6 +31,11 @@ static const struct flag_info flag_array[] = { .set = "present", .clear = " ", }, { + .mask = _PAGE_COHERENT, + .val = _PAGE_COHERENT, + .set = "coherent", + .clear = " ", + }, { .mask = _PAGE_GUARDED, .val = _PAGE_GUARDED, .set = "guarded", diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index dffe1a45b6ed..82b45b1cb973 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -478,7 +478,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* If hint, make sure it matches our alignment restrictions */ if (!fixed && addr) { - addr = _ALIGN_UP(addr, page_size); + addr = ALIGN(addr, page_size); slice_dbg(" aligned addr=%lx\n", addr); /* Ignore hint if it's too large or overlaps a VMA */ if (addr > high_limit - len || addr < mmap_min_addr || diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 55d4377ccfae..d0a67a1bbaf1 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -11,6 +11,7 @@ #ifndef __ASSEMBLY__ #include <asm/types.h> +#include <asm/ppc-opcode.h> #ifdef 
PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 @@ -18,167 +19,10 @@ #define FUNCTION_DESCR_SIZE 0 #endif -/* - * 16-bit immediate helper macros: HA() is for use with sign-extending instrs - * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the - * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000). - */ -#define IMM_H(i) ((uintptr_t)(i)>>16) -#define IMM_HA(i) (((uintptr_t)(i)>>16) + \ - (((uintptr_t)(i) & 0x8000) >> 15)) -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) - #define PLANT_INSTR(d, idx, instr) \ do { if (d) { (d)[idx] = instr; } idx++; } while (0) #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) -#define PPC_NOP() EMIT(PPC_INST_NOP) -#define PPC_BLR() EMIT(PPC_INST_BLR) -#define PPC_BLRL() EMIT(PPC_INST_BLRL) -#define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r)) -#define PPC_BCTR() EMIT(PPC_INST_BCTR) -#define PPC_MTCTR(r) EMIT(PPC_INST_MTCTR | ___PPC_RT(r)) -#define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \ - ___PPC_RA(a) | IMM_L(i)) -#define PPC_MR(d, a) PPC_OR(d, a, a) -#define PPC_LI(r, i) PPC_ADDI(r, 0, i) -#define PPC_ADDIS(d, a, i) EMIT(PPC_INST_ADDIS | \ - ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) -#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) -#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STH(r, base, i) EMIT(PPC_INST_STH | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STB(r, base, i) EMIT(PPC_INST_STB | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) - -#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_LDBRX(r, base, b) EMIT(PPC_INST_LDBRX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) - -#define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define PPC_BPF_STWCX(s, a, b) EMIT(PPC_INST_STWCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPD(a, b) EMIT(PPC_INST_CMPD | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPLDI(a, i) EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPLW(a, b) 
EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPLD(a, b) EMIT(PPC_INST_CMPLD | ___PPC_RA(a) | \ - ___PPC_RB(b)) - -#define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \ - ___PPC_RB(a) | ___PPC_RA(b)) -#define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULD(d, a, b) EMIT(PPC_INST_MULLD | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULI(d, a, i) EMIT(PPC_INST_MULLI | ___PPC_RT(d) | \ - ___PPC_RA(a) | IMM_L(i)) -#define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DIVDU(d, a, b) EMIT(PPC_INST_DIVDU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_AND_DOT(d, a, b) EMIT(PPC_INST_ANDDOT | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_MR(d, a) PPC_OR(d, a, a) -#define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_XOR(d, a, b) EMIT(PPC_INST_XOR | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_XORI(d, a, i) EMIT(PPC_INST_XORI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_EXTSW(d, a) EMIT(PPC_INST_EXTSW | ___PPC_RA(d) | \ - ___PPC_RS(a)) -#define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SLD(d, a, s) EMIT(PPC_INST_SLD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAW(d, a, s) EMIT(PPC_INST_SRAW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAWI(d, a, i) EMIT(PPC_INST_SRAWI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i)) -#define PPC_SRD(d, a, s) EMIT(PPC_INST_SRD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i)) -#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(mb) | __PPC_ME(me)) -#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \ - ___PPC_RA(d) | ___PPC_RS(a) | \ - __PPC_SH(i) | __PPC_MB(mb) | \ - __PPC_ME(me)) -#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(mb) | __PPC_ME(me)) -#define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i) | \ - __PPC_MB64(mb)) -#define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i) | \ - __PPC_ME64(me)) - -/* slwi = rlwinm Rx, Ry, n, 0, 31-n */ -#define PPC_SLWI(d, a, i) PPC_RLWINM(d, a, i, 0, 31-(i)) -/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ -#define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31) -/* sldi = rldicr Rx, Ry, n, 63-n */ 
-#define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i)) -/* sldi = rldicl Rx, Ry, 64-n, n */ -#define PPC_SRDI(d, a, i) PPC_RLDICL(d, a, 64-(i), i) - -#define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) - /* Long jump; (unconditional 'branch') */ #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ (((dest) - (ctx->idx * 4)) & 0x03fffffc)) @@ -191,11 +35,11 @@ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ (int)(uintptr_t)(i) < 32768) \ - PPC_LI(d, i); \ + EMIT(PPC_RAW_LI(d, i)); \ else { \ - PPC_LIS(d, IMM_H(i)); \ + EMIT(PPC_RAW_LIS(d, IMM_H(i))); \ if (IMM_L(i)) \ - PPC_ORI(d, d, IMM_L(i)); \ + EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) #define PPC_LI64(d, i) do { \ @@ -204,19 +48,21 @@ PPC_LI32(d, i); \ else { \ if (!((uintptr_t)(i) & 0xffff800000000000ULL)) \ - PPC_LI(d, ((uintptr_t)(i) >> 32) & 0xffff); \ + EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) & \ + 0xffff)); \ else { \ - PPC_LIS(d, ((uintptr_t)(i) >> 48)); \ + EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \ if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \ - PPC_ORI(d, d, \ - ((uintptr_t)(i) >> 32) & 0xffff); \ + EMIT(PPC_RAW_ORI(d, d, \ + ((uintptr_t)(i) >> 32) & 0xffff)); \ } \ - PPC_SLDI(d, d, 32); \ + EMIT(PPC_RAW_SLDI(d, d, 32)); \ if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \ - PPC_ORIS(d, d, \ - ((uintptr_t)(i) >> 16) & 0xffff); \ + EMIT(PPC_RAW_ORIS(d, d, \ + ((uintptr_t)(i) >> 16) & 0xffff)); \ if ((uintptr_t)(i) & 0x000000000000ffffULL) \ - PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \ + EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ + 0xffff)); \ } } while (0) #ifdef CONFIG_PPC64 @@ -240,7 +86,7 @@ static inline bool is_nearbranch(int offset) #define PPC_BCC(cond, dest) do { \ if (is_nearbranch((dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ - PPC_NOP(); \ + EMIT(PPC_RAW_NOP()); \ } else { \ /* Flip the 'T or F' bit to invert comparison */ \ PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4); \ diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 4ec2a9f14f84..448dfd4d98e1 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -72,21 +72,21 @@ DECLARE_LOAD_FUNC(sk_load_half); DECLARE_LOAD_FUNC(sk_load_byte); DECLARE_LOAD_FUNC(sk_load_byte_msh); -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LBZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0) -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LD(r, r, IMM_L(i)); } } while(0) +#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0) -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LWZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0) -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LHZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \ + else { 
EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0) #ifdef CONFIG_PPC64 #define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) @@ -107,20 +107,20 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); } while(0) #endif #else -#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0) +#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0) #endif #define PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0) + do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0) #ifdef __LITTLE_ENDIAN__ #define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) #else #define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) #endif -#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) +#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0) +#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0) +#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0) #define SEEN_DATAREF 0x10000 /* might call external helpers */ #define SEEN_XREG 0x20000 /* X reg is used */ diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index cf3a7e337f02..2e33c6673ff9 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -70,19 +70,21 @@ static const int b2p[] = { */ #define PPC_BPF_LL(r, base, i) do { \ if ((i) % 4) { \ - PPC_LI(b2p[TMP_REG_2], (i)); \ - PPC_LDX(r, base, b2p[TMP_REG_2]); \ + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ + EMIT(PPC_RAW_LDX(r, base, \ + b2p[TMP_REG_2])); \ } else \ - PPC_LD(r, base, i); \ + EMIT(PPC_RAW_LD(r, base, i)); \ } while(0) #define PPC_BPF_STL(r, base, i) do { \ if ((i) % 4) { \ - PPC_LI(b2p[TMP_REG_2], (i)); \ - PPC_STDX(r, base, b2p[TMP_REG_2]); \ + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ + EMIT(PPC_RAW_STDX(r, base, \ + b2p[TMP_REG_2])); \ } else \ - PPC_STD(r, base, i); \ + EMIT(PPC_RAW_STD(r, base, i)); \ } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0) +#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) #define SEEN_FUNC 0x1000 /* might call external helpers */ #define SEEN_STACK 0x2000 /* uses BPF stack */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 0acc9d5fb19e..e809cb5a1631 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -61,7 +61,7 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, data_len)); PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); - PPC_SUB(r_HL, r_HL, r_scratch1); + EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1)); PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); } @@ -70,12 +70,12 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, * TODO: Could also detect whether first instr. sets X and * avoid this (as below, with A). 
*/ - PPC_LI(r_X, 0); + EMIT(PPC_RAW_LI(r_X, 0)); } /* make sure we dont leak kernel information to user */ if (bpf_needs_clear_a(&filter[0])) - PPC_LI(r_A, 0); + EMIT(PPC_RAW_LI(r_A, 0)); } static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) @@ -83,10 +83,10 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) int i; if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); + EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME)); if (ctx->seen & SEEN_DATAREF) { PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - PPC_MTLR(0); + EMIT(PPC_RAW_MTLR(0)); PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); } @@ -100,7 +100,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) } /* The RETs have left a return value in R3. */ - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } #define CHOOSE_LOAD_FUNC(K, func) \ @@ -134,124 +134,124 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /*** ALU ops ***/ case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ ctx->seen |= SEEN_XREG; - PPC_ADD(r_A, r_A, r_X); + EMIT(PPC_RAW_ADD(r_A, r_A, r_X)); break; case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ if (!K) break; - PPC_ADDI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K))); if (K >= 32768) - PPC_ADDIS(r_A, r_A, IMM_HA(K)); + EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K))); break; case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ ctx->seen |= SEEN_XREG; - PPC_SUB(r_A, r_A, r_X); + EMIT(PPC_RAW_SUB(r_A, r_A, r_X)); break; case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; - PPC_ADDI(r_A, r_A, IMM_L(-K)); + EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K))); if (K >= 32768) - PPC_ADDIS(r_A, r_A, IMM_HA(-K)); + EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K))); break; case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ ctx->seen |= SEEN_XREG; - PPC_MULW(r_A, r_A, r_X); + EMIT(PPC_RAW_MULW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K < 32768) - PPC_MULI(r_A, r_A, K); + EMIT(PPC_RAW_MULI(r_A, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_MULW(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1)); } break; case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ ctx->seen |= SEEN_XREG; - PPC_CMPWI(r_X, 0); + EMIT(PPC_RAW_CMPWI(r_X, 0)); if (ctx->pc_ret0 != -1) { PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - PPC_LI(r_ret, 0); + EMIT(PPC_RAW_LI(r_ret, 0)); PPC_JMP(exit_addr); } if (code == (BPF_ALU | BPF_MOD | BPF_X)) { - PPC_DIVWU(r_scratch1, r_A, r_X); - PPC_MULW(r_scratch1, r_X, r_scratch1); - PPC_SUB(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X)); + EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1)); + EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); } else { - PPC_DIVWU(r_A, r_A, r_X); + EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X)); } break; case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, K); - PPC_DIVWU(r_scratch1, r_A, r_scratch2); - PPC_MULW(r_scratch1, r_scratch2, r_scratch1); - PPC_SUB(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2)); + EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1)); + EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); break; case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; PPC_LI32(r_scratch1, K); - PPC_DIVWU(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1)); break; case BPF_ALU | BPF_AND | BPF_X: ctx->seen |= SEEN_XREG; - PPC_AND(r_A, r_A, r_X); + EMIT(PPC_RAW_AND(r_A, r_A, r_X)); break; case BPF_ALU | BPF_AND 
| BPF_K: if (!IMM_H(K)) - PPC_ANDI(r_A, r_A, K); + EMIT(PPC_RAW_ANDI(r_A, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_AND(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1)); } break; case BPF_ALU | BPF_OR | BPF_X: ctx->seen |= SEEN_XREG; - PPC_OR(r_A, r_A, r_X); + EMIT(PPC_RAW_OR(r_A, r_A, r_X)); break; case BPF_ALU | BPF_OR | BPF_K: if (IMM_L(K)) - PPC_ORI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K))); if (K >= 65536) - PPC_ORIS(r_A, r_A, IMM_H(K)); + EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K))); break; case BPF_ANC | SKF_AD_ALU_XOR_X: case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ ctx->seen |= SEEN_XREG; - PPC_XOR(r_A, r_A, r_X); + EMIT(PPC_RAW_XOR(r_A, r_A, r_X)); break; case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (IMM_L(K)) - PPC_XORI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K))); if (K >= 65536) - PPC_XORIS(r_A, r_A, IMM_H(K)); + EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K))); break; case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ ctx->seen |= SEEN_XREG; - PPC_SLW(r_A, r_A, r_X); + EMIT(PPC_RAW_SLW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_LSH | BPF_K: if (K == 0) break; else - PPC_SLWI(r_A, r_A, K); + EMIT(PPC_RAW_SLWI(r_A, r_A, K)); break; case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ ctx->seen |= SEEN_XREG; - PPC_SRW(r_A, r_A, r_X); + EMIT(PPC_RAW_SRW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; else - PPC_SRWI(r_A, r_A, K); + EMIT(PPC_RAW_SRWI(r_A, r_A, K)); break; case BPF_ALU | BPF_NEG: - PPC_NEG(r_A, r_A); + EMIT(PPC_RAW_NEG(r_A, r_A)); break; case BPF_RET | BPF_K: PPC_LI32(r_ret, K); @@ -277,24 +277,24 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, if (ctx->seen) PPC_JMP(exit_addr); else - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } break; case BPF_RET | BPF_A: - PPC_MR(r_ret, r_A); + EMIT(PPC_RAW_MR(r_ret, r_A)); if (i != flen - 1) { if (ctx->seen) PPC_JMP(exit_addr); else - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } break; case BPF_MISC | BPF_TAX: /* X = A */ - PPC_MR(r_X, r_A); + EMIT(PPC_RAW_MR(r_X, r_A)); break; case BPF_MISC | BPF_TXA: /* A = X */ ctx->seen |= SEEN_XREG; - PPC_MR(r_A, r_X); + EMIT(PPC_RAW_MR(r_A, r_X)); break; /*** Constant loads/M[] access ***/ @@ -305,19 +305,19 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LI32(r_X, K); break; case BPF_LD | BPF_MEM: /* A = mem[K] */ - PPC_MR(r_A, r_M + (K & 0xf)); + EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf))); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LDX | BPF_MEM: /* X = mem[K] */ - PPC_MR(r_X, r_M + (K & 0xf)); + EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf))); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_ST: /* mem[K] = A */ - PPC_MR(r_M + (K & 0xf), r_A); + EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_STX: /* mem[K] = X */ - PPC_MR(r_M + (K & 0xf), r_X); + EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X)); ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ @@ -346,13 +346,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, type) != 2); PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); - PPC_CMPDI(r_scratch1, 0); + EMIT(PPC_RAW_CMPDI(r_scratch1, 0)); if (ctx->pc_ret0 != -1) { PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { /* Exit, returning 0; first pass hits here. 
*/ PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); - PPC_LI(r_ret, 0); + EMIT(PPC_RAW_LI(r_ret, 0)); PPC_JMP(exit_addr); } if (code == (BPF_ANC | SKF_AD_IFINDEX)) { @@ -383,9 +383,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); if (PKT_VLAN_PRESENT_BIT) - PPC_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT); + EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT)); if (PKT_VLAN_PRESENT_BIT < 7) - PPC_ANDI(r_A, r_A, 1); + EMIT(PPC_RAW_ANDI(r_A, r_A, 1)); break; case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(sizeof_field(struct sk_buff, @@ -395,8 +395,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ANC | SKF_AD_PKTTYPE: PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); - PPC_ANDI(r_A, r_A, PKT_TYPE_MAX); - PPC_SRWI(r_A, r_A, 5); + EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX)); + EMIT(PPC_RAW_SRWI(r_A, r_A, 5)); break; case BPF_ANC | SKF_AD_CPU: PPC_BPF_LOAD_CPU(r_A); @@ -414,9 +414,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /* Load from [K]. */ ctx->seen |= SEEN_DATAREF; PPC_FUNC_ADDR(r_scratch1, func); - PPC_MTLR(r_scratch1); + EMIT(PPC_RAW_MTLR(r_scratch1)); PPC_LI32(r_addr, K); - PPC_BLRL(); + EMIT(PPC_RAW_BLRL()); /* * Helper returns 'lt' condition on error, and an * appropriate return value in r3 @@ -440,11 +440,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ ctx->seen |= SEEN_DATAREF | SEEN_XREG; PPC_FUNC_ADDR(r_scratch1, func); - PPC_MTLR(r_scratch1); - PPC_ADDI(r_addr, r_X, IMM_L(K)); + EMIT(PPC_RAW_MTLR(r_scratch1)); + EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K))); if (K >= 32768) - PPC_ADDIS(r_addr, r_addr, IMM_HA(K)); - PPC_BLRL(); + EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K))); + EMIT(PPC_RAW_BLRL()); /* If error, cr0.LT set */ PPC_BCC(COND_LT, exit_addr); break; @@ -475,7 +475,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; - /* Fall through */ cond_branch: /* same targets, can avoid doing the test :) */ if (filter[i].jt == filter[i].jf) { @@ -489,30 +488,30 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JEQ | BPF_X: ctx->seen |= SEEN_XREG; - PPC_CMPLW(r_A, r_X); + EMIT(PPC_RAW_CMPLW(r_A, r_X)); break; case BPF_JMP | BPF_JSET | BPF_X: ctx->seen |= SEEN_XREG; - PPC_AND_DOT(r_scratch1, r_A, r_X); + EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X)); break; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: if (K < 32768) - PPC_CMPLWI(r_A, K); + EMIT(PPC_RAW_CMPLWI(r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_CMPLW(r_A, r_scratch1); + EMIT(PPC_RAW_CMPLW(r_A, r_scratch1)); } break; case BPF_JMP | BPF_JSET | BPF_K: if (K < 32768) /* PPC_ANDI is /only/ dot-form */ - PPC_ANDI(r_scratch1, r_A, K); + EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_AND_DOT(r_scratch1, r_A, - r_scratch1); + EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, + r_scratch1)); } break; } diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index be3517ef0574..022103c6a201 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -95,12 +95,12 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. 
*/ if (ctx->seen & SEEN_TAILCALL) { - PPC_LI(b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); /* this goes in the redzone */ PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); } else { - PPC_NOP(); - PPC_NOP(); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); } #define BPF_TAILCALL_PROLOGUE_SIZE 8 @@ -129,8 +129,8 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, BPF_REG_FP)) - PPC_ADDI(b2p[BPF_REG_FP], 1, - STACK_FRAME_MIN_SIZE + ctx->stack_size); + EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + STACK_FRAME_MIN_SIZE + ctx->stack_size)); } static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) @@ -144,10 +144,10 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); + EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - PPC_MTLR(0); + EMIT(PPC_RAW_MTLR(0)); } } } @@ -157,9 +157,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - PPC_MR(3, b2p[BPF_REG_0]); + EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, @@ -171,7 +171,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, /* Load actual entry point from function descriptor */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); /* ... and move it to LR */ - PPC_MTLR(b2p[TMP_REG_1]); + EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); /* * Load TOC from function descriptor at offset 8. * We can clobber r2 since we get called through a @@ -182,9 +182,9 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, #else /* We can clobber r12 */ PPC_FUNC_ADDR(12, func); - PPC_MTLR(12); + EMIT(PPC_RAW_MTLR(12)); #endif - PPC_BLRL(); + EMIT(PPC_RAW_BLRL()); } static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, @@ -206,7 +206,7 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, * that PPC_LI64() can emit. 
*/ for (i = ctx->idx - ctx_idx; i < 5; i++) - PPC_NOP(); + EMIT(PPC_RAW_NOP()); #ifdef PPC64_ELF_ABI_v1 /* @@ -220,8 +220,8 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, PPC_BPF_LL(12, 12, 0); #endif - PPC_MTLR(12); - PPC_BLRL(); + EMIT(PPC_RAW_MTLR(12)); + EMIT(PPC_RAW_BLRL()); } static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -239,9 +239,9 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * if (index >= array->map.max_entries) * goto out; */ - PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); - PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31); - PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); + EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); + EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); PPC_BCC(COND_GE, out); /* @@ -249,42 +249,42 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * goto out; */ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); - PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); + EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); PPC_BCC(COND_GT, out); /* * tail_call_cnt++; */ - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); /* prog = array->ptrs[index]; */ - PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); - PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); + EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); /* * if (prog == NULL) * goto out; */ - PPC_CMPLDI(b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); #ifdef PPC64_ELF_ABI_v1 /* skip past the function descriptor */ - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], + FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); #else - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); #endif - PPC_MTCTR(b2p[TMP_REG_1]); + EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); /* tear down stack, restore NVRs, ... 
*/ bpf_jit_emit_common_epilogue(image, ctx); - PPC_BCTR(); + EMIT(PPC_RAW_BCTR()); /* out: */ } @@ -340,11 +340,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ - PPC_ADD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ - PPC_SUB(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -354,53 +354,53 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, imm = -imm; if (imm) { if (imm >= -32768 && imm < 32768) - PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); else { PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); } } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ if (BPF_CLASS(code) == BPF_ALU) - PPC_MULW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); else - PPC_MULD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ if (imm >= -32768 && imm < 32768) - PPC_MULI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { PPC_LI32(b2p[TMP_REG_1], imm); if (BPF_CLASS(code) == BPF_ALU) - PPC_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, + b2p[TMP_REG_1])); else - PPC_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, + b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg); - PPC_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1]); - PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } else - PPC_DIVWU(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg); - PPC_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1]); - PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } else - PPC_DIVDU(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ @@ -415,35 +415,37 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVWU(b2p[TMP_REG_2], dst_reg, - b2p[TMP_REG_1]); - 
PPC_MULW(b2p[TMP_REG_1], + EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], + dst_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], b2p[TMP_REG_1], - b2p[TMP_REG_2]); - PPC_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1]); + b2p[TMP_REG_2])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1])); } else - PPC_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, + b2p[TMP_REG_1])); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVDU(b2p[TMP_REG_2], dst_reg, - b2p[TMP_REG_1]); - PPC_MULD(b2p[TMP_REG_1], + EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], + dst_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], b2p[TMP_REG_1], - b2p[TMP_REG_2]); - PPC_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1]); + b2p[TMP_REG_2])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1])); } else - PPC_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, + b2p[TMP_REG_1])); break; } goto bpf_alu32_trunc; case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ case BPF_ALU64 | BPF_NEG: /* dst = -dst */ - PPC_NEG(dst_reg, dst_reg); + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); goto bpf_alu32_trunc; /* @@ -451,101 +453,101 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ - PPC_AND(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ if (!IMM_H(imm)) - PPC_ANDI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ - PPC_OR(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); } else { if (IMM_L(imm)) - PPC_ORI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); if (IMM_H(imm)) - PPC_ORIS(dst_reg, dst_reg, IMM_H(imm)); + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); } goto bpf_alu32_trunc; case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ - PPC_XOR(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); } else { if (IMM_L(imm)) - PPC_XORI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); if (IMM_H(imm)) - PPC_XORIS(dst_reg, dst_reg, IMM_H(imm)); + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); } goto bpf_alu32_trunc; case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ /* slw clears top 32 bits */ - PPC_SLW(dst_reg, dst_reg, src_reg); 
+ EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); /* skip zero extension move, but set address map. */ if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ - PPC_SLD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ /* with imm 0, we still need to clear top 32 bits */ - PPC_SLWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ if (imm != 0) - PPC_SLDI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm)); break; case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ - PPC_SRW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ - PPC_SRD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ - PPC_SRWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ if (imm != 0) - PPC_SRDI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm)); break; case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ - PPC_SRAW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ - PPC_SRAD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ - PPC_SRAWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ if (imm != 0) - PPC_SRADI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm)); break; /* @@ -555,10 +557,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ if (imm == 1) { /* special mov32 for zext */ - PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; } - PPC_MR(dst_reg, src_reg); + EMIT(PPC_RAW_MR(dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ @@ -572,7 +574,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, bpf_alu32_trunc: /* Truncate to 32-bits */ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext) - PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; /* @@ -590,11 +592,11 @@ bpf_alu32_trunc: switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23); + EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - PPC_MR(dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 32: /* @@ -602,12 +604,12 @@ bpf_alu32_trunc: * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 
31); + EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23); - PPC_MR(dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 64: /* @@ -619,8 +621,8 @@ bpf_alu32_trunc: * same across all passes */ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); - PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); - PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); break; } break; @@ -629,14 +631,14 @@ emit_clear: switch (imm) { case 16: /* zero-extend 16 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 48); + EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case 32: if (!fp->aux->verifier_zext) /* zero-extend 32 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 32); + EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32)); break; case 64: /* nop */ @@ -650,18 +652,18 @@ emit_clear: case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); src_reg = b2p[TMP_REG_1]; } - PPC_STB(src_reg, dst_reg, off); + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); src_reg = b2p[TMP_REG_1]; } - PPC_STH(src_reg, dst_reg, off); + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ @@ -669,7 +671,7 @@ emit_clear: PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_STW(src_reg, dst_reg, off); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ @@ -686,24 +688,24 @@ emit_clear: /* *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: /* Get EA into TMP_REG_1 */ - PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); /* add value from src_reg into this */ - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); /* store result back */ - PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; - PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, 
b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); + EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -712,19 +714,19 @@ emit_clear: */ /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - PPC_LBZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: - PPC_LHZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - PPC_LWZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; @@ -775,7 +777,7 @@ emit_clear: else bpf_jit_emit_func_call_rel(image, ctx, func_addr); /* move return value from r3 to BPF_REG_0 */ - PPC_MR(b2p[BPF_REG_0], 3); + EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); break; /* @@ -860,9 +862,9 @@ cond_branch: case BPF_JMP32 | BPF_JNE | BPF_X: /* unsigned comparison */ if (BPF_CLASS(code) == BPF_JMP32) - PPC_CMPLW(dst_reg, src_reg); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); else - PPC_CMPLD(dst_reg, src_reg); + EMIT(PPC_RAW_CMPLD(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSLT | BPF_X: @@ -874,21 +876,21 @@ cond_branch: case BPF_JMP32 | BPF_JSLE | BPF_X: /* signed comparison */ if (BPF_CLASS(code) == BPF_JMP32) - PPC_CMPW(dst_reg, src_reg); + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); else - PPC_CMPD(dst_reg, src_reg); + EMIT(PPC_RAW_CMPD(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg); + EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, + src_reg)); } else { int tmp_reg = b2p[TMP_REG_1]; - PPC_AND(tmp_reg, dst_reg, src_reg); - PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31); + EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, + 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -912,19 +914,19 @@ cond_branch: */ if (imm >= 0 && imm < 32768) { if (is_jmp32) - PPC_CMPLWI(dst_reg, imm); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); else - PPC_CMPLDI(dst_reg, imm); + EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ PPC_LI32(b2p[TMP_REG_1], imm); /* ... 
but unsigned comparison */ if (is_jmp32) - PPC_CMPLW(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPLW(dst_reg, + b2p[TMP_REG_1])); else - PPC_CMPLD(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPLD(dst_reg, + b2p[TMP_REG_1])); } break; } @@ -945,17 +947,17 @@ cond_branch: */ if (imm >= -32768 && imm < 32768) { if (is_jmp32) - PPC_CMPWI(dst_reg, imm); + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); else - PPC_CMPDI(dst_reg, imm); + EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { PPC_LI32(b2p[TMP_REG_1], imm); if (is_jmp32) - PPC_CMPW(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPW(dst_reg, + b2p[TMP_REG_1])); else - PPC_CMPD(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPD(dst_reg, + b2p[TMP_REG_1])); } break; } @@ -964,19 +966,19 @@ cond_branch: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm); + EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); else { int tmp_reg = b2p[TMP_REG_1]; PPC_LI32(tmp_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - PPC_AND_DOT(tmp_reg, dst_reg, - tmp_reg); + EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, + tmp_reg)); } else { - PPC_AND(tmp_reg, dst_reg, - tmp_reg); - PPC_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31); + EMIT(PPC_RAW_AND(tmp_reg, dst_reg, + tmp_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, + 0, 0, 31)); } } break; diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6f347fa29f41..9db7ada79d10 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -33,7 +33,8 @@ static unsigned int user_getsp32(unsigned int sp, int is_first) * which means that we've done all that we can do from * interrupt context. */ - if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)p, + sizeof(stack_frame))) return 0; if (!is_first) @@ -51,7 +52,8 @@ static unsigned long user_getsp64(unsigned long sp, int is_first) { unsigned long stack_frame[3]; - if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)sp, + sizeof(stack_frame))) return 0; if (!is_first) diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 0caec3d8d436..df59d0bb121f 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -332,7 +332,7 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, fput(exe_file); } - down_read(&mm->mmap_sem); + mmap_read_lock(mm); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref) continue; @@ -349,13 +349,13 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, *spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path); pr_debug("got dcookie for %pD\n", vma->vm_file); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out: return app_cookie; fail_no_image_cookie: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); printk(KERN_ERR "SPU_PROF: " "%s, line %d: Cannot find dcookie for SPU binary\n", diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 1ad03c55c88c..e53c3c161257 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -15,6 +15,7 @@ #include <asm/firmware.h> #include <asm/ptrace.h> #include <asm/code-patching.h> +#include <asm/inst.h> #define PERF_8xx_ID_CPU_CYCLES 1 #define PERF_8xx_ID_HW_INSTRUCTIONS 2 @@ -99,9 +100,6 @@ static int 
mpc8xx_pmu_add(struct perf_event *event, int flags) unsigned long target = patch_site_addr(&patch__itlbmiss_perf); patch_branch_site(&patch__itlbmiss_exit_1, target, 0); -#ifndef CONFIG_PIN_TLB_TEXT - patch_branch_site(&patch__itlbmiss_exit_2, target, 0); -#endif } val = itlb_miss_counter; break; @@ -110,8 +108,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) unsigned long target = patch_site_addr(&patch__dtlbmiss_perf); patch_branch_site(&patch__dtlbmiss_exit_1, target, 0); - patch_branch_site(&patch__dtlbmiss_exit_2, target, 0); - patch_branch_site(&patch__dtlbmiss_exit_3, target, 0); } val = dtlb_miss_counter; break; @@ -170,24 +166,19 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) case PERF_8xx_ID_ITLB_LOAD_MISS: if (atomic_dec_return(&itlb_miss_ref) == 0) { /* mfspr r10, SPRN_SPRG_SCRATCH0 */ - unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_SPRG_SCRATCH0); + struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_SPRG_SCRATCH0)); patch_instruction_site(&patch__itlbmiss_exit_1, insn); -#ifndef CONFIG_PIN_TLB_TEXT - patch_instruction_site(&patch__itlbmiss_exit_2, insn); -#endif } break; case PERF_8xx_ID_DTLB_LOAD_MISS: if (atomic_dec_return(&dtlb_miss_ref) == 0) { /* mfspr r10, SPRN_DAR */ - unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | - __PPC_SPR(SPRN_DAR); + struct ppc_inst insn = ppc_inst(PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_DAR)); patch_instruction_site(&patch__dtlbmiss_exit_1, insn); - patch_instruction_site(&patch__dtlbmiss_exit_2, insn); - patch_instruction_site(&patch__dtlbmiss_exit_3, insn); } break; } diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 53d614e98537..c02854dea2b2 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ isa207-common.o power8-pmu.o power9-pmu.o \ - generic-compat-pmu.o + generic-compat-pmu.o power10-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index dd5051015008..6c028ee513c0 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -11,7 +11,6 @@ #include <linux/uaccess.h> #include <linux/mm.h> #include <asm/ptrace.h> -#include <asm/pgtable.h> #include <asm/sigcontext.h> #include <asm/ucontext.h> #include <asm/vdso.h> diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index 7a2cb9e1181a..ae24d4a00da6 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,7 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(void __user *ptr, void *buf, int nb); +int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -16,4 +16,27 @@ static inline bool invalid_user_sp(unsigned long sp) return (!sp || (sp & mask) || (sp > top)); } +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. 
+ */ +static inline int __read_user_stack(const void __user *ptr, void *ret, + size_t size) +{ + unsigned long addr = (unsigned long)ptr; + int rc; + + if (addr > TASK_SIZE - size || (addr & (size - 1))) + return -EFAULT; + + rc = copy_from_user_nofault(ret, ptr, size); + + if (IS_ENABLED(CONFIG_PPC64) && rc) + return read_user_stack_slow(ptr, ret, size); + + return rc; +} + #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index 8aa951003141..64e4013d8060 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -11,7 +11,6 @@ #include <linux/uaccess.h> #include <linux/mm.h> #include <asm/ptrace.h> -#include <asm/pgtable.h> #include <asm/sigcontext.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -31,26 +30,9 @@ #endif /* CONFIG_PPC64 */ -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. - */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret) { - int rc; - - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - rc = probe_user_read(ret, ptr, sizeof(*ret)); - - if (IS_ENABLED(CONFIG_PPC64) && rc) - return read_user_stack_slow(ptr, ret, 4); - - return rc; + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index df1ffd8b20f2..fed90e827f3a 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -11,7 +11,6 @@ #include <linux/uaccess.h> #include <linux/mm.h> #include <asm/ptrace.h> -#include <asm/pgtable.h> #include <asm/sigcontext.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -24,57 +23,30 @@ * interrupt context, so if the access faults, we read the page tables * to find which page (if any) is mapped and access it directly. 
*/ -int read_user_stack_slow(void __user *ptr, void *buf, int nb) +int read_user_stack_slow(const void __user *ptr, void *buf, int nb) { - int ret = -EFAULT; - pgd_t *pgdir; - pte_t *ptep, pte; - unsigned int shift; + unsigned long addr = (unsigned long) ptr; unsigned long offset; - unsigned long pfn, flags; + struct page *page; void *kaddr; - pgdir = current->mm->pgd; - if (!pgdir) - return -EFAULT; - - local_irq_save(flags); - ptep = find_current_mm_pte(pgdir, addr, NULL, &shift); - if (!ptep) - goto err_out; - if (!shift) - shift = PAGE_SHIFT; - - /* align address to page boundary */ - offset = addr & ((1UL << shift) - 1); - - pte = READ_ONCE(*ptep); - if (!pte_present(pte) || !pte_user(pte)) - goto err_out; - pfn = pte_pfn(pte); - if (!page_is_ram(pfn)) - goto err_out; - - /* no highmem to worry about here */ - kaddr = pfn_to_kaddr(pfn); - memcpy(buf, kaddr + offset, nb); - ret = 0; -err_out: - local_irq_restore(flags); - return ret; -} + if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) { + kaddr = page_address(page); -static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || - ((unsigned long)ptr & 7)) - return -EFAULT; + /* align address to page boundary */ + offset = addr & ~PAGE_MASK; - if (!probe_user_read(ret, ptr, sizeof(*ret))) + memcpy(buf, kaddr + offset, nb); + put_page(page); return 0; + } + return -EFAULT; +} - return read_user_stack_slow(ptr, ret, 8); +static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) +{ + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3dcfecf858f3..08643cba1494 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -37,12 +37,7 @@ struct cpu_hw_events { struct perf_event *event[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int flags[MAX_HWEVENTS]; - /* - * The order of the MMCR array is: - * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2 - * - 32-bit, MMCR0, MMCR1, MMCR2 - */ - unsigned long mmcr[4]; + struct mmcr_regs mmcr; struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; @@ -77,6 +72,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; /* * 32-bit doesn't have MMCRA but does have an MMCR2, * and a few other names are different. + * Also 32-bit doesn't have MMCR3, SIER2 and SIER3. + * Define them as zero knowing that any code path accessing + * these registers (via mtspr/mfspr) are done under ppmu flag + * check for PPMU_ARCH_31 and we will not enter that code path + * for 32-bit. 
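 *
 * [Editorial sketch, not part of the patch] The struct mmcr_regs used above
 * in place of the old unsigned long mmcr[4] array is assumed to carry one
 * named field per register, inferred from the accessors in this series (the
 * real definition belongs in asm/perf_event_server.h):
 *
 *	struct mmcr_regs {
 *		unsigned long mmcr0;
 *		unsigned long mmcr1;
 *		unsigned long mmcr2;
 *		unsigned long mmcra;
 *		unsigned long mmcr3;
 *	};
 *
 * Named fields replace the old positional convention ("index 2 is MMCRA,
 * index 3 is MMCR2") that the removed cpu_hw_events comment had to spell out.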
*/ #ifdef CONFIG_PPC32 @@ -90,7 +90,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_PMCC_U6 0 #define SPRN_MMCRA SPRN_MMCR2 +#define SPRN_MMCR3 0 +#define SPRN_SIER2 0 +#define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 +#define MMCRA_BHRB_DISABLE 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -121,7 +125,7 @@ static void ebb_event_add(struct perf_event *event) { } static void ebb_switch_out(unsigned long mmcr0) { } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - return cpuhw->mmcr[0]; + return cpuhw->mmcr.mmcr0; } static inline void power_pmu_bhrb_enable(struct perf_event *event) {} @@ -418,17 +422,19 @@ static __u64 power_pmu_bhrb_to(u64 addr) __u64 target; if (is_kernel_addr(addr)) { - if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + if (copy_from_kernel_nofault(&instr, (void *)addr, + sizeof(instr))) return 0; - return branch_target(&instr); + return branch_target((struct ppc_inst *)&instr); } /* Userspace: need copy instruction here then translate it */ - if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) + if (copy_from_user_nofault(&instr, (unsigned int __user *)addr, + sizeof(instr))) return 0; - target = branch_target(&instr); + target = branch_target((struct ppc_inst *)&instr); if ((!target) || (instr & BRANCH_ABSOLUTE)) return target; @@ -464,8 +470,11 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * addresses at this point. Check the privileges before * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) + * Incase of ISA v3.1, BHRB will capture only user-space + * addresses, hence include a check before filtering code */ - if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + if (!(ppmu->flags & PPMU_ARCH_31) && + is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -584,11 +593,16 @@ static void ebb_switch_out(unsigned long mmcr0) current->thread.sdar = mfspr(SPRN_SDAR); current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK; current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; + if (ppmu->flags & PPMU_ARCH_31) { + current->thread.mmcr3 = mfspr(SPRN_MMCR3); + current->thread.sier2 = mfspr(SPRN_SIER2); + current->thread.sier3 = mfspr(SPRN_SIER3); + } } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - unsigned long mmcr0 = cpuhw->mmcr[0]; + unsigned long mmcr0 = cpuhw->mmcr.mmcr0; if (!ebb) goto out; @@ -622,7 +636,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) * unfreeze counters, it should not set exclude_xxx in its events and * instead manage the MMCR2 entirely by itself. 
*/ - mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) { + mtspr(SPRN_MMCR3, current->thread.mmcr3); + mtspr(SPRN_SIER2, current->thread.sier2); + mtspr(SPRN_SIER3, current->thread.sier3); + } out: return mmcr0; } @@ -843,6 +863,11 @@ void perf_event_print_debug(void) pr_info("EBBRR: %016lx BESCR: %016lx\n", mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR)); } + + if (ppmu->flags & PPMU_ARCH_31) { + pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n", + mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3)); + } #endif pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n", mfspr(SPRN_SIAR), sdar, sier); @@ -1194,7 +1219,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; - unsigned long flags, mmcr0, val; + unsigned long flags, mmcr0, val, mmcra; if (!ppmu) return; @@ -1227,12 +1252,24 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + val = mmcra = cpuhw->mmcr.mmcra; + /* * Disable instruction sampling if it was enabled */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) + val &= ~MMCRA_SAMPLE_ENABLE; + + /* Disable BHRB via mmcra (BHRBRD) for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCRA_BHRB_DISABLE; + + /* + * Write SPRN_MMCRA if mmcra has either disabled + * instruction sampling or BHRB. + */ + if (val != mmcra) { + mtspr(SPRN_MMCRA, mmcra); mb(); isync(); } @@ -1306,18 +1343,20 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). */ if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); goto out_enable; } /* * Clear all MMCR settings and recompute them for the new set of events. */ - memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); + memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - cpuhw->mmcr, cpuhw->event)) { + &cpuhw->mmcr, cpuhw->event)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; @@ -1331,11 +1370,11 @@ static void power_pmu_enable(struct pmu *pmu) */ event = cpuhw->event[0]; if (event->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; + cpuhw->mmcr.mmcr0 |= MMCR0_FCP; if (event->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_events_kernel; + cpuhw->mmcr.mmcr0 |= freeze_events_kernel; if (event->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; + cpuhw->mmcr.mmcr0 |= MMCR0_FCHV; } /* @@ -1344,12 +1383,15 @@ static void power_pmu_enable(struct pmu *pmu) * Then unfreeze the events. 
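 *
 * [Editorial note, not part of the patch] On ISA v3.1 the BHRB is kept
 * disabled by default: power_pmu_disable() above sets MMCRA[BHRBRD]
 * (MMCRA_BHRB_DISABLE) when quiescing the PMU, and isa207_compute_mmcr()
 * later in this patch starts every MMCRA with the bit set and only clears
 * it again when an event actually asks for branch sampling, roughly:
 *
 *	if (cpu_has_feature(CPU_FTR_ARCH_31))
 *		mmcra |= MMCRA_BHRB_DISABLE;
 *	...
 *	if (cpu_has_feature(CPU_FTR_ARCH_31) &&
 *	    (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB)))
 *		mmcra &= ~MMCRA_BHRB_DISABLE;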
*/ ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); if (ppmu->flags & PPMU_ARCH_207S) - mtspr(SPRN_MMCR2, cpuhw->mmcr[3]); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); /* * Read off any pre-existing events that need to move @@ -1400,7 +1442,7 @@ static void power_pmu_enable(struct pmu *pmu) perf_event_update_userpage(event); } cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE; out_enable: pmao_restore_workaround(ebb); @@ -1416,9 +1458,9 @@ static void power_pmu_enable(struct pmu *pmu) /* * Enable instruction sampling if necessary */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) { mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra); } out: @@ -1515,9 +1557,16 @@ nocheck: ret = 0; out: if (has_branch_stack(event)) { - power_pmu_bhrb_enable(event); - cpuhw->bhrb_filter = ppmu->bhrb_filter_map( - event->attr.branch_sample_type); + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( + event->attr.branch_sample_type); + + if (bhrb_filter != -1) { + cpuhw->bhrb_filter = bhrb_filter; + power_pmu_bhrb_enable(event); + } } perf_pmu_enable(event->pmu); @@ -1548,7 +1597,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) cpuhw->flags[i-1] = cpuhw->flags[i]; } --cpuhw->n_events; - ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); + ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr); if (event->hw.idx) { write_pmc(event->hw.idx, 0); event->hw.idx = 0; @@ -1569,7 +1618,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) } if (cpuhw->n_events == 0) { /* disable exceptions if no events are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE); } if (has_branch_stack(event)) @@ -1793,7 +1842,7 @@ static void hw_perf_event_destroy(struct perf_event *event) static int hw_perf_cache_event(u64 config, u64 *eventp) { unsigned long type, op, result; - int ev; + u64 ev; if (!ppmu->cache_events) return -EINVAL; @@ -1839,7 +1888,6 @@ static int power_pmu_event_init(struct perf_event *event) int n; int err; struct cpu_hw_events *cpuhw; - u64 bhrb_filter; if (!ppmu) return -ENOENT; @@ -1945,7 +1993,10 @@ static int power_pmu_event_init(struct perf_event *event) err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { - bhrb_filter = ppmu->bhrb_filter_map( + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); if (bhrb_filter == -1) { @@ -2099,6 +2150,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); + } else if (period) { + /* Account for interrupt in case of invalid SIAR */ + if (perf_event_account_interrupt(event)) + power_pmu_stop(event, 0); } } @@ -2177,6 +2232,12 @@ static void __perf_event_interrupt(struct pt_regs *regs) perf_read_regs(regs); + /* + * If perf interrupts hit in a local_irq_disable (soft-masked) region, + * we consider 
them as NMIs. This is required to prevent hash faults on + * user addresses when reading callchains. See the NMI test in + * do_hash_page. + */ nmi = perf_intr_is_nmi(regs); if (nmi) nmi_enter(); @@ -2238,7 +2299,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) * XXX might want to use MSR.PM to keep the events frozen until * we get back out of this interrupt. */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); if (nmi) nmi_exit(); @@ -2260,7 +2321,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu) if (ppmu) { memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; + cpuhw->mmcr.mmcr0 = MMCR0_FC; } return 0; } @@ -2275,6 +2336,7 @@ int register_power_pmu(struct power_pmu *pmu) pmu->name); power_pmu.attr_groups = ppmu->attr_groups; + power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS); #ifdef MSR_HV /* @@ -2306,6 +2368,8 @@ static int __init init_ppc64_pmu(void) return 0; else if (!init_power9_pmu()) return 0; + else if (!init_power10_pmu()) + return 0; else if (!init_ppc970_pmu()) return 0; else diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index 5e5a54d5588e..eb8a6aaf4cc1 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -101,7 +101,7 @@ static int compat_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 573e0b309c0c..6e7e820508df 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -20,6 +20,7 @@ #include <asm/io.h> #include <linux/byteorder/generic.h> +#include <asm/rtas.h> #include "hv-24x7.h" #include "hv-24x7-catalog.h" #include "hv-common.h" @@ -30,6 +31,8 @@ static int interface_version; /* Whether we have to aggregate result data for some domains. */ static bool aggregate_result_elements; +static cpumask_t hv_24x7_cpumask; + static bool domain_is_valid(unsigned domain) { switch (domain) { @@ -57,6 +60,65 @@ static bool is_physical_domain(unsigned domain) } } +/* + * The Processor Module Information system parameter allows transferring + * of certain processor module information from the platform to the OS. + * Refer PAPR+ document to get parameter token value as '43'. + */ + +#define PROCESSOR_MODULE_INFO 43 + +static u32 phys_sockets; /* Physical sockets */ +static u32 phys_chipspersocket; /* Physical chips per socket*/ +static u32 phys_coresperchip; /* Physical cores per chip */ + +/* + * read_24x7_sys_info() + * Retrieve the number of sockets and chips per socket and cores per + * chip details through the get-system-parameter rtas call. + */ +void read_24x7_sys_info(void) +{ + int call_status, len, ntypes; + + spin_lock(&rtas_data_buf_lock); + + /* + * Making system parameter: chips and sockets and cores per chip + * default to 1. 
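 *
 * [Editorial sketch, not part of the patch] The PROCESSOR_MODULE_INFO reply
 * parsed below is a stream of big-endian 16-bit fields; the layout assumed
 * here is inferred from the offsets the code reads:
 *
 *	struct pmi_reply {			// hypothetical, for illustration
 *		__be16 len;			// bytes 0-1: parameter length
 *		__be16 ntypes;			// bytes 2-3: number of module types
 *		__be16 sockets;			// bytes 4-5: physical sockets
 *		__be16 chips_per_socket;	// bytes 6-7
 *		__be16 cores_per_chip;		// bytes 8-9
 *	};
 *
 * which is why the code bails out when len < 8 or ntypes == 0 and otherwise
 * fills phys_sockets/phys_chipspersocket/phys_coresperchip from offsets
 * 4, 6 and 8 of rtas_data_buf.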
+ */ + phys_sockets = 1; + phys_chipspersocket = 1; + phys_coresperchip = 1; + + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + PROCESSOR_MODULE_INFO, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + + if (call_status != 0) { + pr_err("Error calling get-system-parameter %d\n", + call_status); + } else { + len = be16_to_cpup((__be16 *)&rtas_data_buf[0]); + if (len < 8) + goto out; + + ntypes = be16_to_cpup((__be16 *)&rtas_data_buf[2]); + + if (!ntypes) + goto out; + + phys_sockets = be16_to_cpup((__be16 *)&rtas_data_buf[4]); + phys_chipspersocket = be16_to_cpup((__be16 *)&rtas_data_buf[6]); + phys_coresperchip = be16_to_cpup((__be16 *)&rtas_data_buf[8]); + } + +out: + spin_unlock(&rtas_data_buf_lock); +} + /* Domains for which more than one result element are returned for each event. */ static bool domain_needs_aggregation(unsigned int domain) { @@ -386,6 +448,30 @@ static ssize_t device_show_string(struct device *dev, return sprintf(buf, "%s\n", (char *)d->var); } +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask); +} + +static ssize_t sockets_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", phys_sockets); +} + +static ssize_t chipspersocket_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", phys_chipspersocket); +} + +static ssize_t coresperchip_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", phys_coresperchip); +} + static struct attribute *device_str_attr_create_(char *name, char *str) { struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -1032,16 +1118,32 @@ PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); static BIN_ATTR_RO(catalog, 0/* real length varies */); static DEVICE_ATTR_RO(domains); +static DEVICE_ATTR_RO(sockets); +static DEVICE_ATTR_RO(chipspersocket); +static DEVICE_ATTR_RO(coresperchip); +static DEVICE_ATTR_RO(cpumask); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, NULL, }; +static struct attribute *cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cpumask_attrs, +}; + static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, &dev_attr_domains.attr, + &dev_attr_sockets.attr, + &dev_attr_chipspersocket.attr, + &dev_attr_coresperchip.attr, NULL, }; @@ -1057,6 +1159,7 @@ static const struct attribute_group *attr_groups[] = { &event_desc_group, &event_long_desc_group, &if_group, + &cpumask_attr_group, NULL, }; @@ -1400,16 +1503,6 @@ static void h_24x7_event_read(struct perf_event *event) h24x7hw = &get_cpu_var(hv_24x7_hw); h24x7hw->events[i] = event; put_cpu_var(h24x7hw); - /* - * Clear the event count so we can compute the _change_ - * in the 24x7 raw counter value at the end of the txn. - * - * Note that we could alternatively read the 24x7 value - * now and save its value in event->hw.prev_count. But - * that would require issuing a hcall, which would then - * defeat the purpose of using the txn interface. 
- */ - local64_set(&event->count, 0); } put_cpu_var(hv_24x7_reqb); @@ -1567,6 +1660,45 @@ static struct pmu h_24x7_pmu = { .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; +static int ppc_hv_24x7_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&hv_24x7_cpumask)) + cpumask_set_cpu(cpu, &hv_24x7_cpumask); + + return 0; +} + +static int ppc_hv_24x7_cpu_offline(unsigned int cpu) +{ + int target; + + /* Check if exiting cpu is used for collecting 24x7 events */ + if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask)) + return 0; + + /* Find a new cpu to collect 24x7 events */ + target = cpumask_last(cpu_active_mask); + + if (target < 0 || target >= nr_cpu_ids) { + pr_err("hv_24x7: CPU hotplug init failed\n"); + return -1; + } + + /* Migrate 24x7 events to the new target */ + cpumask_set_cpu(target, &hv_24x7_cpumask); + perf_pmu_migrate_context(&h_24x7_pmu, cpu, target); + + return 0; +} + +static int hv_24x7_cpu_hotplug_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, + "perf/powerpc/hv_24x7:online", + ppc_hv_24x7_cpu_online, + ppc_hv_24x7_cpu_offline); +} + static int hv_24x7_init(void) { int r; @@ -1611,10 +1743,17 @@ static int hv_24x7_init(void) if (r) return r; + /* init cpuhotplug */ + r = hv_24x7_cpu_hotplug_init(); + if (r) + return r; + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; + read_24x7_sys_info(); + return 0; } diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index eb82dda884e5..62d0b54086f8 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -976,7 +976,7 @@ static int thread_imc_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; /* Sampling not supported */ @@ -1288,11 +1288,30 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->size = sizeof(*header) + event->header_size; header->misc = 0; - if (is_kernel_addr(data->ip)) - header->misc |= PERF_RECORD_MISC_KERNEL; - else - header->misc |= PERF_RECORD_MISC_USER; - + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) { + case 0:/* when MSR HV and PR not set in the trace-record */ + header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; + break; + case 1: /* MSR HV is 0 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_GUEST_USER; + break; + case 2: /* MSR HV is 1 and PR is 0 */ + header->misc |= PERF_RECORD_MISC_KERNEL; + break; + case 3: /* MSR HV is 1 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_USER; + break; + default: + pr_info("IMC: Unable to set the flag based on MSR bits\n"); + break; + } + } else { + if (is_kernel_addr(data->ip)) + header->misc |= PERF_RECORD_MISC_KERNEL; + else + header->misc |= PERF_RECORD_MISC_USER; + } perf_event_header__init_id(header, data, event); return 0; @@ -1412,7 +1431,7 @@ static int trace_imc_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; /* Return if this is a couting event */ diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h index f755c64da137..80bbf72bfec2 100644 --- a/arch/powerpc/perf/internal.h +++ b/arch/powerpc/perf/internal.h @@ -9,4 +9,5 @@ extern int init_power6_pmu(void); extern int init_power7_pmu(void); extern int init_power8_pmu(void); extern int init_power9_pmu(void); +extern int init_power10_pmu(void); extern int 
init_generic_compat_pmu(void); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4c86da5eb28a..964437adec18 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -55,7 +55,9 @@ static bool is_event_valid(u64 event) { u64 valid_mask = EVENT_VALID_MASK; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + valid_mask = p10_EVENT_VALID_MASK; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) valid_mask = p9_EVENT_VALID_MASK; return !(event & ~valid_mask); @@ -69,6 +71,14 @@ static inline bool is_event_marked(u64 event) return false; } +static unsigned long sdar_mod_val(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_SDAR_MODE(event); + + return p9_SDAR_MODE(event); +} + static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) { /* @@ -79,7 +89,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling * mode and will be un-changed when setting MMCRA[63] (Marked events). * - * Incase of Power9: + * Incase of Power9/power10: * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), * or if group already have any marked events. * For rest @@ -90,8 +100,8 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; - else if (p9_SDAR_MODE(event)) - *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (sdar_mod_val(event)) + *mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT; else *mmcra |= MMCRA_SDAR_MODE_DCACHE; } else @@ -134,7 +144,11 @@ static bool is_thresh_cmp_valid(u64 event) /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. + * Power10: thresh_cmp is replaced by l2_l3 event select. 
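 *
 * [Editorial note, not part of the patch] On Power10 the raw-event bits that
 * previously carried the threshold-compare value are reused: bits 40-44 of
 * the event (p10_L2L3_EVENT_SHIFT / p10_EVENT_L2L3_SEL_MASK, added to
 * isa207-common.h below) select an L2/L3 bus event and are programmed into
 * MMCR2 rather than MMCRA, which is why thresh_cmp is treated as never
 * valid here when CPU_FTR_ARCH_31 is set.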
*/ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return false; + cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -251,7 +265,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + p10_EVENT_CACHE_SEL_MASK; + else + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + EVENT_CACHE_SEL_MASK; ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; if (pmc) { @@ -283,7 +302,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (unit >= 6 && unit <= 9) { - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + mask |= CNST_L2L3_GROUP_MASK; + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { mask |= CNST_CACHE_GROUP_MASK; value |= CNST_CACHE_GROUP_VAL(event & 0xff); @@ -363,10 +385,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; + unsigned long mmcr3; unsigned int pmc, pmc_inuse; int i; @@ -379,7 +402,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - mmcra = mmcr1 = mmcr2 = 0; + mmcra = mmcr1 = mmcr2 = mmcr3 = 0; + + /* + * Disable bhrb unless explicitly requested + * by setting MMCRA (BHRBRD) bit. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcra |= MMCRA_BHRB_DISABLE; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { @@ -438,8 +468,17 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_THR_CTL_SHIFT; val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; mmcra |= val << MMCRA_THR_SEL_SHIFT; - val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - mmcra |= thresh_cmp_val(val); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> EVENT_THR_CMP_SHIFT) & + EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); + } + } + + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + val = (event[i] >> p10_L2L3_EVENT_SHIFT) & + p10_EVENT_L2L3_SEL_MASK; + mmcr2 |= val << p10_L2L3_SEL_SHIFT; } if (event[i] & EVENT_WANTS_BHRB) { @@ -447,6 +486,11 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_IFM_SHIFT; } + /* set MMCRA (BHRBRD) to 0 if there is user request for BHRB */ + if (cpu_has_feature(CPU_FTR_ARCH_31) && + (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB))) + mmcra &= ~MMCRA_BHRB_DISABLE; + if (pevents[i]->attr.exclude_user) mmcr2 |= MMCR2_FCP(pmc); @@ -460,34 +504,43 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr2 |= MMCR2_FCS(pmc); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (pmc <= 4) { + val = (event[i] >> p10_EVENT_MMCR3_SHIFT) & + p10_EVENT_MMCR3_MASK; + mmcr3 |= val << MMCR3_SHIFT(pmc); + } + } + hwc[i] = pmc - 1; } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; /* pmc_inuse is 1-based */ if (pmc_inuse & 2) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x7c) - mmcr[0] |= MMCR0_PMCjCE; + mmcr->mmcr0 |= MMCR0_PMCjCE; /* If we're not using PMC 5 or 6, freeze them */ if (!(pmc_inuse & 0x60)) - 
mmcr[0] |= MMCR0_FC56; + mmcr->mmcr0 |= MMCR0_FC56; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - mmcr[3] = mmcr2; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; + mmcr->mmcr2 = mmcr2; + mmcr->mmcr3 = mmcr3; return 0; } -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size) diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 63fd4f3f6013..044de65e96b9 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -87,6 +87,31 @@ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) +/* Contants to support power10 raw encoding format */ +#define p10_SDAR_MODE_SHIFT 22 +#define p10_SDAR_MODE_MASK 0x3ull +#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \ + p10_SDAR_MODE_MASK) +#define p10_EVENT_L2L3_SEL_MASK 0x1f +#define p10_L2L3_SEL_SHIFT 3 +#define p10_L2L3_EVENT_SHIFT 40 +#define p10_EVENT_THRESH_MASK 0xffffull +#define p10_EVENT_CACHE_SEL_MASK 0x3ull +#define p10_EVENT_MMCR3_MASK 0x7fffull +#define p10_EVENT_MMCR3_SHIFT 45 + +#define p10_EVENT_VALID_MASK \ + ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ + (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ + (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_LINUX_MASK | \ + EVENT_PSEL_MASK)) /* * Layout of constraint bits: * @@ -135,6 +160,9 @@ #define CNST_CACHE_PMC4_VAL (1ull << 54) #define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL +#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) +#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. 
To achieve that we bias the @@ -191,7 +219,7 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) -/* MMCR1 Threshold Compare bit constant for power9 */ +/* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 /* Bits in MMCR2 for PowerISA v2.07 */ @@ -202,6 +230,9 @@ #define MAX_ALT 2 #define MAX_PMU_COUNTERS 6 +/* Bits in MMCR3 for PowerISA v3.10 */ +#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1))) + #define ISA207_SIER_TYPE_SHIFT 15 #define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT) @@ -217,9 +248,9 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]); -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]); +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr); int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index 4d5ef92511d1..1919e9df9165 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -257,7 +257,7 @@ static const u32 pmcsel_mask[N_COUNTER] = { * Compute MMCR0/1/2 values for a set of events. */ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], - unsigned long mmcr[], + struct mmcr_regs *mmcr, struct perf_event *pevents[]) { u8 event_index[N_CLASSES][N_COUNTER]; @@ -321,9 +321,16 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], mmcr0 |= MMCR0_PMCnCE; /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcr2; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcr2 = mmcr2; + /* + * 32-bit doesn't have an MMCRA and uses SPRN_MMCR2 to define + * SPRN_MMCRA. So assign mmcra of cpu_hw_events with `mmcr2` + * value to ensure that any write to this SPRN_MMCRA will + * use mmcr2 value. + */ + mmcr->mmcra = mmcr2; return 0; } @@ -331,12 +338,12 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], * Disable counting by a PMC. * Note that the pmc argument is 0-based here, not 1-based. */ -static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 1) - mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); else - mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); } static int mpc7450_generic_events[] = { @@ -354,7 +361,7 @@ static int mpc7450_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. 
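 *
 * [Editorial worked example, not part of the patch] The MMCR3_SHIFT(pmc)
 * macro added above places a 15-bit per-PMC field inside MMCR3:
 *
 *	MMCR3_SHIFT(1) = 49, MMCR3_SHIFT(2) = 34,
 *	MMCR3_SHIFT(3) = 19, MMCR3_SHIFT(4) = 4
 *
 * so in isa207_compute_mmcr() an event whose bits 45-59 (p10_EVENT_MMCR3_*)
 * decode to, say, 0x1234 on PMC2 contributes 0x1234ULL << 34 to MMCR3.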
*/ -static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0x225 }, [C(OP_WRITE)] = { 0, 0x227 }, diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index a213a0aa5d25..8e53f2fc3fe0 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -13,9 +13,11 @@ #include <asm/ptrace.h> #include <asm/perf_regs.h> +u64 PERF_REG_EXTENDED_MASK; + #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) -#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1)) +#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK)) static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]), @@ -69,10 +71,36 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr), }; +/* Function to return the extended register values */ +static u64 get_ext_regs_value(int idx) +{ + switch (idx) { + case PERF_REG_POWERPC_MMCR0: + return mfspr(SPRN_MMCR0); + case PERF_REG_POWERPC_MMCR1: + return mfspr(SPRN_MMCR1); + case PERF_REG_POWERPC_MMCR2: + return mfspr(SPRN_MMCR2); +#ifdef CONFIG_PPC64 + case PERF_REG_POWERPC_MMCR3: + return mfspr(SPRN_MMCR3); + case PERF_REG_POWERPC_SIER2: + return mfspr(SPRN_SIER2); + case PERF_REG_POWERPC_SIER3: + return mfspr(SPRN_SIER3); +#endif + default: return 0; + } +} + u64 perf_reg_value(struct pt_regs *regs, int idx) { - if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX)) - return 0; + u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + perf_reg_extended_max = PERF_REG_MAX_ISA_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) + perf_reg_extended_max = PERF_REG_MAX_ISA_300; if (idx == PERF_REG_POWERPC_SIER && (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || @@ -85,6 +113,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) IS_ENABLED(CONFIG_PPC32))) return 0; + if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max) + return get_ext_regs_value(idx); + + /* + * If the idx is referring to value beyond the + * supported registers, return 0 with a warning + */ + if (WARN_ON_ONCE(idx >= perf_reg_extended_max)) + return 0; + return regs_get_register(regs, pt_regs_offset[idx]); } diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h new file mode 100644 index 000000000000..60c1b8111082 --- /dev/null +++ b/arch/powerpc/perf/power10-events-list.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +/* + * Power10 event codes. 
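 *
 * [Editorial sketch, not part of the patch] The perf_regs.c change earlier
 * in this series exposes MMCR0/1/2 (plus MMCR3/SIER2/SIER3 on ISA v3.1) as
 * "extended" sample registers. A consumer is assumed to request them the
 * way the perf tool requests interrupt-time registers, e.g.:
 *
 *	struct perf_event_attr attr = {
 *		...
 *		.sample_type      = PERF_SAMPLE_REGS_INTR,
 *		.sample_regs_intr = PERF_REG_PMU_MASK_300,  // PERF_REG_PMU_MASK_31 on P10
 *	};
 *
 * perf_reg_value() then routes any index at or above PERF_REG_POWERPC_MAX
 * (but below the ISA-specific maximum) to get_ext_regs_value(), which reads
 * the SPR directly.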
+ */ +EVENT(PM_RUN_CYC, 0x600f4); +EVENT(PM_DISP_STALL_CYC, 0x100f8); +EVENT(PM_EXEC_STALL, 0x30008); +EVENT(PM_RUN_INST_CMPL, 0x500fa); +EVENT(PM_BR_CMPL, 0x4d05e); +EVENT(PM_BR_MPRED_CMPL, 0x400f6); + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100fc); +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1, 0x3e054); +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0); +/* L1 cache data prefetches */ +EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c); +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fc); +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080); +/* Instruction Demand sectors wriittent into IL1 */ +EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040); +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_REQ, 0x040a0); +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x01340000001c040); +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe); +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc); +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc); + +EVENT(PM_RUN_CYC_ALT, 0x0001e); +EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); + +/* + * Memory Access Events + * + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * To enable capturing of memory profiling, these MMCRA bits + * needs to be programmed and corresponding raw event format + * encoding. + * + * MMCRA bits encoding needed are + * SM (Sampling Mode) + * EM (Eligibility for Random Sampling) + * TECE (Threshold Event Counter Event) + * TS (Threshold Start Event) + * TE (Threshold End Event) + * + * Corresponding Raw Encoding bits: + * sample [EM,SM] + * thresh_sel (TECE) + * thresh start (TS) + * thresh end (TE) + */ + +EVENT(MEM_LOADS, 0x34340401e0); +EVENT(MEM_STORES, 0x343c0401e0); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c new file mode 100644 index 000000000000..83148656b524 --- /dev/null +++ b/arch/powerpc/perf/power10-pmu.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +#define pr_fmt(fmt) "power10-pmu: " fmt + +#include "isa207-common.h" +#include "internal.h" + +/* + * Raw event encoding for Power10: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ] + * | | | | | | + * | | *- IFM (Linux) | | thresh start/stop -* + * | *- BHRB (Linux) | src_sel + * *- EBB (Linux) *invert_bit + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] + * | | | | | | + * | | | | | *- mark + * | | | *- L1/L2/L3 cache_sel | + * | | sdar_mode | + * | *- sampling mode for marked events *- combine + * | + * *- thresh_sel + * + * Below uses IBM bit numbering. 
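 *
 * [Editorial note, not part of the patch] "IBM bit numbering" counts bit 0
 * as the most significant bit of the 64-bit register, so IBM bit i sits at
 * conventional position 63 - i; the kernel's PPC_BIT() helper captures
 * exactly this:
 *
 *	#define PPC_BIT(bit)	(1UL << (63 - (bit)))	// simplified form
 *
 * e.g. MMCRA[63] (SAMPLE_ENABLE) is the least significant bit, and the IFM
 * field MMCRA[32:33] maps to conventional bits 30-31, matching the
 * POWER10_MMCRA_IFM* masks of 0x40000000/0x80000000/0xC0000000 below.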
+ * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] + * MMCR1[29] = pmc3combine[1] + * MMCR1[30] = pmc4combine[0] + * MMCR1[31] = pmc4combine[1] + * + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 + * MMCR1[20:27] = thresh_ctl + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 + * MMCR1[20:27] = thresh_ctl + * else + * MMCRA[48:55] = thresh_ctl (THRESH START/END) + * + * if thresh_sel: + * MMCRA[45:47] = thresh_sel + * + * if l2l3_sel: + * MMCR2[56:60] = l2l3_sel[0:4] + * + * MMCR1[16] = cache_sel[0] + * MMCR1[17] = cache_sel[1] + * + * if mark: + * MMCRA[63] = 1 (SAMPLE_ENABLE) + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) + * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * + * MMCRA[SDAR_MODE] = sdar_mode[0:1] + */ + +/* + * Some power10 event codes. + */ +#define EVENT(_name, _code) enum{_name = _code} + +#include "power10-events-list.h" + +#undef EVENT + +/* MMCRA IFM bits - POWER10 */ +#define POWER10_MMCRA_IFM1 0x0000000040000000UL +#define POWER10_MMCRA_IFM2 0x0000000080000000UL +#define POWER10_MMCRA_IFM3 0x00000000C0000000UL +#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL + +extern u64 PERF_REG_EXTENDED_MASK; + +/* Table of alternatives, sorted by column 0 */ +static const unsigned int power10_event_alternatives[][MAX_ALT] = { + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, +}; + +static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(power10_event_alternatives), flags, + power10_event_alternatives); + + return num_alt; +} + +GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); +GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); +GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); +GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_LD_PREFETCH_CACHE_LINE_MISS); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power10_events_attr[] = { + GENERIC_EVENT_PTR(PM_RUN_CYC), + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_BR_CMPL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + GENERIC_EVENT_PTR(MEM_LOADS), + GENERIC_EVENT_PTR(MEM_STORES), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_REQ), + 
CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power10_pmu_events_group = { + .name = "events", + .attrs = power10_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-59"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:10-11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); +PMU_FORMAT_ATTR(cache_sel, "config:20-21"); +PMU_FORMAT_ATTR(sdar_mode, "config:22-23"); +PMU_FORMAT_ATTR(sample_mode, "config:24-28"); +PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); +PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); +PMU_FORMAT_ATTR(thresh_start, "config:36-39"); +PMU_FORMAT_ATTR(l2l3_sel, "config:40-44"); +PMU_FORMAT_ATTR(src_sel, "config:45-46"); +PMU_FORMAT_ATTR(invert_bit, "config:47"); +PMU_FORMAT_ATTR(src_mask, "config:48-53"); +PMU_FORMAT_ATTR(src_match, "config:54-59"); + +static struct attribute *power10_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + &format_attr_cache_sel.attr, + &format_attr_sdar_mode.attr, + &format_attr_sample_mode.attr, + &format_attr_thresh_sel.attr, + &format_attr_thresh_stop.attr, + &format_attr_thresh_start.attr, + &format_attr_l2l3_sel.attr, + &format_attr_src_sel.attr, + &format_attr_invert_bit.attr, + &format_attr_src_mask.attr, + &format_attr_src_match.attr, + NULL, +}; + +static struct attribute_group power10_pmu_format_group = { + .name = "format", + .attrs = power10_pmu_format_attr, +}; + +static const struct attribute_group *power10_pmu_attr_groups[] = { + &power10_pmu_format_group, + &power10_pmu_events_group, + NULL, +}; + +static int power10_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, +}; + +static u64 power10_bhrb_filter_map(u64 branch_sample_type) +{ + u64 pmu_bhrb_filter = 0; + + /* BHRB and regular PMU events share the same privilege state + * filter configuration. BHRB is always recorded along with a + * regular PMU event. As the privilege state filter is handled + * in the basic PMC configuration of the accompanying regular + * PMU event, we ignore any separate BHRB specific request. 
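 *
 * [Editorial note, not part of the patch] The value returned from this
 * function is a raw MMCRA IFM encoding (POWER10_MMCRA_IFM1/2/3), 0 for
 * "no filtering" (PERF_SAMPLE_BRANCH_ANY), or -1 for combinations the
 * hardware cannot filter. power10_config_bhrb() below masks the result
 * with POWER10_MMCRA_BHRB_MASK before OR-ing it into MMCRA, and the
 * core-book3s changes earlier in this patch only call
 * power_pmu_bhrb_enable() when the map did not return -1.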
+ */ + + /* No branch filter requested */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) + return pmu_bhrb_filter; + + /* Invalid branch filter options - HW does not support */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM2; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM3; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM1; + return pmu_bhrb_filter; + } + + /* Every thing else is unsupported */ + return -1; +} + +static void power10_config_bhrb(u64 pmu_bhrb_filter) +{ + pmu_bhrb_filter &= POWER10_MMCRA_BHRB_MASK; + + /* Enable BHRB filter in PMU */ + mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); +} + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_LD_REF_L1, + [C(RESULT_MISS)] = PM_LD_MISS_L1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ST_MISS_L1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, + [C(RESULT_MISS)] = 0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, + [C(RESULT_MISS)] = 0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = 0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_BR_CMPL, + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +#undef C + +static struct power_pmu power10_pmu = { + .name = "POWER10", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .group_constraint_mask = CNST_CACHE_PMC4_MASK, + .group_constraint_val = CNST_CACHE_PMC4_VAL, + .compute_mmcr = isa207_compute_mmcr, + .config_bhrb = power10_config_bhrb, + .bhrb_filter_map = 
power10_bhrb_filter_map, + .get_constraint = isa207_get_constraint, + .get_alternatives = power10_get_alternatives, + .get_mem_data_src = isa207_get_mem_data_src, + .get_mem_weight = isa207_get_mem_weight, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | + PPMU_ARCH_31, + .n_generic = ARRAY_SIZE(power10_generic_events), + .generic_events = power10_generic_events, + .cache_events = &power10_cache_events, + .attr_groups = power10_pmu_attr_groups, + .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, +}; + +int init_power10_pmu(void) +{ + int rc; + + /* Comes from cpu_specs[] */ + if (!cur_cpu_spec->oprofile_cpu_type || + strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) + return -ENODEV; + + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31; + + rc = register_power_pmu(&power10_pmu); + if (rc) + return rc; + + /* Tell userspace that EBB is supported */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + + return 0; +} diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index f8574547fc6b..a62b2cd7914f 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -448,7 +448,8 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; @@ -586,20 +587,20 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5p_generic_events[] = { @@ -618,7 +619,7 @@ static int power5p_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. 
*/ -static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index da52ecab7c9a..8732b587cf71 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -379,7 +379,8 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -528,20 +529,20 @@ static int power5_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5_generic_events[] = { @@ -560,7 +561,7 @@ static int power5_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 3929cacf72ed..0e318cf87129 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -171,7 +171,7 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -243,13 +243,13 @@ static int p6_compute_mmcr(u64 event[], int n_ev, if (pmc < 4) mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); } - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0xe) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } @@ -457,11 +457,11 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return nalt; } -static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { /* Set PMCxSEL to 0 to disable PMCx */ if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power6_generic_events[] = { @@ -481,7 +481,7 @@ static int power6_generic_events[] = { * are event codes. 
* The "DTLB" and "ITLB" events relate to the DERAT and IERAT. */ -static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x280030, 0x80080 }, [C(OP_WRITE)] = { 0x180032, 0x80088 }, diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index a137813a3076..5e0bf09cf077 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -242,7 +242,8 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -298,20 +299,20 @@ static int power7_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power7_generic_events[] = { @@ -332,7 +333,7 @@ static int power7_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0xc880, 0x400f0 }, [C(OP_WRITE)] = { 0, 0x300f0 }, diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 3a5fcc20ff31..5282e8415ddf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -253,7 +253,7 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 08c3ef796198..2a57e93a79dc 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -90,6 +90,8 @@ enum { #define POWER9_MMCRA_IFM3 0x00000000C0000000UL #define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL +extern u64 PERF_REG_EXTENDED_MASK; + /* Nasty Power9 specific hack */ #define PVR_POWER9_CUMULUS 0x00002000 @@ -310,7 +312,7 @@ static void power9_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. 
*/ -static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, @@ -434,6 +436,7 @@ static struct power_pmu power9_pmu = { .cache_events = &power9_cache_events, .attr_groups = power9_pmu_attr_groups, .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, }; int init_power9_pmu(void) @@ -457,6 +460,9 @@ int init_power9_pmu(void) } } + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300; + rc = register_power_pmu(&power9_pmu); if (rc) return rc; diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 4035d93d87ab..d35223fb112c 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -253,7 +253,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; @@ -393,27 +394,26 @@ static int p970_compute_mmcr(u64 event[], int n_ev, mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { - int shift, i; + int shift; + /* + * Setting the PMCxSEL field to 0x08 disables PMC x. + */ if (pmc <= 1) { shift = MMCR0_PMC1SEL_SH - 7 * pmc; - i = 0; + mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift); } else { shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); - i = 1; + mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift); } - /* - * Setting the PMCxSEL field to 0x08 disables PMC x. - */ - mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); } static int ppc970_generic_events[] = { @@ -432,7 +432,7 @@ static int ppc970_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x8810, 0x3810 }, [C(OP_WRITE)] = { 0x7810, 0x813 }, diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 6da813b65b42..e3e5217c9822 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -7,14 +7,6 @@ config ACADIA help This option enables support for the AMCC 405EZ Acadia evaluation board. -config EP405 - bool "EP405/EP405PC" - depends on 40x - select 405GP - select FORCE_PCI - help - This option enables support for the EP405/EP405PC boards. - config HOTFOOT bool "Hotfoot" depends on 40x @@ -45,33 +37,6 @@ config MAKALU help This option enables support for the AMCC PPC405EX board. -config WALNUT - bool "Walnut" - depends on 40x - default y - select 405GP - select FORCE_PCI - select OF_RTC - help - This option enables support for the IBM PPC405GP evaluation board. 
- -config XILINX_VIRTEX_GENERIC_BOARD - bool "Generic Xilinx Virtex board" - depends on 40x - select XILINX_VIRTEX_II_PRO - select XILINX_VIRTEX_4_FX - select XILINX_INTC - help - This option enables generic support for Xilinx Virtex based boards. - - The generic virtex board support matches any device tree which - specifies 'xilinx,virtex' in its compatible field. This includes - the Xilinx ML3xx and ML4xx reference designs using the powerpc - core. - - Most Virtex designs should use this unless it needs to do some - special configuration at board probe time. - config OBS600 bool "OpenBlockS 600" depends on 40x @@ -86,18 +51,6 @@ config PPC40x_SIMPLE help This option enables the simple PowerPC 40x platform support. -# OAK doesn't exist but wanted to keep this around for any future 403GCX boards -config 403GCX - bool - #depends on OAK - select IBM405_ERR51 - -config 405GP - bool - select IBM405_ERR77 - select IBM405_ERR51 - select IBM_EMAC_ZMII if IBM_EMAC - config 405EX bool select IBM_EMAC_EMAC4 if IBM_EMAC @@ -109,25 +62,6 @@ config 405EZ select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC -config XILINX_VIRTEX - bool - select DEFAULT_UIMAGE - -config XILINX_VIRTEX_II_PRO - bool - select XILINX_VIRTEX - select IBM405_ERR77 - select IBM405_ERR51 - -config XILINX_VIRTEX_4_FX - bool - select XILINX_VIRTEX - -config STB03xxx - bool - select IBM405_ERR77 - select IBM405_ERR51 - config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 40x @@ -135,16 +69,6 @@ config PPC4xx_GPIO help Enable gpiolib support for ppc40x based boards -# 40x errata/workaround config symbols, selected by the CPU models above - -# All 405-based cores up until the 405GPR and 405EP have this errata. -config IBM405_ERR77 - bool - -# All 40x-based cores, up until the 405GPR and 405EP have this errata. -config IBM405_ERR51 - bool - config APM8018X bool "APM8018X" depends on 40x diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile index 828d78340dd9..122de98527c4 100644 --- a/arch/powerpc/platforms/40x/Makefile +++ b/arch/powerpc/platforms/40x/Makefile @@ -1,5 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_WALNUT) += walnut.o -obj-$(CONFIG_XILINX_VIRTEX_GENERIC_BOARD) += virtex.o -obj-$(CONFIG_EP405) += ep405.o obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c deleted file mode 100644 index 1c8aec6e9bb7..000000000000 --- a/arch/powerpc/platforms/40x/ep405.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Architecture- / platform-specific boot-time initialization code for - * IBM PowerPC 4xx based boards. Adapted from original - * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek - * <dan@net4x.com>. - * - * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> - * - * Rewritten and ported to the merged powerpc tree: - * Copyright 2007 IBM Corporation - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * Adapted to EP405 by Ben. Herrenschmidt <benh@kernel.crashing.org> - * - * TODO: Wire up the PCI IRQ mux and the southbridge interrupts - * - * 2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ - -#include <linux/init.h> -#include <linux/of_platform.h> - -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/time.h> -#include <asm/uic.h> -#include <asm/pci-bridge.h> -#include <asm/ppc4xx.h> - -static struct device_node *bcsr_node; -static void __iomem *bcsr_regs; - -/* BCSR registers */ -#define BCSR_ID 0 -#define BCSR_PCI_CTRL 1 -#define BCSR_FLASH_NV_POR_CTRL 2 -#define BCSR_FENET_UART_CTRL 3 -#define BCSR_PCI_IRQ 4 -#define BCSR_XIRQ_SELECT 5 -#define BCSR_XIRQ_ROUTING 6 -#define BCSR_XIRQ_STATUS 7 -#define BCSR_XIRQ_STATUS2 8 -#define BCSR_SW_STAT_LED_CTRL 9 -#define BCSR_GPIO_IRQ_PAR_CTRL 10 -/* there's more, can't be bothered typing them tho */ - - -static const struct of_device_id ep405_of_bus[] __initconst = { - { .compatible = "ibm,plb3", }, - { .compatible = "ibm,opb", }, - { .compatible = "ibm,ebc", }, - {}, -}; - -static int __init ep405_device_probe(void) -{ - of_platform_bus_probe(NULL, ep405_of_bus, NULL); - - return 0; -} -machine_device_initcall(ep405, ep405_device_probe); - -static void __init ep405_init_bcsr(void) -{ - const u8 *irq_routing; - int i; - - /* Find the bloody thing & map it */ - bcsr_node = of_find_compatible_node(NULL, NULL, "ep405-bcsr"); - if (bcsr_node == NULL) { - printk(KERN_ERR "EP405 BCSR not found !\n"); - return; - } - bcsr_regs = of_iomap(bcsr_node, 0); - if (bcsr_regs == NULL) { - printk(KERN_ERR "EP405 BCSR failed to map !\n"); - return; - } - - /* Get the irq-routing property and apply the routing to the CPLD */ - irq_routing = of_get_property(bcsr_node, "irq-routing", NULL); - if (irq_routing == NULL) - return; - for (i = 0; i < 16; i++) { - u8 irq = irq_routing[i]; - out_8(bcsr_regs + BCSR_XIRQ_SELECT, i); - out_8(bcsr_regs + BCSR_XIRQ_ROUTING, irq); - } - in_8(bcsr_regs + BCSR_XIRQ_SELECT); - mb(); - out_8(bcsr_regs + BCSR_GPIO_IRQ_PAR_CTRL, 0xfe); -} - -static void __init ep405_setup_arch(void) -{ - /* Find & init the BCSR CPLD */ - ep405_init_bcsr(); - - pci_set_flags(PCI_REASSIGN_ALL_RSRC); -} - -static int __init ep405_probe(void) -{ - if (!of_machine_is_compatible("ep405")) - return 0; - - return 1; -} - -define_machine(ep405) { - .name = "EP405", - .probe = ep405_probe, - .setup_arch = ep405_setup_arch, - .progress = udbg_progress, - .init_IRQ = uic_init_tree, - .get_irq = uic_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c deleted file mode 100644 index e3d5e095846b..000000000000 --- a/arch/powerpc/platforms/40x/virtex.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Xilinx Virtex (IIpro & 4FX) based board support - * - * Copyright 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
- */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/xilinx_intc.h> -#include <asm/xilinx_pci.h> -#include <asm/ppc4xx.h> - -static const struct of_device_id xilinx_of_bus_ids[] __initconst = { - { .compatible = "xlnx,plb-v46-1.00.a", }, - { .compatible = "xlnx,plb-v34-1.01.a", }, - { .compatible = "xlnx,plb-v34-1.02.a", }, - { .compatible = "xlnx,opb-v20-1.10.c", }, - { .compatible = "xlnx,dcr-v29-1.00.a", }, - { .compatible = "xlnx,compound", }, - {} -}; - -static int __init virtex_device_probe(void) -{ - of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL); - - return 0; -} -machine_device_initcall(virtex, virtex_device_probe); - -static int __init virtex_probe(void) -{ - if (!of_machine_is_compatible("xlnx,virtex")) - return 0; - - return 1; -} - -define_machine(virtex) { - .name = "Xilinx Virtex", - .probe = virtex_probe, - .setup_arch = xilinx_pci_init, - .init_IRQ = xilinx_intc_init_tree, - .get_irq = xintc_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c deleted file mode 100644 index e5797815e2f1..000000000000 --- a/arch/powerpc/platforms/40x/walnut.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Architecture- / platform-specific boot-time initialization code for - * IBM PowerPC 4xx based boards. Adapted from original - * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek - * <dan@net4x.com>. - * - * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu> - * - * Rewritten and ported to the merged powerpc tree: - * Copyright 2007 IBM Corporation - * Josh Boyer <jwboyer@linux.vnet.ibm.com> - * - * 2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <linux/rtc.h> - -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/time.h> -#include <asm/uic.h> -#include <asm/pci-bridge.h> -#include <asm/ppc4xx.h> - -static const struct of_device_id walnut_of_bus[] __initconst = { - { .compatible = "ibm,plb3", }, - { .compatible = "ibm,opb", }, - { .compatible = "ibm,ebc", }, - {}, -}; - -static int __init walnut_device_probe(void) -{ - of_platform_bus_probe(NULL, walnut_of_bus, NULL); - of_instantiate_rtc(); - - return 0; -} -machine_device_initcall(walnut, walnut_device_probe); - -static int __init walnut_probe(void) -{ - if (!of_machine_is_compatible("ibm,walnut")) - return 0; - - pci_set_flags(PCI_REASSIGN_ALL_RSRC); - - return 1; -} - -define_machine(walnut) { - .name = "Walnut", - .probe = walnut_probe, - .progress = udbg_progress, - .init_IRQ = uic_init_tree, - .get_irq = uic_get_irq, - .restart = ppc4xx_reset_system, - .calibrate_decr = generic_calibrate_decr, -}; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 25ebe634a661..78ac6d67a935 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -167,8 +167,7 @@ config YOSEMITE config ISS4xx bool "ISS 4xx Simulator" - depends on (44x || 40x) - select 405GP if 40x + depends on 44x select 440GP if 44x && !PPC_47x select PPC_FPU select OF_RTC @@ -232,33 +231,6 @@ config ICON help This option enables support for the AMCC PPC440SPe evaluation board. 
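The help texts for the generic Virtex options dropped here (and in the 44x section just below) explain that board matching is driven purely by the device tree 'compatible' string. For reference, a condensed sketch of the platform skeleton those deleted files implemented -- the names below are hypothetical, the hooks mirror the removed walnut.c/ep405.c code:

#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
#include <asm/uic.h>
#include <asm/time.h>
#include <asm/ppc4xx.h>

static int __init example_probe(void)
{
        /* the only board-specific part: match the DT compatible string */
        return of_machine_is_compatible("vendor,example-board");
}

define_machine(example) {
        .name            = "Example 4xx board",
        .probe           = example_probe,
        .progress        = udbg_progress,
        .init_IRQ        = uic_init_tree,
        .get_irq         = uic_get_irq,
        .restart         = ppc4xx_reset_system,
        .calibrate_decr  = generic_calibrate_decr,
};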
-config XILINX_VIRTEX440_GENERIC_BOARD - bool "Generic Xilinx Virtex 5 FXT board support" - depends on 44x - select XILINX_VIRTEX_5_FXT - select XILINX_INTC - help - This option enables generic support for Xilinx Virtex based boards - that use a 440 based processor in the Virtex 5 FXT FPGA architecture. - - The generic virtex board support matches any device tree which - specifies 'xlnx,virtex440' in its compatible field. This includes - the Xilinx ML5xx reference designs using the powerpc core. - - Most Virtex 5 designs should use this unless it needs to do some - special configuration at board probe time. - -config XILINX_ML510 - bool "Xilinx ML510 extra support" - depends on XILINX_VIRTEX440_GENERIC_BOARD - select HAVE_PCI - select XILINX_PCI if PCI - select PPC_INDIRECT_PCI if PCI - select PPC_I8259 if PCI - help - This option enables extra support for features on the Xilinx ML510 - board. The ML510 has a PCI bus with ALI south bridge. - config PPC44x_SIMPLE bool "Simple PowerPC 44x board support" depends on 44x @@ -354,13 +326,3 @@ config 476FPE_ERR46 config IBM440EP_ERR42 bool -# Xilinx specific config options. -config XILINX_VIRTEX - bool - select DEFAULT_UIMAGE - -# Xilinx Virtex 5 FXT FPGA architecture, selected by a Xilinx board above -config XILINX_VIRTEX_5_FXT - bool - select XILINX_VIRTEX - diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile index 1b78c6af821a..5ba031f57652 100644 --- a/arch/powerpc/platforms/44x/Makefile +++ b/arch/powerpc/platforms/44x/Makefile @@ -7,8 +7,6 @@ obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o obj-$(CONFIG_EBONY) += ebony.o obj-$(CONFIG_SAM440EP) += sam440ep.o obj-$(CONFIG_WARP) += warp.o -obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o -obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o obj-$(CONFIG_ISS4xx) += iss4xx.o obj-$(CONFIG_CANYONLANDS)+= canyonlands.o obj-$(CONFIG_CURRITUCK) += ppc476.o diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c deleted file mode 100644 index 3eb13ed926ee..000000000000 --- a/arch/powerpc/platforms/44x/virtex.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Xilinx Virtex 5FXT based board support, derived from - * the Xilinx Virtex (IIpro & 4FX) based board support - * - * Copyright 2007 Secret Lab Technologies Ltd. - * Copyright 2008 Xilinx, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. 
- */ - -#include <linux/init.h> -#include <linux/of_platform.h> -#include <asm/machdep.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/xilinx_intc.h> -#include <asm/xilinx_pci.h> -#include <asm/reg.h> -#include <asm/ppc4xx.h> -#include "44x.h" - -static const struct of_device_id xilinx_of_bus_ids[] __initconst = { - { .compatible = "simple-bus", }, - { .compatible = "xlnx,plb-v46-1.00.a", }, - { .compatible = "xlnx,plb-v46-1.02.a", }, - { .compatible = "xlnx,plb-v34-1.01.a", }, - { .compatible = "xlnx,plb-v34-1.02.a", }, - { .compatible = "xlnx,opb-v20-1.10.c", }, - { .compatible = "xlnx,dcr-v29-1.00.a", }, - { .compatible = "xlnx,compound", }, - {} -}; - -static int __init virtex_device_probe(void) -{ - of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL); - - return 0; -} -machine_device_initcall(virtex, virtex_device_probe); - -static int __init virtex_probe(void) -{ - if (!of_machine_is_compatible("xlnx,virtex440")) - return 0; - - return 1; -} - -define_machine(virtex) { - .name = "Xilinx Virtex440", - .probe = virtex_probe, - .setup_arch = xilinx_pci_init, - .init_IRQ = xilinx_intc_init_tree, - .get_irq = xintc_get_irq, - .calibrate_decr = generic_calibrate_decr, - .restart = ppc4xx_reset_system, -}; diff --git a/arch/powerpc/platforms/44x/virtex_ml510.c b/arch/powerpc/platforms/44x/virtex_ml510.c deleted file mode 100644 index 349f218b335c..000000000000 --- a/arch/powerpc/platforms/44x/virtex_ml510.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <asm/i8259.h> -#include <linux/pci.h> -#include "44x.h" - -/** - * ml510_ail_quirk - */ -static void ml510_ali_quirk(struct pci_dev *dev) -{ - /* Enable the IDE controller */ - pci_write_config_byte(dev, 0x58, 0x4c); - /* Assign irq 14 to the primary ide channel */ - pci_write_config_byte(dev, 0x44, 0x0d); - /* Assign irq 15 to the secondary ide channel */ - pci_write_config_byte(dev, 0x75, 0x0f); - /* Set the ide controller in native mode */ - pci_write_config_byte(dev, 0x09, 0xff); - - /* INTB = disabled, INTA = disabled */ - pci_write_config_byte(dev, 0x48, 0x00); - /* INTD = disabled, INTC = disabled */ - pci_write_config_byte(dev, 0x4a, 0x00); - /* Audio = INT7, Modem = disabled. 
*/ - pci_write_config_byte(dev, 0x4b, 0x60); - /* USB = INT7 */ - pci_write_config_byte(dev, 0x74, 0x06); -} -DECLARE_PCI_FIXUP_EARLY(0x10b9, 0x1533, ml510_ali_quirk); - diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c index e6e2adcc7b64..c13d64c3b019 100644 --- a/arch/powerpc/platforms/4xx/pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -1242,7 +1242,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) if (mbase == NULL) { printk(KERN_ERR "%pOF: Can't map internal config space !", port->node); - goto done; + return; } while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA) @@ -1252,9 +1252,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) } if (attempt) port->link = 1; -done: iounmap(mbase); - } static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = { diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 3a9969c429b3..11475c58ea43 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -56,7 +56,7 @@ lite5200_low_power: /* * save stuff BDI overwrites * 0xf0 (0xe0->0x100 gets overwritten when BDI connected; - * even when CONFIG_BDI* is disabled and MMU XLAT commented; heisenbug?)) + * even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?)) * WARNING: self-refresh doesn't seem to work when BDI2000 is connected, * possibly because BDI sets SDRAM registers before wakeup code does */ @@ -248,6 +248,7 @@ mmu_on: blr +_ASM_NOKPROBE_SYMBOL(lite5200_wakeup) /* ---------------------------------------------------------------------- */ @@ -391,6 +392,7 @@ restore_regs: LOAD_SPRN(TBWU, 0x5b); blr +_ASM_NOKPROBE_SYMBOL(restore_regs) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index fc98912f42cf..76a8102bdb98 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -340,7 +340,7 @@ static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq, { int l1irq; int l2irq; - struct irq_chip *uninitialized_var(irqchip); + struct irq_chip *irqchip; void *hndlr; int type; u32 reg; diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c index 1cdd5ed9d896..3b5cb39a564c 100644 --- a/arch/powerpc/platforms/82xx/pq2.c +++ b/arch/powerpc/platforms/82xx/pq2.c @@ -10,6 +10,8 @@ * Copyright (c) 2006 MontaVista Software, Inc. */ +#include <linux/kprobes.h> + #include <asm/cpm2.h> #include <asm/io.h> #include <asm/pci-bridge.h> @@ -29,6 +31,7 @@ void __noreturn pq2_restart(char *cmd) panic("Restart failed\n"); } +NOKPROBE_SYMBOL(pq2_restart) #ifdef CONFIG_PCI static int pq2_pci_exclude_device(struct pci_controller *hose, diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S index 3acd7470dc5e..bc6bd4d0ae96 100644 --- a/arch/powerpc/platforms/83xx/suspend-asm.S +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -548,3 +548,4 @@ mpc83xx_deep_resume: mtdec r0 rfi +_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume) diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index fa3d29dcb57e..b77cbb0a50e1 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -160,7 +160,7 @@ config XES_MPC85xx computers from Extreme Engineering Solutions (X-ES) based on Freescale MPC85xx processors. Manufacturer: Extreme Engineering Solutions, Inc. 
- URL: <http://www.xes-inc.com/> + URL: <https://www.xes-inc.com/> config STX_GP3 bool "Silicon Turnkey Express GP3" diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 27ac38f7e1a9..6aa8defb5857 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -12,11 +12,11 @@ #include <linux/kdev_t.h> #include <linux/delay.h> #include <linux/interrupt.h> +#include <linux/pgtable.h> #include <asm/time.h> #include <asm/machdep.h> #include <asm/pci-bridge.h> -#include <asm/pgtable.h> #include <asm/ppc-pci.h> #include <mm/mmu_decl.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 915ab6710b93..172d2b7cfeb7 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -22,8 +22,8 @@ #include <linux/interrupt.h> #include <linux/fsl_devices.h> #include <linux/of_platform.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/page.h> #include <linux/atomic.h> #include <asm/time.h> diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index a43b8c30157c..a4127b0b161f 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -13,8 +13,8 @@ #include <linux/kernel.h> #include <linux/of_fdt.h> +#include <linux/pgtable.h> #include <asm/machdep.h> -#include <asm/pgtable.h> #include <asm/time.h> #include <asm/udbg.h> #include <asm/mpic.h> diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c index dd97ef277276..e4acf5ce6b07 100644 --- a/arch/powerpc/platforms/85xx/sbc8548.c +++ b/arch/powerpc/platforms/85xx/sbc8548.c @@ -24,8 +24,8 @@ #include <linux/interrupt.h> #include <linux/fsl_devices.h> #include <linux/of_platform.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/page.h> #include <linux/atomic.h> #include <asm/time.h> diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 48f7d96ae37d..fda108bae95f 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -16,9 +16,9 @@ #include <linux/highmem.h> #include <linux/cpu.h> #include <linux/fsl/guts.h> +#include <linux/pgtable.h> #include <asm/machdep.h> -#include <asm/pgtable.h> #include <asm/page.h> #include <asm/mpic.h> #include <asm/cacheflush.h> diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 5b91ea5694e3..87f524e4b09c 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -10,13 +10,14 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/delay.h> +#include <linux/pgtable.h> #include <asm/code-patching.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/pci-bridge.h> #include <asm/mpic.h> #include <asm/cacheflush.h> +#include <asm/inst.h> #include <sysdev/fsl_soc.h> @@ -72,7 +73,7 @@ smp_86xx_kick_cpu(int nr) /* Setup fake reset vector to call __secondary_start_mpc86xx. 
*/ target = (unsigned long) __secondary_start_mpc86xx; - patch_branch(vector, target, BRANCH_SET_LINK); + patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); /* Kick that CPU */ smp_86xx_release_core(nr); @@ -82,7 +83,7 @@ smp_86xx_kick_cpu(int nr) mdelay(1); /* Restore the exception vector */ - patch_instruction(vector, save_vector); + patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); local_irq_restore(flags); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index e0fe670f06f6..abb2b45b2789 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -98,15 +98,6 @@ menu "MPC8xx CPM Options" # 8xx specific questions. comment "Generic MPC8xx Options" -config 8xx_COPYBACK - bool "Copy-Back Data Cache (else Writethrough)" - help - Saying Y here will cause the cache on an MPC8xx processor to be used - in Copy-Back mode. If you say N here, it is used in Writethrough - mode. - - If in doubt, say Y here. - config 8xx_GPIO bool "GPIO API Support" select GPIOLIB @@ -171,4 +162,45 @@ config UCODE_PATCH default y depends on !NO_UCODE_PATCH +menu "8xx advanced setup" + depends on PPC_8xx + +config PIN_TLB + bool "Pinned Kernel TLBs" + depends on ADVANCED_OPTIONS + help + On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each + table 4 TLBs can be pinned. + + It reduces the amount of usable TLBs to 28 (ie by 12%). That's the + reason why we make it selectable. + + This option does nothing, it just activate the selection of what + to pin. + +config PIN_TLB_DATA + bool "Pinned TLB for DATA" + depends on PIN_TLB + default y + help + This pins the first 32 Mbytes of memory with 8M pages. + +config PIN_TLB_IMMR + bool "Pinned TLB for IMMR" + depends on PIN_TLB + default y + help + This pins the IMMR area with a 512kbytes page. In case + CONFIG_PIN_TLB_DATA is also selected, it will reduce + CONFIG_PIN_TLB_DATA to 24 Mbytes. + +config PIN_TLB_TEXT + bool "Pinned TLB for TEXT" + depends on PIN_TLB + default y + help + This pins kernel text with 8M pages. + +endmenu + endmenu diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 4db4ca2e1222..c58b6f1c40e3 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -34,7 +34,6 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/8xx_immap.h> #include <asm/cpm1.h> #include <asm/io.h> diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index c80bd7afd6c5..aed4bc75f352 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -16,7 +16,6 @@ #include <linux/interrupt.h> #include <asm/irq.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/8xx_immap.h> #include <asm/cpm.h> #include <asm/cpm1.h> diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 1f8025383caa..fb7515b4fa9c 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -24,7 +24,7 @@ source "arch/powerpc/platforms/amigaone/Kconfig" config KVM_GUEST bool "KVM Guest support" select EPAPR_PARAVIRT - ---help--- + help This option enables various optimizations for running under the KVM hypervisor. Overhead for the kernel when not running inside KVM should be minimal. 
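Back on the 8xx side: the PIN_TLB, PIN_TLB_DATA, PIN_TLB_IMMR and PIN_TLB_TEXT switches added above only select what the MMU setup code should pin; the consumers test them at build time. A hedged sketch of how such a test typically looks (the helper below is hypothetical, the sizes are the ones quoted in the help texts):

#include <linux/kconfig.h>

/* Hypothetical helper: how much low memory ends up covered by pinned
 * 8M data TLB entries, per the help texts above. */
static unsigned long pinned_data_size(void)
{
        if (!IS_ENABLED(CONFIG_PIN_TLB_DATA))
                return 0;
        /* pinning the IMMR area reduces pinned data from 32M to 24M */
        if (IS_ENABLED(CONFIG_PIN_TLB_IMMR))
                return 24UL << 20;
        return 32UL << 20;
}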
@@ -230,7 +230,7 @@ config TAU config TAU_INT bool "Interrupt driven TAU driver (DANGEROUS)" depends on TAU - ---help--- + help The TAU supports an interrupt driven mode which causes an interrupt whenever the temperature goes out of range. This is the fastest way to get notified the temp has exceeded a range. With this option off, @@ -246,7 +246,7 @@ config TAU_INT config TAU_AVERAGE bool "Average high and low temp" depends on TAU - ---help--- + help The TAU hardware can compare the temperature to an upper and lower bound. The default behavior is to show both the upper and lower bound in /proc/cpuinfo. If the range is large, the temperature is @@ -317,8 +317,4 @@ config MCU_MPC8349EMITX also register MCU GPIOs with the generic GPIO API, so you'll able to use MCU pins as GPIOs. -config XILINX_PCI - bool "Xilinx PCI host bridge support" - depends on PCI && XILINX_VIRTEX - endmenu diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 27a81c291be8..1dc9d3c81872 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx select PPC_HAVE_PMU_SUPPORT select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select HAVE_ARCH_VMAP_STACK + select HAVE_ARCH_VMAP_STACK if !ADB_PMU config PPC_BOOK3S_601 bool "PowerPC 601" @@ -55,8 +55,8 @@ config PPC_8xx select SYS_SUPPORTS_HUGETLBFS select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select PPC_MM_SLICES if HUGETLB_PAGE select HAVE_ARCH_VMAP_STACK + select HUGETLBFS config 40x bool "AMCC 40x" @@ -280,7 +280,7 @@ config PHYS_64BIT bool 'Large physical address support' if E500 || PPC_86xx depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx select PHYS_ADDR_T_64BIT - ---help--- + help This option enables kernel support for larger than 32-bit physical addresses. This feature may not be available on all cores. @@ -293,7 +293,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) - ---help--- + help This option enables kernel support for the Altivec extensions to the PowerPC processor. The kernel currently supports saving and restoring altivec registers, and turning on the 'altivec enable' bit so user @@ -309,7 +309,7 @@ config ALTIVEC config VSX bool "VSX Support" depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU - ---help--- + help This option enables kernel support for the Vector Scaler extensions to the PowerPC processor. The kernel currently supports saving and @@ -330,7 +330,7 @@ config SPE bool "SPE Support" depends on SPE_POSSIBLE default y - ---help--- + help This option enables kernel support for the Signal Processing Extensions (SPE) to the PowerPC processor. The kernel currently supports saving and restoring SPE registers, and turning on the @@ -377,7 +377,7 @@ config PPC_HAVE_KUEP config PPC_KUEP bool "Kernel Userspace Execution Prevention" depends on PPC_HAVE_KUEP - default y + default y if !PPC_BOOK3S_32 help Enable support for Kernel Userspace Execution Prevention (KUEP) @@ -389,7 +389,7 @@ config PPC_HAVE_KUAP config PPC_KUAP bool "Kernel Userspace Access Protection" depends on PPC_HAVE_KUAP - default y + default y if !PPC_BOOK3S_32 help Enable support for Kernel Userspace Access Protection (KUAP) @@ -446,7 +446,7 @@ config SMP depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x select GENERIC_IRQ_MIGRATION bool "Symmetric multi-processing support" if !FORCE_SMP - ---help--- + help This enables support for systems with more than one CPU. 
If you have a system with only one CPU, say N. If you have a system with more than one CPU, say Y. Note that the kernel does not currently diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 0f7c8241912b..f2ff359041ee 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -44,6 +44,7 @@ config SPU_FS tristate "SPU file system" default m depends on PPC_CELL + depends on COREDUMP select SPU_BASE help The SPU file system is used to access Synergistic Processing diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 0be212a27254..c2a0678d85db 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -12,9 +12,9 @@ #include <linux/export.h> #include <linux/of_device.h> #include <linux/of_platform.h> +#include <linux/pgtable.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/prom.h> #include <asm/ptrace.h> #include <asm/cell-regs.h> diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 55b31eadb3c8..ca7849e113d7 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -126,30 +126,8 @@ static struct cpufreq_governor spu_governor = { .stop = spu_gov_stop, .owner = THIS_MODULE, }; - -/* - * module init and destoy - */ - -static int __init spu_gov_init(void) -{ - int ret; - - ret = cpufreq_register_governor(&spu_governor); - if (ret) - printk(KERN_ERR "registration of governor failed\n"); - return ret; -} - -static void __exit spu_gov_exit(void) -{ - cpufreq_unregister_governor(&spu_governor); -} - - -module_init(spu_gov_init); -module_exit(spu_gov_exit); +cpufreq_governor_init(spu_governor); +cpufreq_governor_exit(spu_governor); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); - diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 5927ead4aed2..c0ab62ba6f16 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -23,9 +23,9 @@ #include <linux/types.h> #include <linux/ioport.h> #include <linux/kernel_stat.h> +#include <linux/pgtable.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/prom.h> #include <asm/ptrace.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index ca9ffc1c8685..2124831cf57c 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -943,7 +943,7 @@ static int __init cell_iommu_fixed_mapping_init(void) fbase = max(fbase, dbase + dsize); } - fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT); + fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT); fsize = memblock_phys_mem_size(); if ((fbase + fsize) <= 0x800000000ul) @@ -963,8 +963,8 @@ static int __init cell_iommu_fixed_mapping_init(void) hend = hbase + htab_size_bytes; /* The window must start and end on a segment boundary */ - if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) || - (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) { + if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) { pr_debug("iommu: hash window not segment aligned\n"); return -1; } diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 6af3a6e600a7..9068edef71f7 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ 
b/arch/powerpc/platforms/cell/pervasive.c @@ -15,11 +15,11 @@ #include <linux/percpu.h> #include <linux/types.h> #include <linux/kallsyms.h> +#include <linux/pgtable.h> #include <asm/io.h> #include <asm/machdep.h> #include <asm/prom.h> -#include <asm/pgtable.h> #include <asm/reg.h> #include <asm/cell-regs.h> #include <asm/cpu_has_feature.h> diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 855eedb8d7d7..edefa785d2ef 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -31,7 +31,6 @@ #include <asm/mmu.h> #include <asm/processor.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/pci-bridge.h> diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index 85d795d96a27..c855a0aeb49c 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -21,12 +21,12 @@ #include <linux/err.h> #include <linux/device.h> #include <linux/cpu.h> +#include <linux/pgtable.h> #include <asm/ptrace.h> #include <linux/atomic.h> #include <asm/irq.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/smp.h> diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 026b72c0a452..210785f59271 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -10,8 +10,8 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/ioport.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/prom.h> #include <asm/io.h> diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index cbee3666da07..abdef9bcf432 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,7 +35,7 @@ */ static void *spu_syscall_table[] = { -#define __SYSCALL(nr, entry) entry, +#define __SYSCALL(nr, entry) [nr] = entry, #include <asm/syscall_table_spu.h> #undef __SYSCALL }; diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 8b3296b62f65..026c181a98c5 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -21,22 +21,6 @@ #include "spufs.h" -static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, - size_t size, loff_t *off) -{ - u64 data; - int ret; - - if (spufs_coredump_read[num].read) - return spufs_coredump_read[num].read(ctx, buffer, size, off); - - data = spufs_coredump_read[num].get(ctx); - ret = snprintf(buffer, size, "0x%.16llx", data); - if (ret >= size) - return size; - return ++ret; /* count trailing NULL */ -} - static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) { int i, sz, total = 0; @@ -82,13 +66,20 @@ static int match_context(const void *v, struct file *file, unsigned fd) */ static struct spu_context *coredump_next_context(int *fd) { + struct spu_context *ctx; struct file *file; int n = iterate_fd(current->files, *fd, match_context, NULL); if (!n) return NULL; *fd = n - 1; + + rcu_read_lock(); file = fcheck(*fd); - return SPUFS_I(file_inode(file))->i_ctx; + ctx = SPUFS_I(file_inode(file))->i_ctx; + get_spu_context(ctx); + rcu_read_unlock(); + + return ctx; } int spufs_coredump_extra_notes_size(void) @@ -99,17 +90,23 @@ int spufs_coredump_extra_notes_size(void) fd = 0; while ((ctx = coredump_next_context(&fd)) != NULL) { rc = 
spu_acquire_saved(ctx); - if (rc) + if (rc) { + put_spu_context(ctx); break; + } + rc = spufs_ctx_note_size(ctx, fd); spu_release_saved(ctx); - if (rc < 0) + if (rc < 0) { + put_spu_context(ctx); break; + } size += rc; /* start searching the next fd next time */ fd++; + put_spu_context(ctx); } return size; @@ -118,58 +115,43 @@ int spufs_coredump_extra_notes_size(void) static int spufs_arch_write_note(struct spu_context *ctx, int i, struct coredump_params *cprm, int dfd) { - loff_t pos = 0; - int sz, rc, total = 0; - const int bufsz = PAGE_SIZE; - char *name; - char fullname[80], *buf; + size_t sz = spufs_coredump_read[i].size; + char fullname[80]; struct elf_note en; - size_t skip; - - buf = (void *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - - name = spufs_coredump_read[i].name; - sz = spufs_coredump_read[i].size; + int ret; - sprintf(fullname, "SPU/%d/%s", dfd, name); + sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name); en.n_namesz = strlen(fullname) + 1; en.n_descsz = sz; en.n_type = NT_SPU; if (!dump_emit(cprm, &en, sizeof(en))) - goto Eio; - + return -EIO; if (!dump_emit(cprm, fullname, en.n_namesz)) - goto Eio; - + return -EIO; if (!dump_align(cprm, 4)) - goto Eio; - - do { - rc = do_coredump_read(i, ctx, buf, bufsz, &pos); - if (rc > 0) { - if (!dump_emit(cprm, buf, rc)) - goto Eio; - total += rc; - } - } while (rc == bufsz && total < sz); - - if (rc < 0) - goto out; - - skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; - if (!dump_skip(cprm, skip)) - goto Eio; - - rc = 0; -out: - free_page((unsigned long)buf); - return rc; -Eio: - free_page((unsigned long)buf); - return -EIO; + return -EIO; + + if (spufs_coredump_read[i].dump) { + ret = spufs_coredump_read[i].dump(ctx, cprm); + if (ret < 0) + return ret; + } else { + char buf[32]; + + ret = snprintf(buf, sizeof(buf), "0x%.16llx", + spufs_coredump_read[i].get(ctx)); + if (ret >= sizeof(buf)) + return sizeof(buf); + + /* count trailing the NULL: */ + if (!dump_emit(cprm, buf, ret + 1)) + return -EIO; + } + + if (!dump_skip(cprm, roundup(cprm->pos - ret + sz, 4) - cprm->pos)) + return -EIO; + return 0; } int spufs_coredump_extra_notes_write(struct coredump_params *cprm) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c0f950a3f4e1..62d90a5e23d1 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -9,6 +9,7 @@ #undef DEBUG +#include <linux/coredump.h> #include <linux/fs.h> #include <linux/ioctl.h> #include <linux/export.h> @@ -129,6 +130,14 @@ out: return ret; } +static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf, + size_t size) +{ + if (!dump_emit(cprm, buf, size)) + return -EIO; + return size; +} + #define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ @@ -172,12 +181,9 @@ spufs_mem_release(struct inode *inode, struct file *file) } static ssize_t -__spufs_mem_read(struct spu_context *ctx, char __user *buffer, - size_t size, loff_t *pos) +spufs_mem_dump(struct spu_context *ctx, struct coredump_params *cprm) { - char *local_store = ctx->ops->get_ls(ctx); - return simple_read_from_buffer(buffer, size, pos, local_store, - LS_SIZE); + return spufs_dump_emit(cprm, ctx->ops->get_ls(ctx), LS_SIZE); } static ssize_t @@ -190,7 +196,8 @@ spufs_mem_read(struct file *file, char __user *buffer, ret = spu_acquire(ctx); if (ret) return ret; - ret = __spufs_mem_read(ctx, buffer, size, pos); + ret = 
simple_read_from_buffer(buffer, size, pos, ctx->ops->get_ls(ctx), + LS_SIZE); spu_release(ctx); return ret; @@ -318,7 +325,7 @@ static vm_fault_t spufs_ps_fault(struct vm_fault *vmf, return VM_FAULT_SIGBUS; /* - * Because we release the mmap_sem, the context may be destroyed while + * Because we release the mmap_lock, the context may be destroyed while * we're in spu_wait. Grab an extra reference so it isn't destroyed * in the meantime. */ @@ -327,8 +334,8 @@ static vm_fault_t spufs_ps_fault(struct vm_fault *vmf, /* * We have to wait for context to be loaded before we have * pages to hand out to the user, but we don't want to wait - * with the mmap_sem held. - * It is possible to drop the mmap_sem here, but then we need + * with the mmap_lock held. + * It is possible to drop the mmap_lock here, but then we need * to return VM_FAULT_NOPAGE because the mappings may have * hanged. */ @@ -336,11 +343,11 @@ static vm_fault_t spufs_ps_fault(struct vm_fault *vmf, goto refault; if (ctx->state == SPU_STATE_SAVED) { - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); spu_context_nospu_trace(spufs_ps_fault__sleep, ctx); err = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu); - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); } else { area = ctx->spu->problem_phys + ps_offs; ret = vmf_insert_pfn(vmf->vma, vmf->address, @@ -459,12 +466,10 @@ spufs_regs_open(struct inode *inode, struct file *file) } static ssize_t -__spufs_regs_read(struct spu_context *ctx, char __user *buffer, - size_t size, loff_t *pos) +spufs_regs_dump(struct spu_context *ctx, struct coredump_params *cprm) { - struct spu_lscsa *lscsa = ctx->csa.lscsa; - return simple_read_from_buffer(buffer, size, pos, - lscsa->gprs, sizeof lscsa->gprs); + return spufs_dump_emit(cprm, ctx->csa.lscsa->gprs, + sizeof(ctx->csa.lscsa->gprs)); } static ssize_t @@ -482,7 +487,8 @@ spufs_regs_read(struct file *file, char __user *buffer, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_regs_read(ctx, buffer, size, pos); + ret = simple_read_from_buffer(buffer, size, pos, ctx->csa.lscsa->gprs, + sizeof(ctx->csa.lscsa->gprs)); spu_release_saved(ctx); return ret; } @@ -517,12 +523,10 @@ static const struct file_operations spufs_regs_fops = { }; static ssize_t -__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer, - size_t size, loff_t * pos) +spufs_fpcr_dump(struct spu_context *ctx, struct coredump_params *cprm) { - struct spu_lscsa *lscsa = ctx->csa.lscsa; - return simple_read_from_buffer(buffer, size, pos, - &lscsa->fpcr, sizeof(lscsa->fpcr)); + return spufs_dump_emit(cprm, &ctx->csa.lscsa->fpcr, + sizeof(ctx->csa.lscsa->fpcr)); } static ssize_t @@ -535,7 +539,8 @@ spufs_fpcr_read(struct file *file, char __user * buffer, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_fpcr_read(ctx, buffer, size, pos); + ret = simple_read_from_buffer(buffer, size, pos, &ctx->csa.lscsa->fpcr, + sizeof(ctx->csa.lscsa->fpcr)); spu_release_saved(ctx); return ret; } @@ -590,17 +595,12 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 mbox_data, __user *udata; + u32 mbox_data, __user *udata = (void __user *)buf; ssize_t count; if (len < 4) return -EINVAL; - if (!access_ok(buf, len)) - return -EFAULT; - - udata = (void __user *)buf; - count = spu_acquire(ctx); if (count) return count; @@ -616,7 +616,7 @@ static ssize_t spufs_mbox_read(struct file *file, 
char __user *buf, * but still need to return the data we have * read successfully so far. */ - ret = __put_user(mbox_data, udata); + ret = put_user(mbox_data, udata); if (ret) { if (!count) count = -EFAULT; @@ -698,17 +698,12 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 ibox_data, __user *udata; + u32 ibox_data, __user *udata = (void __user *)buf; ssize_t count; if (len < 4) return -EINVAL; - if (!access_ok(buf, len)) - return -EFAULT; - - udata = (void __user *)buf; - count = spu_acquire(ctx); if (count) goto out; @@ -727,7 +722,7 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, } /* if we can't write at all, return -EFAULT */ - count = __put_user(ibox_data, udata); + count = put_user(ibox_data, udata); if (count) goto out_unlock; @@ -741,7 +736,7 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, * but still need to return the data we have * read successfully so far. */ - ret = __put_user(ibox_data, udata); + ret = put_user(ibox_data, udata); if (ret) break; } @@ -836,17 +831,13 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 wbox_data, __user *udata; + u32 wbox_data, __user *udata = (void __user *)buf; ssize_t count; if (len < 4) return -EINVAL; - udata = (void __user *)buf; - if (!access_ok(buf, len)) - return -EFAULT; - - if (__get_user(wbox_data, udata)) + if (get_user(wbox_data, udata)) return -EFAULT; count = spu_acquire(ctx); @@ -873,7 +864,7 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, /* write as much as possible */ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { int ret; - ret = __get_user(wbox_data, udata); + ret = get_user(wbox_data, udata); if (ret) break; @@ -967,28 +958,26 @@ spufs_signal1_release(struct inode *inode, struct file *file) return 0; } -static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, - size_t len, loff_t *pos) +static ssize_t spufs_signal1_dump(struct spu_context *ctx, + struct coredump_params *cprm) { - int ret = 0; - u32 data; + if (!ctx->csa.spu_chnlcnt_RW[3]) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[3], + sizeof(ctx->csa.spu_chnldata_RW[3])); +} - if (len < 4) +static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, + size_t len) +{ + if (len < sizeof(ctx->csa.spu_chnldata_RW[3])) return -EINVAL; - - if (ctx->csa.spu_chnlcnt_RW[3]) { - data = ctx->csa.spu_chnldata_RW[3]; - ret = 4; - } - - if (!ret) - goto out; - - if (copy_to_user(buf, &data, 4)) + if (!ctx->csa.spu_chnlcnt_RW[3]) + return 0; + if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[3], + sizeof(ctx->csa.spu_chnldata_RW[3]))) return -EFAULT; - -out: - return ret; + return sizeof(ctx->csa.spu_chnldata_RW[3]); } static ssize_t spufs_signal1_read(struct file *file, char __user *buf, @@ -1000,7 +989,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_signal1_read(ctx, buf, len, pos); + ret = __spufs_signal1_read(ctx, buf, len); spu_release_saved(ctx); return ret; @@ -1104,28 +1093,26 @@ spufs_signal2_release(struct inode *inode, struct file *file) return 0; } -static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, - size_t len, loff_t *pos) +static ssize_t spufs_signal2_dump(struct spu_context *ctx, + 
struct coredump_params *cprm) { - int ret = 0; - u32 data; + if (!ctx->csa.spu_chnlcnt_RW[4]) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[4], + sizeof(ctx->csa.spu_chnldata_RW[4])); +} - if (len < 4) +static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, + size_t len) +{ + if (len < sizeof(ctx->csa.spu_chnldata_RW[4])) return -EINVAL; - - if (ctx->csa.spu_chnlcnt_RW[4]) { - data = ctx->csa.spu_chnldata_RW[4]; - ret = 4; - } - - if (!ret) - goto out; - - if (copy_to_user(buf, &data, 4)) + if (!ctx->csa.spu_chnlcnt_RW[4]) + return 0; + if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[4], + sizeof(ctx->csa.spu_chnldata_RW[4]))) return -EFAULT; - -out: - return ret; + return sizeof(ctx->csa.spu_chnldata_RW[4]); } static ssize_t spufs_signal2_read(struct file *file, char __user *buf, @@ -1137,7 +1124,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_signal2_read(ctx, buf, len, pos); + ret = __spufs_signal2_read(ctx, buf, len); spu_release_saved(ctx); return ret; @@ -1961,38 +1948,36 @@ static const struct file_operations spufs_caps_fops = { .release = single_release, }; -static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static ssize_t spufs_mbox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) { - u32 data; - - /* EOF if there's no entry in the mbox */ if (!(ctx->csa.prob.mb_stat_R & 0x0000ff)) return 0; - - data = ctx->csa.prob.pu_mb_R; - - return simple_read_from_buffer(buf, len, pos, &data, sizeof data); + return spufs_dump_emit(cprm, &ctx->csa.prob.pu_mb_R, + sizeof(ctx->csa.prob.pu_mb_R)); } static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { - int ret; struct spu_context *ctx = file->private_data; - - if (!access_ok(buf, len)) - return -EFAULT; + u32 stat, data; + int ret; ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_mbox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.prob.pu_mb_R; spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the mbox */ + if (!(stat & 0x0000ff)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_mbox_info_fops = { @@ -2001,38 +1986,36 @@ static const struct file_operations spufs_mbox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_ibox_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static ssize_t spufs_ibox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) { - u32 data; - - /* EOF if there's no entry in the ibox */ if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) return 0; - - data = ctx->csa.priv2.puint_mb_R; - - return simple_read_from_buffer(buf, len, pos, &data, sizeof data); + return spufs_dump_emit(cprm, &ctx->csa.priv2.puint_mb_R, + sizeof(ctx->csa.priv2.puint_mb_R)); } static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + u32 stat, data; int ret; - if (!access_ok(buf, len)) - return -EFAULT; - ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_ibox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.priv2.puint_mb_R; 
spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the ibox */ + if (!(stat & 0xff0000)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_ibox_info_fops = { @@ -2041,41 +2024,36 @@ static const struct file_operations spufs_ibox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static size_t spufs_wbox_info_cnt(struct spu_context *ctx) { - int i, cnt; - u32 data[4]; - u32 wbox_stat; - - wbox_stat = ctx->csa.prob.mb_stat_R; - cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); - for (i = 0; i < cnt; i++) { - data[i] = ctx->csa.spu_mailbox_data[i]; - } + return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32); +} - return simple_read_from_buffer(buf, len, pos, &data, - cnt * sizeof(u32)); +static ssize_t spufs_wbox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + return spufs_dump_emit(cprm, &ctx->csa.spu_mailbox_data, + spufs_wbox_info_cnt(ctx)); } static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - int ret; - - if (!access_ok(buf, len)) - return -EFAULT; + u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)]; + int ret, count; ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_wbox_info_read(ctx, buf, len, pos); + count = spufs_wbox_info_cnt(ctx); + memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data)); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &data, + count * sizeof(u32)); } static const struct file_operations spufs_wbox_info_fops = { @@ -2084,50 +2062,53 @@ static const struct file_operations spufs_wbox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_dma_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static void spufs_get_dma_info(struct spu_context *ctx, + struct spu_dma_info *info) { - struct spu_dma_info info; - struct mfc_cq_sr *qp, *spuqp; int i; - info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; - info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; - info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; - info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; - info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info->dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; for (i = 0; i < 16; i++) { - qp = &info.dma_info_command_data[i]; - spuqp = &ctx->csa.priv2.spuq[i]; + struct mfc_cq_sr *qp = &info->dma_info_command_data[i]; + struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i]; qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; } +} - return simple_read_from_buffer(buf, len, pos, &info, - sizeof info); +static ssize_t spufs_dma_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + struct spu_dma_info info; + + spufs_get_dma_info(ctx, &info); + return spufs_dump_emit(cprm, &info, sizeof(info)); } 
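The spufs coredump rework in these hunks follows one pattern: every old __spufs_*_read() helper that copied saved state to a user buffer becomes a *_dump() callback that snapshots the state and pushes it into the core file through spufs_dump_emit() (the thin wrapper around dump_emit() introduced earlier in this file's diff). A sketch of the shape of such a callback and its table entry -- the "example" name and the dumped value are placeholders, not part of the patch:

static ssize_t spufs_example_dump(struct spu_context *ctx,
                                  struct coredump_params *cprm)
{
        u32 value = 0;   /* stand-in for one word of saved SPU context state */

        /* returns the number of bytes emitted, or -EIO on failure */
        return spufs_dump_emit(cprm, &value, sizeof(value));
}

/* ...and the matching spufs_coredump_reader entry (see the table below):
 *      { "example", spufs_example_dump, NULL, sizeof(u32) },
 */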
static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_dma_info info; int ret; - if (!access_ok(buf, len)) - return -EFAULT; - ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_dma_info_read(ctx, buf, len, pos); + spufs_get_dma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_dma_info_fops = { @@ -2136,52 +2117,55 @@ static const struct file_operations spufs_dma_info_fops = { .llseek = no_llseek, }; -static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static void spufs_get_proxydma_info(struct spu_context *ctx, + struct spu_proxydma_info *info) { - struct spu_proxydma_info info; - struct mfc_cq_sr *qp, *puqp; - int ret = sizeof info; int i; - if (len < ret) - return -EINVAL; - - if (!access_ok(buf, len)) - return -EFAULT; + info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; - info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; - info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; - info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; for (i = 0; i < 8; i++) { - qp = &info.proxydma_info_command_data[i]; - puqp = &ctx->csa.priv2.puq[i]; + struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i]; + struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i]; qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; } +} - return simple_read_from_buffer(buf, len, pos, &info, - sizeof info); +static ssize_t spufs_proxydma_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + struct spu_proxydma_info info; + + spufs_get_proxydma_info(ctx, &info); + return spufs_dump_emit(cprm, &info, sizeof(info)); } static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_proxydma_info info; int ret; + if (len < sizeof(info)) + return -EINVAL; + ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spufs_get_proxydma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_proxydma_info_fops = { @@ -2625,23 +2609,23 @@ const struct spufs_tree_descr spufs_dir_debug_contents[] = { }; const struct spufs_coredump_reader spufs_coredump_read[] = { - { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])}, - { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) }, + { "regs", spufs_regs_dump, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", spufs_fpcr_dump, NULL, sizeof(struct spu_reg128) }, { "lslr", NULL, spufs_lslr_get, 19 }, { "decr", NULL, spufs_decr_get, 19 }, { "decr_status", NULL, spufs_decr_status_get, 19 }, - { "mem", __spufs_mem_read, NULL, LS_SIZE, }, - { "signal1", __spufs_signal1_read, NULL, sizeof(u32) }, + { "mem", spufs_mem_dump, NULL, LS_SIZE, }, + { "signal1", spufs_signal1_dump, NULL, 
sizeof(u32) }, { "signal1_type", NULL, spufs_signal1_type_get, 19 }, - { "signal2", __spufs_signal2_read, NULL, sizeof(u32) }, + { "signal2", spufs_signal2_dump, NULL, sizeof(u32) }, { "signal2_type", NULL, spufs_signal2_type_get, 19 }, { "event_mask", NULL, spufs_event_mask_get, 19 }, { "event_status", NULL, spufs_event_status_get, 19 }, - { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) }, - { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) }, - { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)}, - { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)}, - { "proxydma_info", __spufs_proxydma_info_read, + { "mbox_info", spufs_mbox_info_dump, NULL, sizeof(u32) }, + { "ibox_info", spufs_ibox_info_dump, NULL, sizeof(u32) }, + { "wbox_info", spufs_wbox_info_dump, NULL, 4 * sizeof(u32)}, + { "dma_info", spufs_dma_info_dump, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", spufs_proxydma_info_dump, NULL, sizeof(struct spu_proxydma_info)}, { "object-id", NULL, spufs_object_id_get, 19 }, { "npc", NULL, spufs_npc_get, 19 }, diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 413c89afe112..1ba4d884febf 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -337,8 +337,7 @@ void spufs_dma_callback(struct spu *spu, int type); extern struct spu_coredump_calls spufs_coredump_calls; struct spufs_coredump_reader { char *name; - ssize_t (*read)(struct spu_context *ctx, - char __user *buffer, size_t size, loff_t *pos); + ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm); u64 (*get)(struct spu_context *ctx); size_t size; }; diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index b020c757d2bf..b2c2bf35b76c 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -8,9 +8,9 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/pgtable.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/irq.h> #include <asm/hydra.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 65a7e01a8f7d..c45435aa5e36 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -34,7 +34,6 @@ #include <linux/timer.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/prom.h> #include <asm/pci-bridge.h> #include <asm/dma.h> diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index f7bb6cb8d1e3..e30cd2915e54 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -16,12 +16,12 @@ #include <linux/kernel_stat.h> #include <linux/delay.h> #include <linux/spinlock.h> +#include <linux/pgtable.h> #include <asm/ptrace.h> #include <linux/atomic.h> #include <asm/irq.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/sections.h> #include <asm/io.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 67e48b0a164e..a802ef957d63 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -172,19 +172,6 @@ static void wii_shutdown(void) flipper_quiesce(); } -define_machine(wii) { - .name = "wii", - .probe = wii_probe, - .setup_arch = wii_setup_arch, - .restart = wii_restart, - .halt = wii_halt, - .init_IRQ = wii_pic_probe, - .get_irq = flipper_pic_get_irq, - 
.calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, - .machine_shutdown = wii_shutdown, -}; - static const struct of_device_id wii_of_bus[] = { { .compatible = "nintendo,hollywood", }, { }, @@ -200,3 +187,15 @@ static int __init wii_device_probe(void) } device_initcall(wii_device_probe); +define_machine(wii) { + .name = "wii", + .probe = wii_probe, + .setup_arch = wii_setup_arch, + .restart = wii_restart, + .halt = wii_halt, + .init_IRQ = wii_pic_probe, + .get_irq = flipper_pic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, + .machine_shutdown = wii_shutdown, +}; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 15b2c6eb506d..f7e66a2005b4 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -42,7 +42,6 @@ #include <asm/processor.h> #include <asm/sections.h> #include <asm/prom.h> -#include <asm/pgtable.h> #include <asm/io.h> #include <asm/pci-bridge.h> #include <asm/iommu.h> diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index 701c4e098fe9..78209bb7629c 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -23,7 +23,6 @@ #include <asm/sections.h> #include <asm/prom.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/machdep.h> #include <asm/time.h> diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c index 1cd4ca14b08d..1bf65d02d3ba 100644 --- a/arch/powerpc/platforms/pasemi/misc.c +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -56,8 +56,7 @@ static int __init pasemi_register_i2c_devices(void) if (!adap_node) continue; - node = NULL; - while ((node = of_get_next_child(adap_node, node))) { + for_each_child_of_node(adap_node, node) { struct i2c_board_info info = {}; const u32 *addr; int len; diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index f4247ade71ca..cf85f0662d0d 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS_bootx_init.o += -fPIC -CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector) +CFLAGS_bootx_init.o += -fno-stack-protector KASAN_SANITIZE_bootx_init.o := n diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index af309ee99114..9d4ecd292255 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -108,7 +108,7 @@ static void * __init bootx_early_getprop(unsigned long base, #define dt_push_token(token, mem) \ do { \ - *(mem) = _ALIGN_UP(*(mem),4); \ + *(mem) = ALIGN(*(mem),4); \ *((u32 *)*(mem)) = token; \ *(mem) += 4; \ } while(0) @@ -150,7 +150,7 @@ static void __init bootx_dt_add_prop(char *name, void *data, int size, /* push property content */ if (size && data) { memcpy((void *)*mem_end, data, size); - *mem_end = _ALIGN_UP(*mem_end + size, 4); + *mem_end = ALIGN(*mem_end + size, 4); } } @@ -303,7 +303,7 @@ static void __init bootx_scan_dt_build_struct(unsigned long base, *lp++ = *p; } *lp = 0; - *mem_end = _ALIGN_UP((unsigned long)lp + 1, 4); + *mem_end = ALIGN((unsigned long)lp + 1, 4); /* get and store all properties */ while (*ppp) { @@ -356,11 +356,11 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) /* Start using memory after the big blob passed by BootX, get * some space for the header */ - mem_start = mem_end = 
_ALIGN_UP(((unsigned long)bi) + start, 4); + mem_start = mem_end = ALIGN(((unsigned long)bi) + start, 4); DBG("Boot params header at: %x\n", mem_start); hdr = (struct boot_param_header *)mem_start; mem_end += sizeof(struct boot_param_header); - rsvmap = (u64 *)(_ALIGN_UP(mem_end, 8)); + rsvmap = (u64 *)(ALIGN(mem_end, 8)); hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start; mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64); @@ -386,7 +386,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase; /* Build structure */ - mem_end = _ALIGN(mem_end, 16); + mem_end = ALIGN(mem_end, 16); DBG("Building device tree structure at: %x\n", mem_end); hdr->off_dt_struct = mem_end - mem_start; bootx_scan_dt_build_struct(base, 4, &mem_end); @@ -404,7 +404,7 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) * also bump mem_reserve_cnt to cause further reservations to * fail since it's too late. */ - mem_end = _ALIGN(mem_end, PAGE_SIZE); + mem_end = ALIGN(mem_end, PAGE_SIZE); DBG("End of boot params: %x\n", mem_end); rsvmap[0] = mem_start; rsvmap[1] = mem_end; diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index da69e0fcb4f1..ced225415486 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -184,6 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtlr r10 blr +_ASM_NOKPROBE_SYMBOL(flush_disable_75x) /* This code is for 745x processors */ flush_disable_745x: @@ -351,4 +352,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) mtmsr r11 /* restore DR and EE */ isync blr +_ASM_NOKPROBE_SYMBOL(flush_disable_745x) #endif /* CONFIG_PPC_BOOK3S_32 */ diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 181caa3f6717..5c77b9a24c0e 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1465,7 +1465,7 @@ static long g5_i2s_enable(struct device_node *node, long param, long value) case 2: if (macio->type == macio_shasta) break; - /* fall through */ + fallthrough; default: return -ENODEV; } diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index bf4be4b53b44..f77a59b5c2e1 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -629,8 +629,7 @@ static void __init kw_i2c_probe(void) for (i = 0; i < chans; i++) kw_i2c_add(host, np, np, i); } else { - for (child = NULL; - (child = of_get_next_child(np, child)) != NULL;) { + for_each_child_of_node(np, child) { const u32 *reg = of_get_property(child, "reg", NULL); if (reg == NULL) @@ -1193,8 +1192,7 @@ static void pmac_i2c_devscan(void (*callback)(struct device_node *dev, * platform function instance */ list_for_each_entry(bus, &pmac_i2c_busses, link) { - for (np = NULL; - (np = of_get_next_child(bus->busnode, np)) != NULL;) { + for_each_child_of_node(bus->busnode, np) { struct whitelist_ent *p; /* If multibus, check if device is on that bus */ if (bus->flags & pmac_i2c_multibus) diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index dc7a5bae8f1c..853ccc4480e2 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -55,7 +55,7 @@ struct chrp_header { u8 cksum; u16 len; char name[12]; - u8 data[0]; + u8 data[]; }; struct core99_header { diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c 
b/arch/powerpc/platforms/powermac/pfunc_base.c index 62311e84a423..f5422506d4b0 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -114,7 +114,7 @@ static void macio_gpio_init_one(struct macio_chip *macio) * Ok, got one, we dont need anything special to track them down, so * we just create them all */ - for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) { + for_each_child_of_node(gparent, gp) { const u32 *reg = of_get_property(gp, "reg", NULL); unsigned long offset; if (reg == NULL) @@ -133,7 +133,7 @@ static void macio_gpio_init_one(struct macio_chip *macio) macio->of_node); /* And now we run all the init ones */ - for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) + for_each_child_of_node(gparent, gp) pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL); /* Note: We do not at this point implement the "at sleep" or "at wake" diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 95fb4feb6ccc..f002b0fa69b8 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -51,7 +51,6 @@ #include <asm/reg.h> #include <asm/sections.h> #include <asm/prom.h> -#include <asm/pgtable.h> #include <asm/io.h> #include <asm/pci-bridge.h> #include <asm/ohare.h> diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index bd6085b470b7..f9a680fdd9c4 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) mtmsr r2 isync b 1b - +_ASM_NOKPROBE_SYMBOL(low_cpu_die) /* * Here is the resume code. */ @@ -282,6 +282,7 @@ _GLOBAL(core99_wake_up) lwz r1,0(r3) /* Pass thru to older resume code ... */ +_ASM_NOKPROBE_SYMBOL(core99_wake_up) /* * Here is the resume code for older machines. * r1 has the physical address of SL_PC(sp). 
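Several hunks in this area (pasemi/misc.c, powermac/low_i2c.c, powermac/pfunc_base.c, and udbg_scc.c below) replace open-coded of_get_next_child() cursors with for_each_child_of_node(). The two forms are equivalent, since of_get_next_child() already drops the reference on the previous child and takes one on the next; the macro just states the intent directly. A small before/after sketch, where parent and handle_child() are hypothetical:

	struct device_node *child;

	/* old style: manual cursor */
	for (child = NULL; (child = of_get_next_child(parent, child)) != NULL; )
		handle_child(child);

	/* new style: identical iteration and reference handling */
	for_each_child_of_node(parent, child)
		handle_child(child);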
@@ -429,6 +430,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) lwz r0,4(r1) mtlr r0 blr +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) turn_on_mmu: mflr r4 @@ -438,6 +440,7 @@ turn_on_mmu: sync isync rfi +_ASM_NOKPROBE_SYMBOL(turn_on_mmu) #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index be2ab5b11e57..eb23264910e1 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -30,13 +30,13 @@ #include <linux/hardirq.h> #include <linux/cpu.h> #include <linux/compiler.h> +#include <linux/pgtable.h> #include <asm/ptrace.h> #include <linux/atomic.h> #include <asm/code-patching.h> #include <asm/irq.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/sections.h> #include <asm/io.h> #include <asm/prom.h> @@ -49,6 +49,7 @@ #include <asm/keylargo.h> #include <asm/pmac_low_i2c.h> #include <asm/pmac_pfunc.h> +#include <asm/inst.h> #include "pmac.h" @@ -813,7 +814,7 @@ static int smp_core99_kick_cpu(int nr) * b __secondary_start_pmac_0 + nr*8 */ target = (unsigned long) __secondary_start_pmac_0 + nr * 8; - patch_branch(vector, target, BRANCH_SET_LINK); + patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); /* Put some life in our friend */ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); @@ -826,7 +827,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction(vector, save_vector); + patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index b36ddee17c87..31d6213a6c8f 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -28,7 +28,6 @@ #include <asm/sections.h> #include <asm/prom.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/machdep.h> #include <asm/time.h> #include <asm/nvram.h> diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index 6b61a18e8db3..f286bdfe8346 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -80,7 +80,7 @@ void udbg_scc_init(int force_scc) path = of_get_property(of_chosen, "linux,stdout-path", NULL); if (path != NULL) stdout = of_find_node_by_path(path); - for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) { + for_each_child_of_node(escc, ch) { if (ch == stdout) ch_def = of_node_get(ch); if (of_node_name_eq(ch, "ch-a")) diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index c0f8120045c3..2eb6ae150d1f 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -11,13 +11,14 @@ obj-$(CONFIG_FA_DUMP) += opal-fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o obj-$(CONFIG_OPAL_CORE) += opal-core.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o +obj-$(CONFIG_PCI_IOV) += pci-sriov.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o obj-$(CONFIG_OCXL_BASE) += 
ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 79409e005fcd..9af8c3b98853 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -338,6 +338,28 @@ static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) return 0; } +static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev) +{ + struct pci_controller *hose = pdev->bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pci_dev *parent = pdev->bus->self; + +#ifdef CONFIG_PCI_IOV + /* for VFs we use the PF's PE as the upstream PE */ + if (pdev->is_virtfn) + parent = pdev->physfn; +#endif + + /* otherwise use the PE of our parent bridge */ + if (parent) { + struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent); + + return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0); + } + + return NULL; +} + /** * pnv_eeh_probe - Do probe on PCI device * @pdev: pci_dev to probe @@ -350,6 +372,7 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) struct pci_controller *hose = pdn->phb; struct pnv_phb *phb = hose->private_data; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + struct eeh_pe *upstream_pe; uint32_t pcie_flags; int ret; int config_addr = (pdn->busno << 8) | (pdn->devfn); @@ -372,19 +395,18 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) } /* Skip for PCI-ISA bridge */ - if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) return NULL; eeh_edev_dbg(edev, "Probing device\n"); /* Initialize eeh device */ - edev->class_code = pdn->class_code; edev->mode &= 0xFFFFFF00; edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); - if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, @@ -399,8 +421,10 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) edev->pe_config_addr = phb->ioda.pe_rmap[config_addr]; + upstream_pe = pnv_eeh_get_upstream_pe(pdev); + /* Create PE */ - ret = eeh_add_to_parent_pe(edev); + ret = eeh_pe_tree_insert(edev, upstream_pe); if (ret) { eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret); return NULL; @@ -535,18 +559,6 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option) return 0; } -/** - * pnv_eeh_get_pe_addr - Retrieve PE address - * @pe: EEH PE - * - * Retrieve the PE address according to the given tranditional - * PCI BDF (Bus/Device/Function) address. 
- */ -static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) -{ - return pe->addr; -} - static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) { struct pnv_phb *phb = pe->phb->private_data; @@ -850,32 +862,32 @@ static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) case EEH_RESET_HOT: /* Don't report linkDown event */ if (aer) { - eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, &ctrl); ctrl |= PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, ctrl); } - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_SETTLE_TIME); /* Continue reporting linkDown event */ if (aer) { - eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, &ctrl); ctrl &= ~PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, ctrl); } @@ -944,11 +956,12 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, int pos, u16 mask) { + struct eeh_dev *edev = pdn->edev; int i, status = 0; /* Wait for Transaction Pending bit to be cleared */ for (i = 0; i < 4; i++) { - eeh_ops->read_config(pdn, pos, 2, &status); + eeh_ops->read_config(edev, pos, 2, &status); if (!(status & mask)) return; @@ -969,7 +982,7 @@ static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) if (WARN_ON(!edev->pcie_cap)) return -ENOTTY; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); if (!(reg & PCI_EXP_DEVCAP_FLR)) return -ENOTTY; @@ -979,18 +992,18 @@ static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) pnv_eeh_wait_for_pending(pdn, "", edev->pcie_cap + PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_TRPND); - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, ®); reg |= PCI_EXP_DEVCTL_BCR_FLR; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, reg); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, ®); reg &= ~PCI_EXP_DEVCTL_BCR_FLR; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, reg); msleep(EEH_PE_RST_SETTLE_TIME); break; @@ -1007,7 +1020,7 @@ static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) if (WARN_ON(!edev->af_cap)) return -ENOTTY; - eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap); if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) return -ENOTTY; @@ -1022,12 +1035,12 @@ static int 
pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) pnv_eeh_wait_for_pending(pdn, "AF", edev->af_cap + PCI_AF_CTRL, PCI_AF_STATUS_TP << 8); - eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, PCI_AF_CTRL_FLR); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0); msleep(EEH_PE_RST_SETTLE_TIME); break; } @@ -1261,9 +1274,11 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) return false; } -static int pnv_eeh_read_config(struct pci_dn *pdn, +static int pnv_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + if (!pdn) return PCIBIOS_DEVICE_NOT_FOUND; @@ -1275,9 +1290,11 @@ static int pnv_eeh_read_config(struct pci_dn *pdn, return pnv_pci_cfg_read(pdn, where, size, val); } -static int pnv_eeh_write_config(struct pci_dn *pdn, +static int pnv_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + if (!pdn) return PCIBIOS_DEVICE_NOT_FOUND; @@ -1631,34 +1648,24 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } -static int pnv_eeh_restore_config(struct pci_dn *pdn) +static int pnv_eeh_restore_config(struct eeh_dev *edev) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; s64 ret = 0; - int config_addr = (pdn->busno << 8) | (pdn->devfn); if (!edev) return -EEXIST; - /* - * We have to restore the PCI config space after reset since the - * firmware can't see SRIOV VFs. - * - * FIXME: The MPS, error routing rules, timeout setting are worthy - * to be exported by firmware in extendible way. - */ - if (edev->physfn) { - ret = eeh_restore_vf_config(pdn); - } else { - phb = pdn->phb->private_data; - ret = opal_pci_reinit(phb->opal_id, - OPAL_REINIT_PCI_DEV, config_addr); - } + if (edev->physfn) + return 0; + + phb = edev->controller->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->bdfn); if (ret) { pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", - __func__, config_addr, ret); + __func__, edev->bdfn, ret); return -EIO; } @@ -1670,7 +1677,6 @@ static struct eeh_ops pnv_eeh_ops = { .init = pnv_eeh_init, .probe = pnv_eeh_probe, .set_option = pnv_eeh_set_option, - .get_pe_addr = pnv_eeh_get_pe_addr, .get_state = pnv_eeh_get_state, .reset = pnv_eeh_reset, .get_log = pnv_eeh_get_log, diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 78599bca66c2..345ab062b21a 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -48,7 +48,7 @@ static bool default_stop_found; * First stop state levels when SPR and TB loss can occur. */ static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; -static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1; +static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; /* * psscr value and mask of the deepest stop idle state. 
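The read_config()/write_config() conversions in the eeh-powernv.c hunks above are mechanical: the eeh_ops accessors now take the eeh_dev itself and look up the pci_dn internally, so callers that start from a pci_dn fetch the eeh_dev first. A condensed sketch of the call-site change, using a register offset that appears in the hunks above:

	struct eeh_dev *edev = pdn->edev;	/* callers used to pass the pci_dn directly */
	u32 ctrl;

	/* before: eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); */
	eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);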
@@ -73,9 +73,6 @@ static int pnv_save_sprs_for_deep_states(void) */ uint64_t lpcr_val = mfspr(SPRN_LPCR); uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); uint64_t hmeer_val = mfspr(SPRN_HMEER); uint64_t msr_val = MSR_IDLE; uint64_t psscr_val = pnv_deepest_stop_psscr_val; @@ -117,6 +114,9 @@ static int pnv_save_sprs_for_deep_states(void) /* Only p8 needs to set extra HID registers */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); if (rc != 0) @@ -611,6 +611,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) unsigned long srr1; unsigned long pls; unsigned long mmcr0 = 0; + unsigned long mmcra = 0; struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ bool sprs_saved = false; @@ -657,7 +658,22 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) */ mmcr0 = mfspr(SPRN_MMCR0); } - if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) { + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* + * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit. + * If the user hasn't asked for the BHRB to be + * written, the value of MMCRA[BHRBRD] is 1. + * On wakeup from stop, MMCRA[BHRBRD] will be 0, + * since it is a privileged resource and will be lost. + * Thus, if we do not save and restore the MMCRA[BHRBRD], + * hardware will be needlessly writing to the BHRB + * in problem mode. + */ + mmcra = mfspr(SPRN_MMCRA); + } + + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { sprs.lpcr = mfspr(SPRN_LPCR); sprs.hfscr = mfspr(SPRN_HFSCR); sprs.fscr = mfspr(SPRN_FSCR); @@ -700,8 +716,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { - unsigned long mmcra; - /* * We don't need an isync after the mtsprs here because the * upcoming mtmsrd is execution synchronizing. @@ -721,6 +735,10 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_MMCR0, mmcr0); } + + /* Reload MMCRA to restore BHRB disable bit for POWER10 */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mtspr(SPRN_MMCRA, mmcra); + /* * DD2.2 and earlier need to set then clear bit 60 in MMCRA * to ensure the PMU starts running. @@ -741,7 +759,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) * just always test PSSCR for SPR/TB state loss. */ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; - if (likely(pls < pnv_first_spr_loss_level)) { + if (likely(pls < deep_spr_loss_state)) { if (sprs_saved) atomic_stop_thread_idle(); goto out; @@ -1088,7 +1106,7 @@ static void __init pnv_power9_idle_init(void) * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
*/ pnv_first_tb_loss_level = MAX_STOP_STATE + 1; - pnv_first_spr_loss_level = MAX_STOP_STATE + 1; + deep_spr_loss_state = MAX_STOP_STATE + 1; for (i = 0; i < nr_pnv_idle_states; i++) { int err; struct pnv_idle_states_t *state = &pnv_idle_states[i]; @@ -1099,8 +1117,8 @@ static void __init pnv_power9_idle_init(void) pnv_first_tb_loss_level = psscr_rl; if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && - (pnv_first_spr_loss_level > psscr_rl)) - pnv_first_spr_loss_level = psscr_rl; + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; /* * The idle code does not deal with TB loss occurring @@ -1111,8 +1129,8 @@ static void __init pnv_power9_idle_init(void) * compatibility. */ if ((state->flags & OPAL_PM_TIMEBASE_STOP) && - (pnv_first_spr_loss_level > psscr_rl)) - pnv_first_spr_loss_level = psscr_rl; + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; err = validate_psscr_val_mask(&state->psscr_val, &state->psscr_mask, @@ -1158,7 +1176,7 @@ static void __init pnv_power9_idle_init(void) } pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", - pnv_first_spr_loss_level); + deep_spr_loss_state); pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", pnv_first_tb_loss_level); @@ -1270,7 +1288,7 @@ static int pnv_parse_cpuidle_dt(void) /* Read residencies */ if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", temp_u32, nr_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); rc = -EINVAL; goto out; } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index b95b9e3c4c98..abeaa533b976 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -15,6 +15,7 @@ #include <asm/debugfs.h> #include <asm/powernv.h> +#include <asm/ppc-pci.h> #include <asm/opal.h> #include "pci.h" @@ -425,9 +426,10 @@ static void pnv_comp_attach_table_group(struct npu_comp *npucomp, ++npucomp->pe_num; } -struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) +static struct iommu_table_group * + pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) { - struct iommu_table_group *table_group; + struct iommu_table_group *compound_group; struct npu_comp *npucomp; struct pci_dev *gpdev = NULL; struct pci_controller *hose; @@ -446,39 +448,52 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) hose = pci_bus_to_host(npdev->bus); if (hose->npu) { - table_group = &hose->npu->npucomp.table_group; - - if (!table_group->group) { - table_group->ops = &pnv_npu_peers_ops; - iommu_register_group(table_group, - hose->global_number, - pe->pe_number); - } + /* P9 case: compound group is per-NPU (all gpus, all links) */ + npucomp = &hose->npu->npucomp; } else { - /* Create a group for 1 GPU and attached NPUs for POWER8 */ - pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL); - table_group = &pe->npucomp->table_group; - table_group->ops = &pnv_npu_peers_ops; - iommu_register_group(table_group, hose->global_number, - pe->pe_number); + /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */ + npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL); } - /* Steal capabilities from a GPU PE */ - table_group->max_dynamic_windows_supported = - pe->table_group.max_dynamic_windows_supported; - table_group->tce32_start = pe->table_group.tce32_start; - table_group->tce32_size = 
pe->table_group.tce32_size; - table_group->max_levels = pe->table_group.max_levels; - if (!table_group->pgsizes) - table_group->pgsizes = pe->table_group.pgsizes; + compound_group = &npucomp->table_group; + if (!compound_group->group) { + compound_group->ops = &pnv_npu_peers_ops; + iommu_register_group(compound_group, hose->global_number, + pe->pe_number); - npucomp = container_of(table_group, struct npu_comp, table_group); + /* Steal capabilities from a GPU PE */ + compound_group->max_dynamic_windows_supported = + pe->table_group.max_dynamic_windows_supported; + compound_group->tce32_start = pe->table_group.tce32_start; + compound_group->tce32_size = pe->table_group.tce32_size; + compound_group->max_levels = pe->table_group.max_levels; + if (!compound_group->pgsizes) + compound_group->pgsizes = pe->table_group.pgsizes; + } + + /* + * The gpu would have been added to the iommu group that's created + * for the PE. Pull it out now. + */ + iommu_del_device(&gpdev->dev); + + /* + * I'm not sure this is strictly required, but it's probably a good idea + * since the table_group for the PE is going to be attached to the + * compound table group. If we leave the PE's iommu group active then + * we might have the same table_group being modifiable via two separate + * iommu groups. + */ + iommu_group_put(pe->table_group.group); + + /* now put the GPU into the compound group */ pnv_comp_attach_table_group(npucomp, pe); + iommu_add_device(compound_group, &gpdev->dev); - return table_group; + return compound_group; } -struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) +static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) { struct iommu_table_group *table_group; struct npu_comp *npucomp; @@ -521,6 +536,54 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) return table_group; } + +void pnv_pci_npu_setup_iommu_groups(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + + /* + * For non-nvlink devices the IOMMU group is registered when the PE is + * configured and devices are added to the group when the per-device + * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is + * only initialised for "normal" IODA PHBs. + * + * For NVLink devices we need to ensure the NVLinks and the GPU end up + * in the same IOMMU group, so that's handled here. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->type == PNV_PHB_IODA2) + list_for_each_entry(pe, &phb->ioda.pe_list, list) + pnv_try_setup_npu_table_group(pe); + } + + /* + * Now we have all PHBs discovered, time to add NPU devices to + * the corresponding IOMMU groups. + */ + list_for_each_entry(hose, &hose_list, list_node) { + unsigned long pgsizes; + + phb = hose->private_data; + + if (phb->type != PNV_PHB_NPU_NVLINK) + continue; + + pgsizes = pnv_ioda_parse_tce_sizes(phb); + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + /* + * IODA2 bridges get this set up from + * pci_controller_ops::setup_bridge but NPU bridges + * do not have this hook defined so we do it here.
+ */ + pe->table_group.pgsizes = pgsizes; + pnv_npu_compound_attach(pe); + } + } +} #endif /* CONFIG_IOMMU_API */ int pnv_npu2_init(struct pci_controller *hose) diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 1656e8965d6b..c094fdf5825c 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -104,7 +104,7 @@ static int __opal_async_release_token(int token) */ case ASYNC_TOKEN_DISPATCHED: opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED; - /* Fall through */ + fallthrough; default: rc = 1; } diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index d361d37d975f..9a360ced663b 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -671,7 +671,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) * Firmware supports 32-bit field for size. Align it to PAGE_SIZE * and request firmware to copy multiple kernel boot memory regions. */ - fadump_conf->max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE); + fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE); /* * Check if dump has been initiated on last reboot. diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2b3dfd0b6cdd..d95954ad4c0a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -811,6 +811,10 @@ static int opal_add_one_export(struct kobject *parent, const char *export_name, goto out; attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) { + rc = -ENOMEM; + goto out; + } name = kstrdup(export_name, GFP_KERNEL); if (!name) { rc = -ENOMEM; diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 5dc6847d5f4c..5218f5da2737 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -17,6 +17,34 @@ #include <asm/tce.h> #include "pci.h" +unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + unsigned long mask = 0; + int i, rc, count; + u32 val; + + count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); + if (count <= 0) { + mask = SZ_4K | SZ_64K; + /* Add 16M for POWER8 by default */ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + !cpu_has_feature(CPU_FTR_ARCH_300)) + mask |= SZ_16M | SZ_256M; + return mask; + } + + for (i = 0; i < count; i++) { + rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", + i, &val); + if (rc == 0) + mask |= 1ULL << val; + } + + return mask; +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned int page_shift) @@ -138,7 +166,7 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, if (!ptce) { ptce = pnv_tce(tbl, false, idx, alloc); if (!ptce) - return alloc ? 
H_HARDWARE : H_TOO_HARD; + return -ENOMEM; } if (newtce & TCE_PCI_WRITE) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 57d3a6af1d52..023a4f987bb2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -51,6 +51,7 @@ static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", "NPU_OCAPI" }; static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); +static void pnv_pci_configure_bus(struct pci_bus *bus); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) @@ -114,32 +115,13 @@ static int __init pci_reset_phbs_setup(char *str) early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); -static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) -{ - /* - * WARNING: We cannot rely on the resource flags. The Linux PCI - * allocation code sometimes decides to put a 64-bit prefetchable - * BAR in the 32-bit window, so we have to compare the addresses. - * - * For simplicity we only test resource start. - */ - return (r->start >= phb->ioda.m64_base && - r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); -} - -static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) -{ - unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); - - return (resource_flags & flags) == flags; -} - static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) { s64 rc; phb->ioda.pe_array[pe_no].phb = phb; phb->ioda.pe_array[pe_no].pe_number = pe_no; + phb->ioda.pe_array[pe_no].dma_setup_done = false; /* * Clear the PE frozen state as it might be put into frozen state @@ -163,26 +145,48 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) return; } + mutex_lock(&phb->ioda.pe_alloc_mutex); if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) pr_debug("%s: PE %x was reserved on PHB#%x\n", __func__, pe_no, phb->hose->global_number); + mutex_unlock(&phb->ioda.pe_alloc_mutex); pnv_ioda_init_pe(phb, pe_no); } -static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count) { - long pe; + struct pnv_ioda_pe *ret = NULL; + int run = 0, pe, i; + mutex_lock(&phb->ioda.pe_alloc_mutex); + + /* scan backwards for a run of @count cleared bits */ for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { - if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) - return pnv_ioda_init_pe(phb, pe); + if (test_bit(pe, phb->ioda.pe_alloc)) { + run = 0; + continue; + } + + run++; + if (run == count) + break; } + if (run != count) + goto out; - return NULL; + for (i = pe; i < pe + count; i++) { + set_bit(i, phb->ioda.pe_alloc); + pnv_ioda_init_pe(phb, i); + } + ret = &phb->ioda.pe_array[pe]; + +out: + mutex_unlock(&phb->ioda.pe_alloc_mutex); + return ret; } -static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { struct pnv_phb *phb = pe->phb; unsigned int pe_num = pe->pe_number; @@ -191,7 +195,10 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ kfree(pe->npucomp); memset(pe, 0, sizeof(struct pnv_ioda_pe)); + + mutex_lock(&phb->ioda.pe_alloc_mutex); clear_bit(pe_num, phb->ioda.pe_alloc); + mutex_unlock(&phb->ioda.pe_alloc_mutex); } /* The default M64 BAR is shared by all PEs */ @@ -251,8 +258,7 @@ fail: static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, unsigned long *pe_bitmap) { - struct pci_controller 
*hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct resource *r; resource_size_t base, sgsz, start, end; int segno, i; @@ -264,8 +270,8 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, if (!r->parent || !pnv_pci_is_m64(phb, r)) continue; - start = _ALIGN_DOWN(r->start - base, sgsz); - end = _ALIGN_UP(r->end - base, sgsz); + start = ALIGN_DOWN(r->start - base, sgsz); + end = ALIGN(r->end - base, sgsz); for (segno = start / sgsz; segno < end / sgsz; segno++) { if (pe_bitmap) set_bit(segno, pe_bitmap); @@ -310,6 +316,28 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) } } + for (index = 0; index < phb->ioda.total_pe_num; index++) { + int64_t rc; + + /* + * P7IOC supports M64DT, which helps mapping M64 segment + * to one particular PE#. However, PHB3 has fixed mapping + * between M64 segment and PE#. In order to have same logic + * for P7IOC and PHB3, we enforce fixed mapping between M64 + * segment and PE# on P7IOC. + */ + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + index, OPAL_M64_WINDOW_TYPE, + index / PNV_IODA1_M64_SEGS, + index % PNV_IODA1_M64_SEGS); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + index); + goto fail; + } + } + /* * Exclude the segments for reserved and root bus PE, which * are first or last two PEs. @@ -350,8 +378,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *master_pe, *pe; unsigned long size, *pe_alloc; int i; @@ -361,7 +388,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) return NULL; /* Allocate bitmap */ - size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); + size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); pe_alloc = kzalloc(size, GFP_KERNEL); if (!pe_alloc) { pr_warn("%s: Out of memory !\n", @@ -402,26 +429,6 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) pe->master = master_pe; list_add_tail(&pe->list, &master_pe->slaves); } - - /* - * P7IOC supports M64DT, which helps mapping M64 segment - * to one particular PE#. However, PHB3 has fixed mapping - * between M64 segment and PE#. In order to have same logic - * for P7IOC and PHB3, we enforce fixed mapping between M64 - * segment and PE# on P7IOC. 
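The segment reservation in pnv_ioda_reserve_dev_m64_pe() above rounds the BAR window out to whole M64 segments and marks every segment the BAR touches. A worked example with made-up numbers, assuming a 64 MB segment size:

	/* hypothetical: sgsz = 0x4000000 (64 MB), BAR spans base + 0x5000000 .. base + 0x9ffffff */
	start = ALIGN_DOWN(0x5000000, 0x4000000);	/* 0x4000000 */
	end   = ALIGN(0x9ffffff, 0x4000000);		/* 0xc000000 */
	/* segno runs 1, 2: both segments overlapped by the BAR are reserved in pe_bitmap */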
- */ - if (phb->type == PNV_PHB_IODA1) { - int64_t rc; - - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe->pe_number, OPAL_M64_WINDOW_TYPE, - pe->pe_number / PNV_IODA1_M64_SEGS, - pe->pe_number % PNV_IODA1_M64_SEGS); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", - __func__, rc, phb->hose->global_number, - pe->pe_number); - } } kfree(pe_alloc); @@ -660,10 +667,19 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) return state; } +struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn) +{ + int pe_number = phb->ioda.pe_rmap[bdfn]; + + if (pe_number == IODA_INVALID_PE) + return NULL; + + return &phb->ioda.pe_array[pe_number]; +} + struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); if (!pdn) @@ -805,7 +821,7 @@ static void pnv_ioda_unset_peltv(struct pnv_phb *phb, pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); } -static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; @@ -876,7 +892,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return 0; } -static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; @@ -971,95 +987,9 @@ out: return 0; } -#ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) -{ - struct pci_dn *pdn = pci_get_pdn(dev); - int i; - struct resource *res, res2; - resource_size_t size; - u16 num_vfs; - - if (!dev->is_physfn) - return -EINVAL; - - /* - * "offset" is in VFs. The M64 windows are sized so that when they - * are segmented, each segment is the same size as the IOV BAR. - * Each segment is in a separate PE, and the high order bits of the - * address are the PE number. Therefore, each VF's BAR is in a - * separate PE, and changing the IOV BAR start address changes the - * range of PEs the VFs are in. - */ - num_vfs = pdn->num_vfs; - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - /* - * The actual IOV BAR range is determined by the start address - * and the actual size for num_vfs VFs BAR. This check is to - * make sure that after shifting, the range will not overlap - * with another device. - */ - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); - res2.flags = res->flags; - res2.start = res->start + (size * offset); - res2.end = res2.start + (size * num_vfs) - 1; - - if (res2.end > res->end) { - dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", - i, &res2, res, num_vfs, offset); - return -EBUSY; - } - } - - /* - * Since M64 BAR shares segments among all possible 256 PEs, - * we have to shift the beginning of PF IOV BAR to make it start from - * the segment which belongs to the PE number assigned to the first VF. - * This creates a "hole" in the /proc/iomem which could be used for - * allocating other resources so we reserve this area below and - * release when IOV is released. 
- */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); - res2 = *res; - res->start += size * offset; - - dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", - i, &res2, res, (offset > 0) ? "En" : "Dis", - num_vfs, offset); - - if (offset < 0) { - devm_release_resource(&dev->dev, &pdn->holes[i]); - memset(&pdn->holes[i], 0, sizeof(pdn->holes[i])); - } - - pci_update_resource(dev, i + PCI_IOV_RESOURCES); - - if (offset > 0) { - pdn->holes[i].start = res2.start; - pdn->holes[i].end = res2.start + size * offset - 1; - pdn->holes[i].flags = IORESOURCE_BUS; - pdn->holes[i].name = "pnv_iov_reserved"; - devm_request_resource(&dev->dev, res->parent, - &pdn->holes[i]); - } - } - return 0; -} -#endif /* CONFIG_PCI_IOV */ - static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); struct pnv_ioda_pe *pe; @@ -1071,7 +1001,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) if (pdn->pe_number != IODA_INVALID_PE) return NULL; - pe = pnv_ioda_alloc_pe(phb); + pe = pnv_ioda_alloc_pe(phb, 1); if (!pe) { pr_warn("%s: Not enough PE# available, disabling device\n", pci_name(dev)); @@ -1110,34 +1040,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return pe; } -static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - struct pci_dn *pdn = pci_get_pdn(dev); - - if (pdn == NULL) { - pr_warn("%s: No device node associated with device !\n", - pci_name(dev)); - continue; - } - - /* - * In partial hotplug case, the PCI device might be still - * associated with the PE and needn't attach it to the PE - * again. - */ - if (pdn->pe_number != IODA_INVALID_PE) - continue; - - pe->device_count++; - pdn->pe_number = pe->pe_number; - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_same_PE(dev->subordinate, pe); - } -} - /* * There're 2 types of PCI bus sensitive PEs: One that is compromised of * single PCI bus. Another one that contains the primary PCI bus and its @@ -1146,8 +1048,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) */ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *pe = NULL; unsigned int pe_num; @@ -1156,15 +1057,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) * We should reuse it instead of allocating a new one. 
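Many of the conversions in this file shorten the pci_bus_to_host()/hose->private_data pair to pci_bus_to_pnvhb(). That helper is introduced elsewhere in the series (in the powernv pci.h) and is presumably nothing more than the old two-step lookup, along the lines of:

	static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
	{
		struct pci_controller *hose = bus->sysdata;

		return hose ? hose->private_data : NULL;
	}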
*/ pe_num = phb->ioda.pe_rmap[bus->number << 8]; - if (pe_num != IODA_INVALID_PE) { + if (WARN_ON(pe_num != IODA_INVALID_PE)) { pe = &phb->ioda.pe_array[pe_num]; - pnv_ioda_setup_same_PE(bus, pe); return NULL; } /* PE number for root bus should have been reserved */ - if (pci_is_root_bus(bus) && - phb->ioda.root_pe_idx != IODA_INVALID_PE) + if (pci_is_root_bus(bus)) pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx]; /* Check if PE is determined by M64 */ @@ -1173,7 +1072,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) /* The PE number isn't pinned by M64 */ if (!pe) - pe = pnv_ioda_alloc_pe(phb); + pe = pnv_ioda_alloc_pe(phb, 1); if (!pe) { pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", @@ -1202,9 +1101,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) return NULL; } - /* Associate it with all child devices */ - pnv_ioda_setup_same_PE(bus, pe); - /* Put PE to the list */ list_add_tail(&pe->list, &phb->ioda.pe_list); @@ -1218,8 +1114,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) struct pnv_ioda_pe *pe; struct pci_dev *gpu_pdev; struct pci_dn *npu_pdn; - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus); /* * Intentionally leak a reference on the npu device (for @@ -1288,7 +1183,7 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) pnv_ioda_setup_npu_PE(pdev); } -static void pnv_pci_ioda_setup_PEs(void) +static void pnv_pci_ioda_setup_nvlink(void) { struct pci_controller *hose; struct pnv_phb *phb; @@ -1312,479 +1207,73 @@ static void pnv_pci_ioda_setup_PEs(void) list_for_each_entry(pe, &phb->ioda.pe_list, list) pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV); } -} -#ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pci_dn *pdn; - int i, j; - int m64_bars; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (pdn->m64_single_mode) - m64_bars = num_vfs; - else - m64_bars = 1; - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < m64_bars; j++) { - if (pdn->m64_map[j][i] == IODA_INVALID_M64) - continue; - opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); - clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); - pdn->m64_map[j][i] = IODA_INVALID_M64; - } - - kfree(pdn->m64_map); - return 0; -} - -static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pci_dn *pdn; - unsigned int win; - struct resource *res; - int i, j; - int64_t rc; - int total_vfs; - resource_size_t size, start; - int pe_num; - int m64_bars; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - total_vfs = pci_sriov_get_totalvfs(pdev); - - if (pdn->m64_single_mode) - m64_bars = num_vfs; - else - m64_bars = 1; - - pdn->m64_map = kmalloc_array(m64_bars, - sizeof(*pdn->m64_map), - GFP_KERNEL); - if (!pdn->m64_map) - return -ENOMEM; - /* Initialize the m64_map to IODA_INVALID_M64 */ - for (i = 0; i < m64_bars ; i++) - for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) - pdn->m64_map[i][j] = IODA_INVALID_M64; - - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if 
(!res->flags || !res->parent) - continue; - - for (j = 0; j < m64_bars; j++) { - do { - win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, - phb->ioda.m64_bar_idx + 1, 0); - - if (win >= phb->ioda.m64_bar_idx + 1) - goto m64_failed; - } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); - - pdn->m64_map[j][i] = win; - - if (pdn->m64_single_mode) { - size = pci_iov_resource_size(pdev, - PCI_IOV_RESOURCES + i); - start = res->start + size * j; - } else { - size = resource_size(res); - start = res->start; - } - - /* Map the M64 here */ - if (pdn->m64_single_mode) { - pe_num = pdn->pe_num_map[j]; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe_num, OPAL_M64_WINDOW_TYPE, - pdn->m64_map[j][i], 0); - } - - rc = opal_pci_set_phb_mem_window(phb->opal_id, - OPAL_M64_WINDOW_TYPE, - pdn->m64_map[j][i], - start, - 0, /* unused */ - size); - - - if (rc != OPAL_SUCCESS) { - dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", - win, rc); - goto m64_failed; - } - - if (pdn->m64_single_mode) - rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); - else - rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); - - if (rc != OPAL_SUCCESS) { - dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", - win, rc); - goto m64_failed; - } - } - } - return 0; - -m64_failed: - pnv_pci_vf_release_m64(pdev, num_vfs); - return -EBUSY; -} - -static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, - int num); - -static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) -{ - struct iommu_table *tbl; - int64_t rc; - - tbl = pe->table_group.tables[0]; - rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); - if (rc) - pe_warn(pe, "OPAL error %lld release DMA window\n", rc); - - pnv_pci_ioda2_set_bypass(pe, false); - if (pe->table_group.group) { - iommu_group_put(pe->table_group.group); - BUG_ON(pe->table_group.group); - } - iommu_tce_table_put(tbl); -} - -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe, *pe_n; - struct pci_dn *pdn; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (!pdev->is_physfn) - return; - - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { - if (pe->parent_dev != pdev) - continue; - - pnv_pci_ioda2_release_dma_pe(pdev, pe); - - /* Remove from list */ - mutex_lock(&phb->ioda.pe_list_mutex); - list_del(&pe->list); - mutex_unlock(&phb->ioda.pe_list_mutex); - - pnv_ioda_deconfigure_pe(phb, pe); - - pnv_ioda_free_pe(pe); - } -} - -void pnv_pci_sriov_disable(struct pci_dev *pdev) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - u16 num_vfs, i; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - num_vfs = pdn->num_vfs; - - /* Release VF PEs */ - pnv_ioda_release_vf_PE(pdev); - - if (phb->type == PNV_PHB_IODA2) { - if (!pdn->m64_single_mode) - pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); - - /* Release M64 windows */ - pnv_pci_vf_release_m64(pdev, num_vfs); - - /* Release PE numbers */ - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] == IODA_INVALID_PE) - continue; - - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; - pnv_ioda_free_pe(pe); - } - } else - bitmap_clear(phb->ioda.pe_alloc, 
*pdn->pe_num_map, num_vfs); - /* Releasing pe_num_map */ - kfree(pdn->pe_num_map); - } -} - -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe); -#ifdef CONFIG_IOMMU_API -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, struct pci_bus *bus); - -#endif -static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - int pe_num; - u16 vf_index; - struct pci_dn *pdn; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (!pdev->is_physfn) - return; - - /* Reserve PE for each VF */ - for (vf_index = 0; vf_index < num_vfs; vf_index++) { - int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); - int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); - struct pci_dn *vf_pdn; - - if (pdn->m64_single_mode) - pe_num = pdn->pe_num_map[vf_index]; - else - pe_num = *pdn->pe_num_map + vf_index; - - pe = &phb->ioda.pe_array[pe_num]; - pe->pe_number = pe_num; - pe->phb = phb; - pe->flags = PNV_IODA_PE_VF; - pe->pbus = NULL; - pe->parent_dev = pdev; - pe->mve_number = -1; - pe->rid = (vf_bus << 8) | vf_devfn; - - pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", - hose->global_number, pdev->bus->number, - PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); - - if (pnv_ioda_configure_pe(phb, pe)) { - /* XXX What do we do here ? */ - pnv_ioda_free_pe(pe); - pe->pdev = NULL; - continue; - } - - /* Put PE to the list */ - mutex_lock(&phb->ioda.pe_list_mutex); - list_add_tail(&pe->list, &phb->ioda.pe_list); - mutex_unlock(&phb->ioda.pe_list_mutex); - - /* associate this pe to it's pdn */ - list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { - if (vf_pdn->busno == vf_bus && - vf_pdn->devfn == vf_devfn) { - vf_pdn->pe_number = pe_num; - break; - } - } - - pnv_pci_ioda2_setup_dma_pe(phb, pe); #ifdef CONFIG_IOMMU_API - iommu_register_group(&pe->table_group, - pe->phb->hose->global_number, pe->pe_number); - pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL); + /* setup iommu groups so we can do nvlink pass-thru */ + pnv_pci_npu_setup_iommu_groups(); #endif - } } -int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - int ret; - u16 i; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (phb->type == PNV_PHB_IODA2) { - if (!pdn->vfs_expanded) { - dev_info(&pdev->dev, "don't support this SRIOV device" - " with non 64bit-prefetchable IOV BAR\n"); - return -ENOSPC; - } - - /* - * When M64 BARs functions in Single PE mode, the number of VFs - * could be enabled must be less than the number of M64 BARs. 
- */ - if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { - dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); - return -EBUSY; - } - - /* Allocating pe_num_map */ - if (pdn->m64_single_mode) - pdn->pe_num_map = kmalloc_array(num_vfs, - sizeof(*pdn->pe_num_map), - GFP_KERNEL); - else - pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); - - if (!pdn->pe_num_map) - return -ENOMEM; +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe); - if (pdn->m64_single_mode) - for (i = 0; i < num_vfs; i++) - pdn->pe_num_map[i] = IODA_INVALID_PE; +static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; - /* Calculate available PE for required VFs */ - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - pe = pnv_ioda_alloc_pe(phb); - if (!pe) { - ret = -EBUSY; - goto m64_failed; - } + /* Check if the BDFN for this device is associated with a PE yet */ + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + if (!pe) { + /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */ + if (WARN_ON(pdev->is_virtfn)) + return; - pdn->pe_num_map[i] = pe->pe_number; - } - } else { - mutex_lock(&phb->ioda.pe_alloc_mutex); - *pdn->pe_num_map = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, phb->ioda.total_pe_num, - 0, num_vfs, 0); - if (*pdn->pe_num_map >= phb->ioda.total_pe_num) { - mutex_unlock(&phb->ioda.pe_alloc_mutex); - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); - kfree(pdn->pe_num_map); - return -EBUSY; - } - bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - mutex_unlock(&phb->ioda.pe_alloc_mutex); - } - pdn->num_vfs = num_vfs; + pnv_pci_configure_bus(pdev->bus); + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff); - /* Assign M64 window accordingly */ - ret = pnv_pci_vf_assign_m64(pdev, num_vfs); - if (ret) { - dev_info(&pdev->dev, "Not enough M64 window resources\n"); - goto m64_failed; - } /* - * When using one M64 BAR to map one IOV BAR, we need to shift - * the IOV BAR according to the PE# allocated to the VFs. - * Otherwise, the PE# for the VF will conflict with others. + * If we can't setup the IODA PE something has gone horribly + * wrong and we can't enable DMA for the device. 
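 *
 * (For illustration only, not part of this change: the bdfn passed to the
 *  pnv_pci_bdfn_to_pe() lookups above is just the PCIe Requester ID layout,
 *  bus number in the top byte and devfn in the bottom byte, e.g. bus 0x01
 *  and devfn 0x03 give (0x01 << 8) | 0x03 == 0x0103.)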
*/ - if (!pdn->m64_single_mode) { - ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); - if (ret) - goto m64_failed; - } + if (WARN_ON(!pe)) + return; + } else { + pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number); } - /* Setup VF PEs */ - pnv_ioda_setup_vf_PE(pdev, num_vfs); - - return 0; - -m64_failed: - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] == IODA_INVALID_PE) - continue; - - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; - pnv_ioda_free_pe(pe); - } - } else - bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - - /* Releasing pe_num_map */ - kfree(pdn->pe_num_map); - - return ret; -} - -int pnv_pcibios_sriov_disable(struct pci_dev *pdev) -{ - pnv_pci_sriov_disable(pdev); - - /* Release PCI data */ - remove_sriov_vf_pdns(pdev); - return 0; -} - -int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) -{ - /* Allocate PCI data */ - add_sriov_vf_pdns(pdev); - - return pnv_pci_sriov_enable(pdev, num_vfs); -} -#endif /* CONFIG_PCI_IOV */ - -static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); - struct pnv_ioda_pe *pe; - /* - * The function can be called while the PE# - * hasn't been assigned. Do nothing for the - * case. + * We assume that bridges *probably* don't need to do any DMA so we can + * skip allocating a TCE table, etc unless we get a non-bridge device. */ - if (!pdn || pdn->pe_number == IODA_INVALID_PE) - return; + if (!pe->dma_setup_done && !pci_is_bridge(pdev)) { + switch (phb->type) { + case PNV_PHB_IODA1: + pnv_pci_ioda1_setup_dma_pe(phb, pe); + break; + case PNV_PHB_IODA2: + pnv_pci_ioda2_setup_dma_pe(phb, pe); + break; + default: + pr_warn("%s: No DMA for PHB#%x (type %d)\n", + __func__, phb->hose->global_number, phb->type); + } + } + + if (pdn) + pdn->pe_number = pe->pe_number; + pe->device_count++; - pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); pdev->dev.archdata.dma_offset = pe->tce_bypass_base; set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); - /* - * Note: iommu_add_device() will fail here as - * for physical PE: the device is already added by now; - * for virtual PE: sysfs entries are not ready yet and - * tce_iommu_bus_notifier will add the device to a group later. 
- */ + + /* PEs with a DMA weight of zero won't have a group */ + if (pe->table_group.group) + iommu_add_device(&pe->table_group, &pdev->dev); } /* @@ -1859,8 +1348,7 @@ err: static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, u64 dma_mask) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; @@ -1897,19 +1385,6 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, return false; } -static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); - dev->dev.archdata.dma_offset = pe->tce_bypass_base; - - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); - } -} - static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb, bool real_mode) { @@ -2297,9 +1772,7 @@ found: pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node, 0, 0); - if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); - + pe->dma_setup_done = true; return; fail: /* XXX Failure: Try to fallback to 64-bit only ? */ @@ -2489,7 +1962,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) return 0; } -#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV) static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, int num) { @@ -2513,7 +1985,6 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, return ret; } -#endif #ifdef CONFIG_IOMMU_API unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, @@ -2537,7 +2008,7 @@ unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, direct_table_size = 1UL << table_shift; for ( ; levels; --levels) { - bytes += _ALIGN_UP(tce_table_size, direct_table_size); + bytes += ALIGN(tce_table_size, direct_table_size); tce_table_size /= direct_table_size; tce_table_size <<= 3; @@ -2562,6 +2033,19 @@ static long pnv_pci_ioda2_create_table_userspace( return ret; } +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); + dev->dev.archdata.dma_offset = pe->tce_bypass_base; + + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, @@ -2596,145 +2080,13 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { .take_ownership = pnv_ioda2_take_ownership, .release_ownership = pnv_ioda2_release_ownership, }; - -static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, - struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - iommu_add_device(table_group, &dev->dev); - - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_iommu_group_add_devices(pe, - table_group, dev->subordinate); - } -} - -static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe, - struct iommu_table_group *table_group, struct 
pci_bus *bus) -{ - - if (pe->flags & PNV_IODA_PE_DEV) - iommu_add_device(table_group, &pe->pdev->dev); - - if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus) - pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group, - bus); -} - -static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); - -static void pnv_pci_ioda_setup_iommu_api(void) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - - /* - * There are 4 types of PEs: - * - PNV_IODA_PE_BUS: a downstream port with an adapter, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_VF: a SRIOV virtual function, - * created from pnv_pcibios_sriov_enable(); - * - PNV_IODA_PE_DEV: an NPU or OCAPI device, - * created from pnv_pci_ioda_fixup(). - * - * Normally a PE is represented by an IOMMU group, however for - * devices with side channels the groups need to be more strict. - */ - list_for_each_entry(hose, &hose_list, list_node) { - phb = hose->private_data; - - if (phb->type == PNV_PHB_NPU_NVLINK || - phb->type == PNV_PHB_NPU_OCAPI) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - struct iommu_table_group *table_group; - - table_group = pnv_try_setup_npu_table_group(pe); - if (!table_group) { - if (!pnv_pci_ioda_pe_dma_weight(pe)) - continue; - - table_group = &pe->table_group; - iommu_register_group(&pe->table_group, - pe->phb->hose->global_number, - pe->pe_number); - } - pnv_ioda_setup_bus_iommu_group(pe, table_group, - pe->pbus); - } - } - - /* - * Now we have all PHBs discovered, time to add NPU devices to - * the corresponding IOMMU groups. - */ - list_for_each_entry(hose, &hose_list, list_node) { - unsigned long pgsizes; - - phb = hose->private_data; - - if (phb->type != PNV_PHB_NPU_NVLINK) - continue; - - pgsizes = pnv_ioda_parse_tce_sizes(phb); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - /* - * IODA2 bridges get this set up from - * pci_controller_ops::setup_bridge but NPU bridges - * do not have this hook defined so we do it here. 
- */ - pe->table_group.pgsizes = pgsizes; - pnv_npu_compound_attach(pe); - } - } -} -#else /* !CONFIG_IOMMU_API */ -static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif -static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) -{ - struct pci_controller *hose = phb->hose; - struct device_node *dn = hose->dn; - unsigned long mask = 0; - int i, rc, count; - u32 val; - - count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); - if (count <= 0) { - mask = SZ_4K | SZ_64K; - /* Add 16M for POWER8 by default */ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - !cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= SZ_16M | SZ_256M; - return mask; - } - - for (i = 0; i < count; i++) { - rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", - i, &val); - if (rc == 0) - mask |= 1ULL << val; - } - - return mask; -} - -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) { int64_t rc; - if (!pnv_pci_ioda_pe_dma_weight(pe)) - return; - /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; @@ -2749,16 +2101,17 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, IOMMU_TABLE_GROUP_MAX_TABLES; pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb); -#ifdef CONFIG_IOMMU_API - pe->table_group.ops = &pnv_pci_ioda2_ops; -#endif rc = pnv_pci_ioda2_setup_default_config(pe); if (rc) return; - if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); +#endif + pe->dma_setup_done = true; } int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) @@ -2907,118 +2260,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) count, phb->msi_base); } -#ifdef CONFIG_PCI_IOV -static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - const resource_size_t gate = phb->ioda.m64_segsize >> 2; - struct resource *res; - int i; - resource_size_t size, total_vf_bar_sz; - struct pci_dn *pdn; - int mul, total_vfs; - - pdn = pci_get_pdn(pdev); - pdn->vfs_expanded = 0; - pdn->m64_single_mode = false; - - total_vfs = pci_sriov_get_totalvfs(pdev); - mul = phb->ioda.total_pe_num; - total_vf_bar_sz = 0; - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || res->parent) - continue; - if (!pnv_pci_is_m64_flags(res->flags)) { - dev_warn(&pdev->dev, "Don't support SR-IOV with" - " non M64 VF BAR%d: %pR. \n", - i, res); - goto truncate_iov; - } - - total_vf_bar_sz += pci_iov_resource_size(pdev, - i + PCI_IOV_RESOURCES); - - /* - * If bigger than quarter of M64 segment size, just round up - * power of two. - * - * Generally, one M64 BAR maps one IOV BAR. To avoid conflict - * with other devices, IOV BAR size is expanded to be - * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64 - * segment size , the expanded size would equal to half of the - * whole M64 space size, which will exhaust the M64 Space and - * limit the system flexibility. This is a design decision to - * set the boundary to quarter of the M64 segment size. 
- */ - if (total_vf_bar_sz > gate) { - mul = roundup_pow_of_two(total_vfs); - dev_info(&pdev->dev, - "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n", - total_vf_bar_sz, gate, mul); - pdn->m64_single_mode = true; - break; - } - } - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || res->parent) - continue; - - size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); - /* - * On PHB3, the minimum size alignment of M64 BAR in single - * mode is 32MB. - */ - if (pdn->m64_single_mode && (size < SZ_32M)) - goto truncate_iov; - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); - res->end = res->start + size * mul - 1; - dev_dbg(&pdev->dev, " %pR\n", res); - dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", - i, res, mul); - } - pdn->vfs_expanded = mul; - - return; - -truncate_iov: - /* To save MMIO space, IOV BAR is truncated. */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - res->flags = 0; - res->end = res->start - 1; - } -} - -static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) -{ - if (WARN_ON(pci_dev_is_added(pdev))) - return; - - if (pdev->is_virtfn) { - struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); - - /* - * VF PEs are single-device PEs so their pdev pointer needs to - * be set. The pdev doesn't exist when the PE is allocated (in - * (pcibios_sriov_enable()) so we fix it up here. - */ - pe->pdev = pdev; - WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); - } else if (pdev->is_physfn) { - /* - * For PFs adjust their allocated IOV resources to match what - * the PHB can support using it's M64 BAR table. - */ - pnv_pci_ioda_fixup_iov_resources(pdev); - } -} -#endif /* CONFIG_PCI_IOV */ - static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, struct resource *res) { @@ -3220,8 +2461,7 @@ static void pnv_pci_enable_bridges(void) static void pnv_pci_ioda_fixup(void) { - pnv_pci_ioda_setup_PEs(); - pnv_pci_ioda_setup_iommu_api(); + pnv_pci_ioda_setup_nvlink(); pnv_pci_ioda_create_dbgfs(); pnv_pci_enable_bridges(); @@ -3246,10 +2486,9 @@ static void pnv_pci_ioda_fixup(void) static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, unsigned long type) { - struct pci_dev *bridge; - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); int num_pci_bridges = 0; + struct pci_dev *bridge; bridge = bus->self; while (bridge) { @@ -3333,28 +2572,16 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, } } -static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) +static void pnv_pci_configure_bus(struct pci_bus *bus) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; struct pci_dev *bridge = bus->self; struct pnv_ioda_pe *pe; - bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); + bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); - /* Extend bridge's windows if necessary */ - pnv_pci_fixup_bridge_resources(bus, type); - - /* The PE for root bus should be realized before any one else */ - if (!phb->ioda.root_pe_populated) { - pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false); - if (pe) { - phb->ioda.root_pe_idx = pe->pe_number; - phb->ioda.root_pe_populated = true; - } - } + dev_info(&bus->dev, "Configuring PE for bus\n"); /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ - if (list_empty(&bus->devices)) + if 
(WARN_ON(list_empty(&bus->devices))) return; /* Reserve PEs according to used M64 resources */ @@ -3370,17 +2597,6 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) return; pnv_ioda_setup_pe_seg(pe); - switch (phb->type) { - case PNV_PHB_IODA1: - pnv_pci_ioda1_setup_dma_pe(phb, pe); - break; - case PNV_PHB_IODA2: - pnv_pci_ioda2_setup_dma_pe(phb, pe); - break; - default: - pr_warn("%s: No DMA for PHB#%x (type %d)\n", - __func__, phb->hose->global_number, phb->type); - } } static resource_size_t pnv_pci_default_alignment(void) @@ -3388,49 +2604,12 @@ static resource_size_t pnv_pci_default_alignment(void) return PAGE_SIZE; } -#ifdef CONFIG_PCI_IOV -static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, - int resno) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); - resource_size_t align; - - /* - * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the - * SR-IOV. While from hardware perspective, the range mapped by M64 - * BAR should be size aligned. - * - * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra - * powernv-specific hardware restriction is gone. But if just use the - * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with - * in one segment of M64 #15, which introduces the PE conflict between - * PF and VF. Based on this, the minimum alignment of an IOV BAR is - * m64_segsize. - * - * This function returns the total IOV BAR size if M64 BAR is in - * Shared PE mode or just VF BAR size if not. - * If the M64 BAR is in Single PE mode, return the VF BAR size or - * M64 segment size if IOV BAR size is less. - */ - align = pci_iov_resource_size(pdev, resno); - if (!pdn->vfs_expanded) - return align; - if (pdn->m64_single_mode) - return max(align, (resource_size_t)phb->ioda.m64_segsize); - - return pdn->vfs_expanded * align; -} -#endif /* CONFIG_PCI_IOV */ - /* Prevent enabling devices for which we couldn't properly * assign a PE */ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn; /* The function is probably called while the PEs have @@ -3501,11 +2680,10 @@ static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) { - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); struct iommu_table *tbl = pe->table_group.tables[0]; int64_t rc; - if (!weight) + if (!pe->dma_setup_done) return; rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0); @@ -3522,22 +2700,17 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) iommu_tce_table_put(tbl); } -static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) { struct iommu_table *tbl = pe->table_group.tables[0]; - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); -#ifdef CONFIG_IOMMU_API int64_t rc; -#endif - if (!weight) + if (!pe->dma_setup_done) return; -#ifdef CONFIG_IOMMU_API rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) pe_warn(pe, "OPAL error %lld release DMA window\n", rc); -#endif pnv_pci_ioda2_set_bypass(pe, false); if (pe->table_group.group) { @@ -3560,14 +2733,8 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, if (map[idx] != pe->pe_number) continue; - if (win == 
OPAL_M64_WINDOW_TYPE) - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - phb->ioda.reserved_pe_idx, win, - idx / PNV_IODA1_M64_SEGS, - idx % PNV_IODA1_M64_SEGS); - else - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - phb->ioda.reserved_pe_idx, win, 0, idx); + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, 0, idx); if (rc != OPAL_SUCCESS) pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n", @@ -3586,8 +2753,7 @@ static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe) phb->ioda.io_segmap); pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap); - pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE, - phb->ioda.m64_segmap); + /* M64 is pre-configured by pnv_ioda1_init_m64() */ } else if (phb->type == PNV_PHB_IODA2) { pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap); @@ -3599,6 +2765,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; + pe_info(pe, "Releasing PE\n"); + mutex_lock(&phb->ioda.pe_list_mutex); list_del(&pe->list); mutex_unlock(&phb->ioda.pe_list_mutex); @@ -3633,26 +2801,35 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) * that it can be populated again in PCI hot add path. The PE * shouldn't be destroyed as it's the global reserved resource. */ - if (phb->ioda.root_pe_populated && - phb->ioda.root_pe_idx == pe->pe_number) - phb->ioda.root_pe_populated = false; - else - pnv_ioda_free_pe(pe); + if (phb->ioda.root_pe_idx == pe->pe_number) + return; + + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; + /* The VF PE state is torn down when sriov_disable() is called */ if (pdev->is_virtfn) return; if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; +#ifdef CONFIG_PCI_IOV + /* + * FIXME: Try move this to sriov_disable(). It's here since we allocate + * the iov state at probe time since we need to fiddle with the IOV + * resources. + */ + if (pdev->is_physfn) + kfree(pdev->dev.archdata.iov_data); +#endif + /* * PCI hotplug can happen as part of EEH error recovery. The @pdn * isn't removed and added afterwards in this scenario. 
We should @@ -3688,8 +2865,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) { - struct pci_controller *hose = bus->sysdata; - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *pe; list_for_each_entry(pe, &phb->ioda.pe_list, list) { @@ -3715,7 +2891,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .enable_device_hook = pnv_pci_enable_device_hook, .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, - .setup_bridge = pnv_pci_setup_bridge, + .setup_bridge = pnv_pci_fixup_bridge_resources, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .shutdown = pnv_pci_ioda_shutdown, }; @@ -3745,6 +2921,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, struct pnv_phb *phb; unsigned long size, m64map_off, m32map_off, pemap_off; unsigned long iomap_off = 0, dma32map_off = 0; + struct pnv_ioda_pe *root_pe; struct resource r; const __be64 *prop64; const __be32 *prop32; @@ -3863,7 +3040,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, PNV_IODA1_DMA32_SEGSIZE; /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ - size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, + size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, sizeof(unsigned long)); m64map_off = size; size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]); @@ -3912,7 +3089,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1; pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); } else { - phb->ioda.root_pe_idx = IODA_INVALID_PE; + /* otherwise just allocate one */ + root_pe = pnv_ioda_alloc_pe(phb, 1); + phb->ioda.root_pe_idx = root_pe->pe_number; } INIT_LIST_HEAD(&phb->ioda.pe_list); @@ -4024,8 +3203,7 @@ void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); if (!machine_is(powernv)) return; diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c new file mode 100644 index 000000000000..c4434f20f42f --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -0,0 +1,766 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/kernel.h> +#include <linux/ioport.h> +#include <linux/bitmap.h> +#include <linux/pci.h> + +#include <asm/opal.h> + +#include "pci.h" + +/* for pci_dev_is_added() */ +#include "../../../../drivers/pci/pci.h" + +/* + * The majority of the complexity in supporting SR-IOV on PowerNV comes from + * the need to put the MMIO space for each VF into a separate PE. Internally + * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table". + * The MBT historically only applied to the 64bit MMIO window of the PHB + * so it's common to see it referred to as the "M64BT". + * + * An MBT entry stores the mapped range as an <base>,<mask> pair. This forces + * the address range that we want to map to be power-of-two sized and aligned. + * For conventional PCI devices this isn't really an issue since PCI device BARs + * have the same requirement. + * + * For a SR-IOV BAR things are a little more awkward since size and alignment + * are not coupled. 
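 *
 * A minimal sketch of that restriction (illustrative, not from the patch
 * itself): a range can be covered by a single MBT entry only if
 *
 *	size != 0 && (size & (size - 1)) == 0 && (base & (size - 1)) == 0
 *
 * i.e. the size is a power of two and the base is aligned to it.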
The alignment is set based on the per-VF BAR size, but + * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs + * isn't necessarily a power of two, so neither is the total size. To fix that + * we need to finesse (read: hack) the Linux BAR allocator so that it will + * allocate the SR-IOV BARs in a way that lets us map them using the MBT. + * + * The changes to size and alignment that we need to do depend on the "mode" + * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above, + * so as a baseline we can assume that we have the following BAR modes + * available: + * + * NB: $PE_COUNT is the number of PEs that the PHB supports. + * + * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized + * segments. The n'th segment is mapped to the n'th PE. + * b) An un-segmented BAR that maps the whole address range to a specific PE. + * + * + * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR. + * For comparison b) requires one entry per-VF per-BAR, or: + * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment + * to equal the size of the per-VF BAR area. So: + * + * new_size = per-vf-size * number-of-PEs + * + * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size + * to "new_size", calculated above. Implementing this is a convoluted process + * which requires several hooks in the PCI core: + * + * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov(). + * + * At this point the device has been probed and the device's BARs are sized, + * but no resource allocations have been done. The SR-IOV BARs are sized + * based on the maximum number of VFs supported by the device and we need + * to increase that to new_size. + * + * 2. Later, when Linux actually assigns resources it tries to make the resource + * allocations for each PCI bus as compact as possible. As a part of that it + * sorts the BARs on a bus by their required alignment, which is calculated + * using pci_resource_alignment(). + * + * For IOV resources this goes: + * pci_resource_alignment() + * pci_sriov_resource_alignment() + * pcibios_sriov_resource_alignment() + * pnv_pci_iov_resource_alignment() + * + * Our hook overrides the default alignment, equal to the per-vf-size, with + * new_size computed above. + * + * 3. When userspace enables VFs for a device: + * + * sriov_enable() + * pcibios_sriov_enable() + * pnv_pcibios_sriov_enable() + * + * This is where we actually allocate PE numbers for each VF and setup the + * MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena" + * where each MBT segment is equal in size to the VF BAR so we can shift + * around the actual SR-IOV BAR location within this arena. We need this + * ability because the PE space is shared by all devices on the same PHB. + * When using mode a) described above, segment 0 maps to PE#0, which might + * already be in use by another device on the PHB. + * + * As a result we need to allocate a contiguous range of PE numbers, then shift + * the address programmed into the SR-IOV BAR of the PF so that the address + * of VF0 matches up with the segment corresponding to the first allocated + * PE number. This is handled in pnv_pci_vf_resource_shift(). + * + * Once all that is done we return to the PCI core which then enables VFs, + * scans them and creates pci_devs for each. 
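 *
 * As a concrete (purely illustrative) example of the sizing rule above: a
 * device with a 1MB per-VF BAR on a PHB exposing 256 PEs ends up with
 *
 *	new_size = 1MB * 256 = 256MB
 *
 * so step 1 grows the IOV resource to 256MB and step 2 reports 256MB
 * (rather than the native 1MB) as its alignment.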
The init process for a VF is + * largely the same as a normal device, but the VF is inserted into the IODA + * PE that we allocated for it rather than the PE associated with the bus. + * + * 4. When userspace disables VFs we unwind the above in + * pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since + * we don't need to validate anything, just tear down the mappings and + * move SR-IOV resource back to its "proper" location. + * + * That's how mode a) works. In theory mode b) (single PE mapping) is less work + * since we can map each individual VF with a separate BAR. However, there's a + * few limitations: + * + * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes + * it only usable for devices with very large per-VF BARs. Such devices are + * similar to Big Foot. They definitely exist, but I've never seen one. + * + * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only + * 16 total and some are needed for. Most SR-IOV capable network cards can support + * more than 16 VFs on each port. + * + * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO + * window of the PHB. + * + * + * + * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It + * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows + * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since + * the Linux BAR allocation will place any BAR marked as non-prefetchable into + * the non-prefetchable bridge window, which is 32bit only. It also added two + * new modes: + * + * c) A segmented BAR similar to a), but each segment can be individually + * mapped to any PE. This is matches how the 32bit MMIO window worked on + * IODA1&2. + * + * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a), + * but with fewer segments and configurable base PE. + * + * i.e. The n'th segment maps to the (n + base)'th PE. + * + * The base PE is also required to be a multiple of the window size. + * + * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us + * to exploit any of the IODA3 features. + */ + +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct resource *res; + int i; + resource_size_t vf_bar_sz; + struct pnv_iov_data *iov; + int mul; + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + goto disable_iov; + pdev->dev.archdata.iov_data = iov; + mul = phb->ioda.total_pe_num; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_m64_flags(res->flags)) { + dev_warn(&pdev->dev, "Don't support SR-IOV with non M64 VF BAR%d: %pR. \n", + i, res); + goto disable_iov; + } + + vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + + /* + * Generally, one segmented M64 BAR maps one IOV BAR. However, + * if a VF BAR is too large we end up wasting a lot of space. + * If each VF needs more than 1/4 of the default m64 segment + * then each VF BAR should be mapped in single-PE mode to reduce + * the amount of space required. This does however limit the + * number of VFs we can support. + * + * The 1/4 limit is arbitrary and can be tweaked. + */ + if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) { + /* + * On PHB3, the minimum size alignment of M64 BAR in + * single mode is 32MB. 
If this VF BAR is smaller than + * 32MB, but still too large for a segmented window + * then we can't map it and need to disable SR-IOV for + * this device. + */ + if (vf_bar_sz < SZ_32M) { + pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n", + i, res); + goto disable_iov; + } + + iov->m64_single_mode[i] = true; + continue; + } + + /* + * This BAR can be mapped with one segmented window, so adjust + * the resource size to accommodate. + */ + pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res); + res->end = res->start + vf_bar_sz * mul - 1; + pci_dbg(pdev, " %pR\n", res); + + pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", + i, res, mul); + + iov->need_shift = true; + } + + return; + +disable_iov: + /* Save ourselves some MMIO space by disabling the unusable BARs */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + res->flags = 0; + res->end = res->start - 1; + } + + pdev->dev.archdata.iov_data = NULL; + kfree(iov); +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (WARN_ON(pci_dev_is_added(pdev))) + return; + + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using its M64 BAR table. + */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} + +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + resource_size_t align = pci_iov_resource_size(pdev, resno); + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pnv_iov_data *iov = pnv_iov_get(pdev); + + /* + * iov can be null if we have an SR-IOV device with IOV BAR that can't + * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch). + * In that case we don't allow VFs to be enabled since one of their + * BARs would not be placed in the correct PE. + */ + if (!iov) + return align; + + /* + * If we're using single mode then we can just use the native VF BAR + * alignment. We validated that it's possible to use a single PE + * window above when we did the fixup. + */ + if (iov->m64_single_mode[resno - PCI_IOV_RESOURCES]) + return align; + + /* + * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the + * SR-IOV. While from hardware perspective, the range mapped by M64 + * BAR should be size aligned. + * + * This function returns the total IOV BAR size if M64 BAR is in + * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. + */ + return phb->ioda.total_pe_num * align; +} + +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int window_id; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for_each_set_bit(window_id, iov->used_m64_bar_mask, MAX_M64_BARS) { + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + + clear_bit(window_id, &phb->ioda.m64_bar_alloc); + } + + return 0; +} + + +/* + * PHB3 and beyond support segmented windows. The window's address range + * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1 + * mapping between PEs and segments. 
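 *
 * As a rough worked example (window and PE counts are illustrative, not
 * taken from this patch): a 64GB M64 window on a PHB with 256 PEs gives
 *
 *	segment_size = 64GB / 256 = 256MB
 *
 * so the address range starting N * 256MB into the window belongs to PE#N.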
+ */ +static int64_t pnv_ioda_map_m64_segmented(struct pnv_phb *phb, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* unused */ + size); + if (rc) + goto out; + + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_SPLIT); +out: + if (rc) + pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc); + + return rc; +} + +static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb, + int pe_num, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + /* + * The API for setting up m64 mmio windows seems to have been designed + * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed + * split of 8 equally sized segments each of which could be individually + * assigned to a PE. + * + * The problem with this is that the API doesn't have any way to + * communicate the number of segments we want on a BAR. This wasn't + * a problem for p7-ioc since you didn't have a choice, but the + * single PE windows added in PHB3 don't map cleanly to this API. + * + * As a result we've got this slightly awkward process where we + * call opal_pci_map_pe_mmio_window() to put the window in single + * PE mode, and set the PE for the window before setting the address + * bounds. We need to do it this way because the single PE windows + * for PHB3 have different alignment requirements. + */ + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe_num, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + if (rc) + goto out; + + /* + * NB: In single PE mode the window needs to be aligned to 32MB + */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* ignored by FW, m64 is 1-1 */ + size); + if (rc) + goto out; + + /* + * Now actually enable it. We specified the BAR should be in "non-split" + * mode so FW will validate that the BAR is in single PE mode. + */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_NON_SPLIT); +out: + if (rc) + pr_err("Error mapping single PE BAR\n"); + + return rc; +} + +static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov) +{ + int win; + + do { + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, + phb->ioda.m64_bar_idx + 1, 0); + + if (win >= phb->ioda.m64_bar_idx + 1) + return -1; + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); + + set_bit(win, iov->used_m64_bar_mask); + + return win; +} + +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int win; + struct resource *res; + int i, j; + int64_t rc; + resource_size_t size, start; + int base_pe_num; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + /* don't need single mode? map everything in one go! 
*/ + if (!iov->m64_single_mode[i]) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + size = resource_size(res); + start = res->start; + + rc = pnv_ioda_map_m64_segmented(phb, win, start, size); + if (rc) + goto m64_failed; + + continue; + } + + /* otherwise map each VF with single PE BARs */ + size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); + base_pe_num = iov->vf_pe_arr[0].pe_number; + + for (j = 0; j < num_vfs; j++) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + start = res->start + size * j; + rc = pnv_ioda_map_m64_single(phb, win, + base_pe_num + j, + start, + size); + if (rc) + goto m64_failed; + } + } + return 0; + +m64_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + return -EBUSY; +} + +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *pe_n; + + phb = pci_bus_to_pnvhb(pdev->bus); + + if (!pdev->is_physfn) + return; + + /* FIXME: Use pnv_ioda_release_pe()? */ + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { + if (pe->parent_dev != pdev) + continue; + + pnv_pci_ioda2_release_pe_dma(pe); + + /* Remove from list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_ioda_deconfigure_pe(phb, pe); + + pnv_ioda_free_pe(pe); + } +} + +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) +{ + struct resource *res, res2; + struct pnv_iov_data *iov; + resource_size_t size; + u16 num_vfs; + int i; + + if (!dev->is_physfn) + return -EINVAL; + iov = pnv_iov_get(dev); + + /* + * "offset" is in VFs. The M64 windows are sized so that when they + * are segmented, each segment is the same size as the IOV BAR. + * Each segment is in a separate PE, and the high order bits of the + * address are the PE number. Therefore, each VF's BAR is in a + * separate PE, and changing the IOV BAR start address changes the + * range of PEs the VFs are in. + */ + num_vfs = iov->num_vfs; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + /* + * The actual IOV BAR range is determined by the start address + * and the actual size for num_vfs VFs BAR. This check is to + * make sure that after shifting, the range will not overlap + * with another device. + */ + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2.flags = res->flags; + res2.start = res->start + (size * offset); + res2.end = res2.start + (size * num_vfs) - 1; + + if (res2.end > res->end) { + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + return -EBUSY; + } + } + + /* + * Since M64 BAR shares segments among all possible 256 PEs, + * we have to shift the beginning of PF IOV BAR to make it start from + * the segment which belongs to the PE number assigned to the first VF. + * This creates a "hole" in the /proc/iomem which could be used for + * allocating other resources so we reserve this area below and + * release when IOV is released. 
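 *
 * Worked example (sizes illustrative): if each segment, and hence each VF
 * BAR slice, is 256MB and the first allocated VF PE sits 4 segments into
 * the window (offset == 4), then res->start is moved up by 4 * 256MB = 1GB
 * and the 1GB left behind is reserved as one of the "holes" below so
 * nothing else gets allocated in the segments owned by the other PEs.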
+ */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2 = *res; + res->start += size * offset; + + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", + i, &res2, res, (offset > 0) ? "En" : "Dis", + num_vfs, offset); + + if (offset < 0) { + devm_release_resource(&dev->dev, &iov->holes[i]); + memset(&iov->holes[i], 0, sizeof(iov->holes[i])); + } + + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + + if (offset > 0) { + iov->holes[i].start = res2.start; + iov->holes[i].end = res2.start + size * offset - 1; + iov->holes[i].flags = IORESOURCE_BUS; + iov->holes[i].name = "pnv_iov_reserved"; + devm_request_resource(&dev->dev, res->parent, + &iov->holes[i]); + } + } + return 0; +} + +static void pnv_pci_sriov_disable(struct pci_dev *pdev) +{ + u16 num_vfs, base_pe; + struct pnv_iov_data *iov; + + iov = pnv_iov_get(pdev); + num_vfs = iov->num_vfs; + base_pe = iov->vf_pe_arr[0].pe_number; + + if (WARN_ON(!iov)) + return; + + /* Release VF PEs */ + pnv_ioda_release_vf_PE(pdev); + + /* Un-shift the IOV BARs if we need to */ + if (iov->need_shift) + pnv_pci_vf_resource_shift(pdev, -base_pe); + + /* Release M64 windows */ + pnv_pci_vf_release_m64(pdev, num_vfs); +} + +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + int pe_num; + u16 vf_index; + struct pnv_iov_data *iov; + struct pci_dn *pdn; + + if (!pdev->is_physfn) + return; + + phb = pci_bus_to_pnvhb(pdev->bus); + pdn = pci_get_pdn(pdev); + iov = pnv_iov_get(pdev); + + /* Reserve PE for each VF */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + + pe = &iov->vf_pe_arr[vf_index]; + pe->phb = phb; + pe->flags = PNV_IODA_PE_VF; + pe->pbus = NULL; + pe->parent_dev = pdev; + pe->mve_number = -1; + pe->rid = (vf_bus << 8) | vf_devfn; + + pe_num = pe->pe_number; + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", + pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? */ + pnv_ioda_free_pe(pe); + pe->pdev = NULL; + continue; + } + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } +} + +static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_ioda_pe *base_pe; + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int ret; + u16 i; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + /* + * There's a calls to IODA2 PE setup code littered throughout. We could + * probably fix that, but we'd still have problems due to the + * restriction inherent on IODA1 PHBs. + * + * NB: We class IODA3 as IODA2 since they're very similar. 
+ */ + if (phb->type != PNV_PHB_IODA2) { + pci_err(pdev, "SR-IOV is not supported on this PHB\n"); + return -ENXIO; + } + + if (!iov) { + dev_info(&pdev->dev, "don't support this SRIOV device with non 64bit-prefetchable IOV BAR\n"); + return -ENOSPC; + } + + /* allocate a contigious block of PEs for our VFs */ + base_pe = pnv_ioda_alloc_pe(phb, num_vfs); + if (!base_pe) { + pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs); + return -EBUSY; + } + + iov->vf_pe_arr = base_pe; + iov->num_vfs = num_vfs; + + /* Assign M64 window accordingly */ + ret = pnv_pci_vf_assign_m64(pdev, num_vfs); + if (ret) { + dev_info(&pdev->dev, "Not enough M64 window resources\n"); + goto m64_failed; + } + + /* + * When using one M64 BAR to map one IOV BAR, we need to shift + * the IOV BAR according to the PE# allocated to the VFs. + * Otherwise, the PE# for the VF will conflict with others. + */ + if (iov->need_shift) { + ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number); + if (ret) + goto shift_failed; + } + + /* Setup VF PEs */ + pnv_ioda_setup_vf_PE(pdev, num_vfs); + + return 0; + +shift_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + +m64_failed: + for (i = 0; i < num_vfs; i++) + pnv_ioda_free_pe(&iov->vf_pe_arr[i]); + + return ret; +} + +int pnv_pcibios_sriov_disable(struct pci_dev *pdev) +{ + pnv_pci_sriov_disable(pdev); + + /* Release PCI data */ + remove_sriov_vf_pdns(pdev); + return 0; +} + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_sriov_vf_pdns(pdev); + + return pnv_pci_sriov_enable(pdev, num_vfs); +} diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5bf818246339..9b9bca169275 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -162,8 +162,7 @@ EXPORT_SYMBOL_GPL(pnv_pci_set_power_state); int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct msi_desc *entry; struct msi_msg msg; int hwirq; @@ -211,8 +210,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) void pnv_teardown_msi_irqs(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct msi_desc *entry; irq_hw_number_t hwirq; @@ -824,10 +822,9 @@ EXPORT_SYMBOL(pnv_pci_get_phb_node); int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) { - __be64 val; - struct pci_controller *hose; - struct pnv_phb *phb; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); u64 tunnel_bar; + __be64 val; int rc; if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) @@ -835,9 +832,6 @@ int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) return -ENXIO; - hose = pci_bus_to_host(dev->bus); - phb = hose->private_data; - mutex_lock(&tunnel_mutex); rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); if (rc != OPAL_SUCCESS) { @@ -955,28 +949,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev; - struct pci_dn *pdn; - struct pnv_ioda_pe *pe; - struct pci_controller *hose; - struct pnv_phb *phb; switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - pdev = to_pci_dev(dev); - pdn = pci_get_pdn(pdev); - hose = 
pci_bus_to_host(pdev->bus); - phb = hose->private_data; - - WARN_ON_ONCE(!phb); - if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb) - return 0; - - pe = &phb->ioda.pe_array[pdn->pe_number]; - if (!pe->table_group.group) - return 0; - iommu_add_device(&pe->table_group, dev); - return 0; case BUS_NOTIFY_DEL_DEVICE: iommu_del_device(dev); return 0; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d3bbdeab3a32..739a0b3b72e1 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -33,6 +33,24 @@ enum pnv_phb_model { #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ #define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ +/* + * A brief note on PNV_IODA_PE_BUS_ALL + * + * This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses + * the Requester ID field of the PCIe request header to determine the device + * (and PE) that initiated a DMA. In legacy PCI individual memory read/write + * requests aren't tagged with the RID. To work around this the PCIe-to-PCI + * bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side. + * + * PCIe-to-X bridges have a similar issue even though PCI-X requests also have + * a RID in the transaction header. The PCIe-to-X bridge is permitted to "take + * ownership" of a transaction by a PCI-X device when forwarding it to the PCIe + * side of the bridge. + * + * To work around these problems we use the BUS_ALL flag since every subordinate + * bus of the bridge should go into the same PE. + */ + /* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */ #define PNV_IODA_STOPPED_STATE 0x8000000000000000 @@ -69,7 +87,14 @@ struct pnv_ioda_pe { bool tce_bypass_enabled; uint64_t tce_bypass_base; - /* MSIs. MVE index is identical for for 32 and 64 bit MSI + /* + * Used to track whether we've done DMA setup for this PE or not. We + * want to defer allocating TCE tables, etc until we've added a + * non-bridge device to the PE. + */ + bool dma_setup_done; + + /* MSIs. MVE index is identical for 32 and 64 bit MSI * and -1 if not supported. (It's actually identical to the * PE number) */ @@ -118,7 +143,6 @@ struct pnv_phb { unsigned int total_pe_num; unsigned int reserved_pe_idx; unsigned int root_pe_idx; - bool root_pe_populated; /* 32-bit MMIO window */ unsigned int m32_size; @@ -130,6 +154,7 @@ struct pnv_phb { unsigned long m64_size; unsigned long m64_segsize; unsigned long m64_base; +#define MAX_M64_BARS 64 unsigned long m64_bar_alloc; /* IO ports */ @@ -170,6 +195,89 @@ struct pnv_phb { u8 *diag_data; }; + +/* IODA PE management */ + +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) +{ + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. 
+ */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); +} + +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; +} + +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); + +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe); + +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count); +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe); + +#ifdef CONFIG_PCI_IOV +/* + * For SR-IOV we want to put each VF's MMIO resource into a separate PE. + * This requires a bit of acrobatics with the MMIO -> PE configuration + * and this structure is used to keep track of it all. + */ +struct pnv_iov_data { + /* number of VFs enabled */ + u16 num_vfs; + + /* pointer to the array of VF PEs. num_vfs long */ + struct pnv_ioda_pe *vf_pe_arr; + + /* Did we map the VF BAR with single-PE IODA BARs? */ + bool m64_single_mode[PCI_SRIOV_NUM_BARS]; + + /* + * True if we're using any segmented windows. In that case we need to + * shift the start of the IOV resource to the segment corresponding to + * the allocated PE. + */ + bool need_shift; + + /* + * Bit mask used to track which m64 windows are used to map the + * SR-IOV BARs for this device. + */ + DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS); + + /* + * If we map the SR-IOV BARs with a segmented window then + * parts of that window will be "claimed" by other PEs. + * + * "holes" here is used to reserve the leading portion + * of the window that is used by other (non VF) PEs. 
+ */ + struct resource holes[PCI_SRIOV_NUM_BARS]; +}; + +static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev) +{ + return pdev->dev.archdata.iov_data; +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev); +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno); + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs); +int pnv_pcibios_sriov_disable(struct pci_dev *pdev); +#endif /* CONFIG_PCI_IOV */ + extern struct pci_ops pnv_pci_ops; void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, @@ -190,6 +298,7 @@ extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); +extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, @@ -209,11 +318,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, /* Nvlink functions */ extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); -extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); -extern struct iommu_table_group *pnv_try_setup_npu_table_group( - struct pnv_ioda_pe *pe); -extern struct iommu_table_group *pnv_npu_compound_attach( - struct pnv_ioda_pe *pe); +extern void pnv_pci_npu_setup_iommu_groups(void); /* pci-ioda-tce.c */ #define POWERNV_IOMMU_DEFAULT_LEVELS 2 @@ -244,4 +349,16 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned int page_shift); +extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); + +static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + + if (hose) + return hose->private_data; + + return NULL; +} + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3bc188da82ba..7fcb88623081 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -399,7 +399,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) { - return 256UL * 1024 * 1024; + /* + * We map the kernel linear region with 1GB large pages on radix. For + * memory hot unplug to work our memory block size must be at least + * this size. 
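The constraint is arithmetic: a memory block can only be offlined as a whole, so it must not be smaller than the page size used for the linear mapping. A trivial standalone sketch of the mismatch the old fixed 256MB value would create on radix:

#include <stdio.h>

int main(void)
{
	unsigned long radix_map_size = 1UL << 30;	/* 1 GB linear-map pages */
	unsigned long old_block_size = 256UL << 20;	/* previous fixed value  */

	/*
	 * Offlining one 256 MB block would leave the 1 GB page mapping it
	 * partially in use, so the mapping could not be torn down; hence
	 * the block size must be at least the mapping page size.
	 */
	printf("blocks per 1GB mapping page at the old size: %lu\n",
	       radix_map_size / old_block_size);	/* 4 */
	return 0;
}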
+ */ + if (radix_enabled()) + return radix_mem_block_size; + else + return 256UL * 1024 * 1024; } #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 13e251699346..b2ba3e95bda7 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -167,7 +167,6 @@ static void pnv_smp_cpu_kill_self(void) /* Standard hot unplug procedure */ idle_task_exit(); - current->active_mm = NULL; /* for sanity */ cpu = smp_processor_id(); DBG("CPU%d offline\n", cpu); generic_set_cpu_dead(cpu); diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c new file mode 100644 index 000000000000..98ed5d8c5441 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-api.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <asm/vas.h> +#include <uapi/asm/vas-api.h> +#include "vas.h" + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documententation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. 
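The open/ioctl/mmap sequence described in the usage comment above can be fleshed out into a minimal user-space program. This is only a sketch: it assumes the exported <asm/vas-api.h> header provides struct vas_tx_win_open_attr (with the version and vas_id fields checked by the ioctl handler below) and the VAS_TX_WIN_OPEN ioctl, and that vas_id of -1 asks the kernel to pick an instance; building a GZIP CRB and the copy/paste submission itself are omitted.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <asm/vas-api.h>

int main(void)
{
	struct vas_tx_win_open_attr attr;
	long pgsz = sysconf(_SC_PAGESIZE);
	void *paste_addr;
	int fd;

	fd = open("/dev/crypto/nx-gzip", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&attr, 0, sizeof(attr));
	attr.version = 1;	/* only version 1 is accepted below */
	attr.vas_id = -1;	/* assumed: let the kernel pick an instance */

	if (ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0) {
		perror("VAS_TX_WIN_OPEN");
		return 1;
	}

	/* Map the paste address of the window just opened. */
	paste_addr = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);
	if (paste_addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * A request would now be built as a CRB and submitted with the
	 * copy/paste pair (copy-paste.h), then its CSB polled for
	 * completion. Closing the fd closes the send window.
	 */
	munmap(paste_addr, pgsz);
	close(fd);
	return 0;
}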
+ */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_attr txattr = {}; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc, vasid; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid version\n"); + return -EINVAL; + } + + vasid = uattr.vas_id; + + vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); + if (IS_ERR(txwin)) { + pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst = fp->private_data; + + if (cp_inst->txwin) { + vas_win_close(cp_inst->txwin); + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). 
+ */ + + return 0; +} + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("%s(): No send window open?\n", __func__); + return -EINVAL; + } + + vas_win_paste_addr(txwin, &paste_addr, NULL); + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, + paste_addr, vma->vm_start, rc); + + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(mod, name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), + MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_coproc_api); + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} +EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 
44035a3d6414..41fa90d2f4ab 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -38,7 +38,7 @@ static int info_show(struct seq_file *s, void *private) seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), window->tx_win ? "Send" : "Receive"); - seq_printf(s, "Pid : %d\n", window->pid); + seq_printf(s, "Pid : %d\n", vas_window_pid(window)); unlock: mutex_unlock(&vas_mutex); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c new file mode 100644 index 000000000000..3d21fce254b7 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * VAS Fault handling. + * Copyright 2019, IBM Corporation + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/kthread.h> +#include <linux/sched/signal.h> +#include <linux/mmu_context.h> +#include <asm/icswx.h> + +#include "vas.h" + +/* + * The maximum FIFO size for fault window can be 8MB + * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS + * instance will be having fault window. + * 8MB FIFO can be used if expects more faults for each VAS + * instance. + */ +#define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) + +static void dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +static void update_csb(struct vas_window *window, + struct coprocessor_request_block *crb) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!window->mm || !window->user_win)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_FAULT_ADDRESS; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. + * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + pid = window->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Process closes send window after all pending NX requests are + * completed. 
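Seen from user space, this contract amounts to polling the CSB valid bit and then inspecting the completion code. A hedged sketch follows; the struct layout and the CSB_V / CSB_CC_FAULT_ADDRESS values here are stand-ins for the real definitions in asm/icswx.h, and the fault address is stored big-endian, as noted above.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Illustrative layout and values only; the real coprocessor_status_block
 * and CSB_* constants are in asm/icswx.h. */
#define CSB_V			0x80
#define CSB_CC_FAULT_ADDRESS	250

struct csb {
	volatile uint8_t flags;
	uint8_t cs;
	uint8_t cc;
	uint8_t ce;
	uint32_t count;
	uint64_t error;
	uint64_t address;	/* big-endian, as stored by NX/kernel */
} __attribute__((aligned(128)));

/* Wait for the valid bit, then check the completion code.
 * Returns 0 on success, -1 if the request failed or must be resubmitted. */
int wait_for_csb(struct csb *csb)
{
	while (!(csb->flags & CSB_V))
		usleep(10);

	/* Read the rest of the CSB only after seeing the valid bit. */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);

	if (csb->cc == CSB_CC_FAULT_ADDRESS) {
		fprintf(stderr, "translation fault at 0x%llx; touch the buffer and resubmit\n",
			(unsigned long long)be64toh(csb->address));
		return -1;
	}

	return csb->cc ? -1 : 0;
}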
In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + if (!tsk) { + pid = window->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return; + } + + kthread_use_mm(window->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + kthread_unuse_mm(window->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +static void dump_fifo(struct vas_instance *vinst, void *entry) +{ + unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; + unsigned long *fifo = entry; + int i; + + pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size, + vinst->fault_fifo_size / CRB_SIZE); + + /* Dump 10 CRB entries or until end of FIFO */ + pr_err("Fault FIFO Dump:\n"); + for (i = 0; i < 10*(CRB_SIZE/8) && fifo < end; i += 4, fifo += 4) { + pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n", + i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3)); + } +} + +/* + * Process valid CRBs in fault FIFO. + * NX process user space requests, return credit and update the status + * in CRB. If it encounters transalation error when accessing CRB or + * request buffers, raises interrupt on the CPU to handle the fault. + * It takes credit on fault window, updates nx_fault_stamp in CRB with + * the following information and pastes CRB in fault FIFO. + * + * pswid - window ID of the window on which the request is sent. + * fault_storage_addr - fault address + * + * It can raise a single interrupt for multiple faults. Expects OS to + * process all valid faults and return credit for each fault on user + * space and fault windows. This fault FIFO control will be done with + * credit mechanism. 
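The credit numbers referred to here work out as follows; a throwaway calculation mirroring the FIFO_SIZE/CRB_SIZE figure above and the 16-bit receive-credit cap mentioned later in vas_setup_fault_window():

#include <stdio.h>

int main(void)
{
	unsigned int crb_size  = 128;		/* bytes per CRB          */
	unsigned int fifo_4m   = 4U << 20;	/* FIFO size used here    */
	unsigned int fifo_8m   = 8U << 20;	/* optional larger FIFO   */
	unsigned int creds_cap = 0xffff;	/* 16-bit receive credits */

	unsigned int c4 = fifo_4m / crb_size;	/* 32768 */
	unsigned int c8 = fifo_8m / crb_size;	/* 65536 */

	printf("4MB FIFO: %u credits\n", c4);
	printf("8MB FIFO: %u CRBs, capped to %u credits\n",
	       c8, c8 > creds_cap ? creds_cap : c8);
	return 0;
}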
NX can continuously paste CRBs until credits are not + * available on fault window. Otherwise, returns with RMA_reject. + * + * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128) + * + */ +irqreturn_t vas_fault_thread_fn(int irq, void *data) +{ + struct vas_instance *vinst = data; + struct coprocessor_request_block *crb, *entry; + struct coprocessor_request_block buf; + struct vas_window *window; + unsigned long flags; + void *fifo; + + crb = &buf; + + /* + * VAS can interrupt with multiple page faults. So process all + * valid CRBs within fault FIFO until reaches invalid CRB. + * We use CCW[0] and pswid to validate validate CRBs: + * + * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 + * OS sets this bit to 1 after reading CRB. + * pswid NX assigns window ID. Set pswid to -1 after + * reading CRB from fault FIFO. + * + * We exit this function if no valid CRBs are available to process. + * So acquire fault_lock and reset fifo_in_progress to 0 before + * exit. + * In case kernel receives another interrupt with different page + * fault, interrupt handler returns with IRQ_HANDLED if + * fifo_in_progress is set. Means these new faults will be + * handled by the current thread. Otherwise set fifo_in_progress + * and return IRQ_WAKE_THREAD to wake up thread. + */ + while (true) { + spin_lock_irqsave(&vinst->fault_lock, flags); + /* + * Advance the fault fifo pointer to next CRB. + * Use CRB_SIZE rather than sizeof(*crb) since the latter is + * aligned to CRB_ALIGN (256) but the CRB written to by VAS is + * only CRB_SIZE in len. + */ + fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE); + entry = fifo; + + if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY)) + || (entry->ccw & cpu_to_be32(CCW0_INVALID))) { + vinst->fifo_in_progress = 0; + spin_unlock_irqrestore(&vinst->fault_lock, flags); + return IRQ_HANDLED; + } + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + vinst->fault_crbs++; + if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE)) + vinst->fault_crbs = 0; + + memcpy(crb, fifo, CRB_SIZE); + entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY); + entry->ccw |= cpu_to_be32(CCW0_INVALID); + /* + * Return credit for the fault window. + */ + vas_return_credit(vinst->fault_win, false); + + pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n", + vinst->vas_id, vinst->fault_fifo, fifo, + vinst->fault_crbs); + + dump_crb(crb); + window = vas_pswid_to_window(vinst, + be32_to_cpu(crb->stamp.nx.pswid)); + + if (IS_ERR(window)) { + /* + * We got an interrupt about a specific send + * window but we can't find that window and we can't + * even clean it up (return credit on user space + * window). + * But we should not get here. + * TODO: Disable IRQ. + */ + dump_fifo(vinst, (void *)entry); + pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n", + vinst->vas_id, vinst->fault_fifo, fifo, + be32_to_cpu(crb->stamp.nx.pswid), + vinst->fault_crbs); + + WARN_ON_ONCE(1); + } else { + update_csb(window, crb); + /* + * Return credit for send window after processing + * fault CRB. + */ + vas_return_credit(window, true); + } + } +} + +irqreturn_t vas_fault_handler(int irq, void *dev_id) +{ + struct vas_instance *vinst = dev_id; + irqreturn_t ret = IRQ_WAKE_THREAD; + unsigned long flags; + + /* + * NX can generate an interrupt for multiple faults. So the + * fault handler thread process all CRBs until finds invalid + * entry. 
In case if NX sees continuous faults, it is possible + * that the thread function entered with the first interrupt + * can execute and process all valid CRBs. + * So wake up thread only if the fault thread is not in progress. + */ + spin_lock_irqsave(&vinst->fault_lock, flags); + + if (vinst->fifo_in_progress) + ret = IRQ_HANDLED; + else + vinst->fifo_in_progress = 1; + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + + return ret; +} + +/* + * Fault window is opened per VAS instance. NX pastes fault CRB in fault + * FIFO upon page faults. + */ +int vas_setup_fault_window(struct vas_instance *vinst) +{ + struct vas_rx_win_attr attr; + + vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; + vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); + if (!vinst->fault_fifo) { + pr_err("Unable to alloc %d bytes for fault_fifo\n", + vinst->fault_fifo_size); + return -ENOMEM; + } + + /* + * Invalidate all CRB entries. NX pastes valid entry for each fault. + */ + memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size); + vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); + + attr.rx_fifo_size = vinst->fault_fifo_size; + attr.rx_fifo = vinst->fault_fifo; + + /* + * Max creds is based on number of CRBs can fit in the FIFO. + * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds + * will be 0xffff since the receive creds field is 16bits wide. + */ + attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE; + attr.lnotify_lpid = 0; + attr.lnotify_pid = mfspr(SPRN_PID); + attr.lnotify_tid = mfspr(SPRN_PID); + + vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, + &attr); + + if (IS_ERR(vinst->fault_win)) { + pr_err("VAS: Error %ld opening FaultWin\n", + PTR_ERR(vinst->fault_win)); + kfree(vinst->fault_fifo); + return PTR_ERR(vinst->fault_win); + } + + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", + vinst->fault_win->winid, attr.lnotify_lpid, + attr.lnotify_pid, attr.lnotify_tid); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 0c0d27d17976..6434f9cb5aed 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -12,6 +12,8 @@ #include <linux/log2.h> #include <linux/rcupdate.h> #include <linux/cred.h> +#include <linux/sched/mm.h> +#include <linux/mmu_context.h> #include <asm/switch_to.h> #include <asm/ppc-opcode.h> #include "vas.h" @@ -24,7 +26,7 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. 
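For orientation, the paste address handed back to user space by the mmap path is just an offset into this region. A sketch with hypothetical device-tree values, assuming the base + (winid << shift) arithmetic implied by the paste_base_addr and paste_win_id_shift fields:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical device-tree derived values. */
	uint64_t paste_base_addr = 0x6017000000000ULL;
	unsigned int paste_shift = 16;
	unsigned int winid       = 5;

	uint64_t paste_addr = paste_base_addr +
			      ((uint64_t)winid << paste_shift);

	printf("window %u pastes at 0x%llx\n", winid,
	       (unsigned long long)paste_addr);
	return 0;
}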
*/ -static void compute_paste_address(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; @@ -78,7 +80,7 @@ static void *map_paste_region(struct vas_window *txwin) goto free_name; txwin->paste_addr_name = name; - compute_paste_address(txwin, &start, &len); + vas_win_paste_addr(txwin, &start, &len); if (!request_mem_region(start, len, name)) { pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", @@ -136,7 +138,7 @@ static void unmap_paste_region(struct vas_window *window) u64 busaddr_start; if (window->paste_kaddr) { - compute_paste_address(window, &busaddr_start, &len); + vas_win_paste_addr(window, &busaddr_start, &len); unmap_region(window->paste_kaddr, busaddr_start, len); window->paste_kaddr = NULL; kfree(window->paste_addr_name); @@ -373,7 +375,7 @@ int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx) init_xlate_regs(window, winctx->user_win); val = 0ULL; - val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0); + val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id); write_hvwc_reg(window, VREG(FAULT_TX_WIN), val); /* In PowerNV, interrupts go to HV. */ @@ -748,6 +750,8 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (rxwin->vinst->virq) + winctx->irq_port = rxwin->vinst->irq_port; } static bool rx_win_args_valid(enum vas_cop_type cop, @@ -768,7 +772,7 @@ static bool rx_win_args_valid(enum vas_cop_type cop, if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) return false; - if (attr->wcreds_max > VAS_RX_WCREDS_MAX) + if (!attr->wcreds_max) return false; if (attr->nx_win) { @@ -813,7 +817,8 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) { memset(rxattr, 0, sizeof(*rxattr)); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { rxattr->pin_win = true; rxattr->nx_win = true; rxattr->fault_win = false; @@ -827,9 +832,9 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) rxattr->fault_win = true; rxattr->notify_disable = true; rxattr->rx_wcred_mode = true; - rxattr->tx_wcred_mode = true; rxattr->rx_win_ord_mode = true; - rxattr->tx_win_ord_mode = true; + rxattr->rej_no_credit = true; + rxattr->tc_mode = VAS_THRESH_DISABLED; } else if (cop == VAS_COP_TYPE_FTW) { rxattr->user_win = true; rxattr->intr_disable = true; @@ -873,9 +878,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; - if (rxattr->user_win) - rxwin->pid = task_pid_vnr(current); + rxwin->wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); @@ -890,7 +893,8 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) { memset(txattr, 0, sizeof(*txattr)); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { txattr->rej_no_credit = false; txattr->rx_wcred_mode = true; txattr->tx_wcred_mode = true; @@ -944,13 +948,22 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->lpid = txattr->lpid; winctx->pidr 
= txattr->pidr; winctx->rx_win_id = txwin->rxwin->winid; + /* + * IRQ and fault window setup is successful. Set fault window + * for the send window so that ready to handle faults. + */ + if (txwin->vinst->virq) + winctx->fault_win_id = txwin->vinst->fault_win->winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; winctx->min_scope = VAS_SCOPE_LOCAL; winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (txwin->vinst->virq) + winctx->irq_port = txwin->vinst->irq_port; - winctx->pswid = 0; + winctx->pswid = txattr->pswid ? txattr->pswid : + encode_pswid(txwin->vinst->vas_id, txwin->winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -965,9 +978,14 @@ static bool tx_win_args_valid(enum vas_cop_type cop, if (attr->wcreds_max > VAS_TX_WCREDS_MAX) return false; - if (attr->user_win && - (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count)) - return false; + if (attr->user_win) { + if (attr->rsvd_txbuf_count) + return false; + + if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP && + cop != VAS_COP_TYPE_GZIP_HIPRI) + return false; + } return true; } @@ -1016,7 +1034,6 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; - txwin->pid = attr->pid; txwin->user_win = attr->user_win; txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; @@ -1040,12 +1057,59 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, } } else { /* - * A user mapping must ensure that context switch issues - * CP_ABORT for this thread. + * Interrupt hanlder or fault window setup failed. Means + * NX can not generate fault for page fault. So not + * opening for user space tx window. */ - rc = set_thread_uses_vas(); - if (rc) + if (!vinst->virq) { + rc = -ENODEV; goto free_window; + } + + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is free by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + txwin->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + txwin->mm = get_task_mm(current); + + if (!txwin->mm) { + put_pid(txwin->pid); + pr_err("VAS: pid(%d): mm_struct is not found\n", + current->pid); + rc = -EPERM; + goto free_window; + } + + mmgrab(txwin->mm); + mmput(txwin->mm); + mm_context_add_vas_window(txwin->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it. Expects parent + * thread to use and close the window. So do not need + * to take pid reference for parent thread. + */ + txwin->tgid = find_get_pid(task_tgid_vnr(current)); + /* + * Even a process that has no foreign real address mapping can + * use an unpaired COPY instruction (to no real effect). Issue + * CP_ABORT to clear any pending COPY and prevent a covert + * channel. + * + * __switch_to() will issue CP_ABORT on future context switches + * if process / thread has any open VAS window (Use + * current->mm->context.vas_windows). 
+ */ + asm volatile(PPC_CP_ABORT); } set_vinst_win(vinst, txwin); @@ -1128,6 +1192,7 @@ static void poll_window_credits(struct vas_window *window) { u64 val; int creds, mode; + int count = 0; val = read_hvwc_reg(window, VREG(WINCTL)); if (window->tx_win) @@ -1146,10 +1211,27 @@ retry: creds = GET_FIELD(VAS_LRX_WCRED, val); } + /* + * Takes around few milliseconds to complete all pending requests + * and return credits. + * TODO: Scan fault FIFO and invalidate CRBs points to this window + * and issue CRB Kill to stop all pending requests. Need only + * if there is a bug in NX or fault handling in kernel. + */ if (creds < window->wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Process can not close send window until all credits are + * returned. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", + vas_window_pid(window), window->winid, + creds, count); + goto retry; } } @@ -1163,6 +1245,7 @@ static void poll_window_busy_state(struct vas_window *window) { int busy; u64 val; + int count = 0; retry: val = read_hvwc_reg(window, VREG(WIN_STATUS)); @@ -1170,7 +1253,16 @@ retry: if (busy) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(5)); + schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Takes around few milliseconds to process all pending + * requests. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n", + vas_window_pid(window), window->winid, count); + goto retry; } } @@ -1235,22 +1327,118 @@ int vas_win_close(struct vas_window *window) unmap_paste_region(window); - clear_vinst_win(window); - poll_window_busy_state(window); unpin_close_window(window); poll_window_credits(window); + clear_vinst_win(window); + poll_window_castout(window); /* if send window, drop reference to matching receive window */ - if (window->tx_win) + if (window->tx_win) { + if (window->user_win) { + /* Drop references to pid and mm */ + put_pid(window->pid); + if (window->mm) { + mm_context_remove_vas_window(window->mm); + mmdrop(window->mm); + } + } put_rx_win(window->rxwin); + } vas_window_free(window); return 0; } EXPORT_SYMBOL_GPL(vas_win_close); + +/* + * Return credit for the given window. + * Send windows and fault window uses credit mechanism as follows: + * + * Send windows: + * - The default number of credits available for each send window is + * 1024. It means 1024 requests can be issued asynchronously at the + * same time. If the credit is not available, that request will be + * returned with RMA_Busy. + * - One credit is taken when NX request is issued. + * - This credit is returned after NX processed that request. + * - If NX encounters translation error, kernel will return the + * credit on the specific send window after processing the fault CRB. + * + * Fault window: + * - The total number credits available is FIFO_SIZE/CRB_SIZE. + * Means 4MB/128 in the current implementation. If credit is not + * available, RMA_Reject is returned. + * - A credit is taken when NX pastes CRB in fault FIFO. + * - The kernel with return credit on fault window after reading entry + * from fault FIFO. 
+ */ +void vas_return_credit(struct vas_window *window, bool tx) +{ + uint64_t val; + + val = 0ULL; + if (tx) { /* send window */ + val = SET_FIELD(VAS_TX_WCRED, val, 1); + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val); + } else { + val = SET_FIELD(VAS_LRX_WCRED, val, 1); + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val); + } +} + +struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid) +{ + struct vas_window *window; + int winid; + + if (!pswid) { + pr_devel("%s: called for pswid 0!\n", __func__); + return ERR_PTR(-ESRCH); + } + + decode_pswid(pswid, NULL, &winid); + + if (winid >= VAS_WINDOWS_PER_CHIP) + return ERR_PTR(-ESRCH); + + /* + * If application closes the window before the hardware + * returns the fault CRB, we should wait in vas_win_close() + * for the pending requests. so the window must be active + * and the process alive. + * + * If its a kernel process, we should not get any faults and + * should not get here. + */ + window = vinst->windows[winid]; + + if (!window) { + pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n", + winid, pswid, vinst); + return NULL; + } + + /* + * Do some sanity checks on the decoded window. Window should be + * NX GZIP user send window. FTW windows should not incur faults + * since their CRBs are ignored (not queued on FIFO or processed + * by NX). + */ + if (!window->tx_win || !window->user_win || !window->nx_win || + window->cop == VAS_COP_TYPE_FAULT || + window->cop == VAS_COP_TYPE_FTW) { + pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", + winid, window->tx_win, window->user_win, + window->nx_win, window->cop); + WARN_ON(1); + } + + return window; +} diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index ed9cc6df329a..598e4cd563fb 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -14,7 +14,10 @@ #include <linux/of_platform.h> #include <linux/of_address.h> #include <linux/of.h> +#include <linux/irqdomain.h> +#include <linux/interrupt.h> #include <asm/prom.h> +#include <asm/xive.h> #include "vas.h" @@ -23,12 +26,37 @@ static LIST_HEAD(vas_instances); static DEFINE_PER_CPU(int, cpu_vas_id); +static int vas_irq_fault_window_setup(struct vas_instance *vinst) +{ + char devname[64]; + int rc = 0; + + snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id); + rc = request_threaded_irq(vinst->virq, vas_fault_handler, + vas_fault_thread_fn, 0, devname, vinst); + + if (rc) { + pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n", + vinst->vas_id, vinst->virq, rc); + goto out; + } + + rc = vas_setup_fault_window(vinst); + if (rc) + free_irq(vinst->virq, vinst); + +out: + return rc; +} + static int init_vas_instance(struct platform_device *pdev) { - int rc, cpu, vasid; - struct resource *res; - struct vas_instance *vinst; struct device_node *dn = pdev->dev.of_node; + struct vas_instance *vinst; + struct xive_irq_data *xd; + uint32_t chipid, hwirq; + struct resource *res; + int rc, cpu, vasid; rc = of_property_read_u32(dn, "ibm,vas-id", &vasid); if (rc) { @@ -36,6 +64,12 @@ static int init_vas_instance(struct platform_device *pdev) return -ENODEV; } + rc = of_property_read_u32(dn, "ibm,chip-id", &chipid); + if (rc) { + pr_err("No ibm,chip-id property for %s?\n", pdev->name); + return -ENODEV; + } + if (pdev->num_resources != 4) { pr_err("Unexpected DT configuration for [%s, %d]\n", pdev->name, vasid); @@ -69,9 +103,32 @@ static int init_vas_instance(struct platform_device *pdev) 
vinst->paste_win_id_shift = 63 - res->end; - pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, " - "paste_win_id_shift 0x%llx\n", pdev->name, vasid, - vinst->paste_base_addr, vinst->paste_win_id_shift); + hwirq = xive_native_alloc_irq_on_chip(chipid); + if (!hwirq) { + pr_err("Inst%d: Unable to allocate global irq for chip %d\n", + vinst->vas_id, chipid); + return -ENOENT; + } + + vinst->virq = irq_create_mapping(NULL, hwirq); + if (!vinst->virq) { + pr_err("Inst%d: Unable to map global irq %d\n", + vinst->vas_id, hwirq); + return -EINVAL; + } + + xd = irq_get_handler_data(vinst->virq); + if (!xd) { + pr_err("Inst%d: Invalid virq %d\n", + vinst->vas_id, vinst->virq); + return -EINVAL; + } + + vinst->irq_port = xd->trig_page; + pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n", + pdev->name, vasid, vinst->paste_base_addr, + vinst->paste_win_id_shift, vinst->virq, + vinst->irq_port); for_each_possible_cpu(cpu) { if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn)) @@ -82,6 +139,22 @@ static int init_vas_instance(struct platform_device *pdev) list_add(&vinst->node, &vas_instances); mutex_unlock(&vas_mutex); + spin_lock_init(&vinst->fault_lock); + /* + * IRQ and fault handling setup is needed only for user space + * send windows. + */ + if (vinst->virq) { + rc = vas_irq_fault_window_setup(vinst); + /* + * Fault window is used only for user space send windows. + * So if vinst->virq is NULL, tx_win_open returns -ENODEV + * for user space. + */ + if (rc) + vinst->virq = 0; + } + vas_instance_init_dbgdir(vinst); dev_set_drvdata(&pdev->dev, vinst); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 5574aec9ee88..70f793e8f6cc 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -101,11 +101,9 @@ /* * Initial per-process credits. * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED) - * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED) * * TODO: Needs tuning for per-process credits */ -#define VAS_RX_WCREDS_MAX ((64 << 10) - 1) #define VAS_TX_WCREDS_MAX ((4 << 10) - 1) #define VAS_WCREDS_DEFAULT (1 << 10) @@ -296,6 +294,22 @@ enum vas_notify_after_count { }; /* + * NX can generate an interrupt for multiple faults and expects kernel + * to process all of them. So read all valid CRB entries until find the + * invalid one. So use pswid which is pasted by NX and ccw[0] (reserved + * bit in BE) to check valid CRB. CCW[0] will not be touched by user + * space. Application gets CRB formt error if it updates this bit. + * + * Invalidate FIFO during allocation and process all entries from last + * successful read until finds invalid pswid and ccw[0] values. + * After reading each CRB entry from fault FIFO, the kernel invalidate + * it by updating pswid with FIFO_INVALID_ENTRY and CCW[0] with + * CCW0_INVALID. + */ +#define FIFO_INVALID_ENTRY 0xffffffff +#define CCW0_INVALID 1 + +/* * One per instance of VAS. Each instance will have a separate set of * receive windows, one per coprocessor type. 
* @@ -313,6 +327,15 @@ struct vas_instance { u64 paste_base_addr; u64 paste_win_id_shift; + u64 irq_port; + int virq; + int fault_crbs; + int fault_fifo_size; + int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ + spinlock_t fault_lock; /* Protects fifo_in_progress update */ + void *fault_fifo; + struct vas_window *fault_win; /* Fault window */ + struct mutex mutex; struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; @@ -333,7 +356,9 @@ struct vas_window { bool user_win; /* True if user space window */ void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ - pid_t pid; /* Linux process id of owner */ + struct pid *pid; /* Linux process id of owner */ + struct pid *tgid; /* Thread group ID of owner */ + struct mm_struct *mm; /* Linux process mm_struct */ int wcreds_max; /* Window credits */ char *dbgname; @@ -406,6 +431,19 @@ extern void vas_init_dbgdir(void); extern void vas_instance_init_dbgdir(struct vas_instance *vinst); extern void vas_window_init_dbgdir(struct vas_window *win); extern void vas_window_free_dbgdir(struct vas_window *win); +extern int vas_setup_fault_window(struct vas_instance *vinst); +extern irqreturn_t vas_fault_thread_fn(int irq, void *data); +extern irqreturn_t vas_fault_handler(int irq, void *dev_id); +extern void vas_return_credit(struct vas_window *window, bool tx); +extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid); +extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, + int *len); + +static inline int vas_window_pid(struct vas_window *window) +{ + return pid_vnr(window->pid); +} static inline void vas_log_write(struct vas_window *win, char *name, void *regptr, u64 val) @@ -444,6 +482,21 @@ static inline u64 read_hvwc_reg(struct vas_window *win, return in_be64(win->hvwc_map+reg); } +/* + * Encode/decode the Partition Send Window ID (PSWID) for a window in + * a way that we can uniquely identify any window in the system. i.e. + * we should be able to locate the 'struct vas_window' given the PSWID. 
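A worked example of the packing described below, with arbitrary values; the encode/decode here simply mirrors encode_pswid()/decode_pswid() for illustration:

#include <assert.h>
#include <stdint.h>

static uint32_t encode(int vasid, int winid)
{
	return (uint32_t)winid | ((uint32_t)vasid << (31 - 7));
}

static void decode(uint32_t pswid, int *vasid, int *winid)
{
	*vasid = (pswid >> (31 - 7)) & 0xff;
	*winid = pswid & 0xffff;
}

int main(void)
{
	uint32_t pswid = encode(5, 0xab);
	int vasid, winid;

	assert(pswid == 0x050000ab);
	decode(pswid, &vasid, &winid);
	assert(vasid == 5 && winid == 0xab);
	return 0;
}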
+ * + * Bits Usage + * 0:7 VAS id (8 bits) + * 8:15 Unused, 0 (3 bits) + * 16:31 Window id (16 bits) + */ +static inline u32 encode_pswid(int vasid, int winid) +{ + return ((u32)winid | (vasid << (31 - 7))); +} + static inline void decode_pswid(u32 pswid, int *vasid, int *winid) { if (vasid) diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 423be34f0f5f..d094321964fb 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -200,13 +200,14 @@ void ps3_mm_vas_destroy(void) { int result; - DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id); - if (map.vas_id) { result = lv1_select_virtual_address_space(0); - BUG_ON(result); - result = lv1_destruct_virtual_address_space(map.vas_id); - BUG_ON(result); + result += lv1_destruct_virtual_address_space(map.vas_id); + + if (result) { + lv1_panic(0); + } + map.vas_id = 0; } } @@ -263,7 +264,7 @@ static int ps3_mm_region_create(struct mem_region *r, unsigned long size) int result; u64 muid; - r->size = _ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); + r->size = ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); DBG("%s:%d requested %lxh\n", __func__, __LINE__, size); DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size); @@ -304,19 +305,20 @@ static void ps3_mm_region_destroy(struct mem_region *r) int result; if (!r->destroy) { - pr_info("%s:%d: Not destroying high region: %llxh %llxh\n", - __func__, __LINE__, r->base, r->size); return; } - DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base); - if (r->base) { result = lv1_release_memory(r->base); - BUG_ON(result); + + if (result) { + lv1_panic(0); + } + r->size = r->base = r->offset = 0; map.total = map.rm.size; } + ps3_mm_set_repository_highmem(NULL); } @@ -394,8 +396,8 @@ static struct dma_chunk * dma_find_chunk(struct ps3_dma_region *r, unsigned long bus_addr, unsigned long len) { struct dma_chunk *c; - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len+bus_addr-aligned_bus, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len+bus_addr-aligned_bus, 1 << r->page_size); list_for_each_entry(c, &r->chunk_list.head, link) { @@ -423,8 +425,8 @@ static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r, unsigned long lpar_addr, unsigned long len) { struct dma_chunk *c; - unsigned long aligned_lpar = _ALIGN_DOWN(lpar_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + lpar_addr - aligned_lpar, + unsigned long aligned_lpar = ALIGN_DOWN(lpar_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + lpar_addr - aligned_lpar, 1 << r->page_size); list_for_each_entry(c, &r->chunk_list.head, link) { @@ -775,8 +777,8 @@ static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr, struct dma_chunk *c; unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) : virt_addr; - unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys, + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, 1 << r->page_size); *bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr)); @@ -830,8 +832,8 @@ static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr, struct dma_chunk *c; unsigned long phys_addr = is_kernel_addr(virt_addr) ? 
__pa(virt_addr) : virt_addr; - unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys, + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, 1 << r->page_size); DBG(KERN_ERR "%s: vaddr=%#lx, len=%#lx\n", __func__, @@ -889,9 +891,9 @@ static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr, c = dma_find_chunk(r, bus_addr, len); if (!c) { - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + bus_addr + unsigned long aligned_len = ALIGN(len + bus_addr - aligned_bus, 1 << r->page_size); DBG("%s:%d: not found: bus_addr %llxh\n", __func__, __LINE__, bus_addr); @@ -926,9 +928,9 @@ static int dma_ioc0_unmap_area(struct ps3_dma_region *r, c = dma_find_chunk(r, bus_addr, len); if (!c) { - unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); - unsigned long aligned_len = _ALIGN_UP(len + bus_addr + unsigned long aligned_len = ALIGN(len + bus_addr - aligned_bus, 1 << r->page_size); DBG("%s:%d: not found: bus_addr %llxh\n", @@ -974,7 +976,7 @@ static int dma_sb_region_create_linear(struct ps3_dma_region *r) pr_info("%s:%d: forcing 16M pages for linear map\n", __func__, __LINE__); r->page_size = PS3_DMA_16M; - r->len = _ALIGN_UP(r->len, 1 << r->page_size); + r->len = ALIGN(r->len, 1 << r->page_size); } } @@ -1125,7 +1127,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset = lpar_addr; if (r->offset >= map.rm.size) r->offset -= map.r1.offset; - r->len = len ? len : _ALIGN_UP(map.total, 1 << r->page_size); + r->len = len ? len : ALIGN(map.total, 1 << r->page_size); switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index b29368931c56..e9ae5dd03593 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -138,7 +138,7 @@ static int __init early_parse_ps3fb(char *p) if (!p) return 1; - ps3fb_videomemory.size = _ALIGN_UP(memparse(p, &p), + ps3fb_videomemory.size = ALIGN(memparse(p, &p), ps3fb_videomemory.align); return 0; } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 24c18362e5ea..5e037df2a3a1 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -25,15 +25,22 @@ config PPC_PSERIES select SWIOTLB default y +config PARAVIRT_SPINLOCKS + bool + config PPC_SPLPAR - depends on PPC_PSERIES bool "Support for shared-processor logical partitions" + depends on PPC_PSERIES + select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS + default y help Enabling this option will make the kernel run more efficiently on logically-partitioned pSeries systems which use shared processors, that is, which share physical processors between two or more partitions. + Say Y if you are unsure. 
+ config DTL bool "Dispatch Trace Log" depends on PPC_SPLPAR && DEBUG_FS diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 9dba7e880885..45a3a3022a85 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -26,7 +26,6 @@ #include <asm/firmware.h> #include <asm/hvcall.h> #include <asm/mmu.h> -#include <asm/pgalloc.h> #include <linux/uaccess.h> #include <linux/memory.h> #include <asm/plpar_wrappers.h> diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index eab8aa293743..982f069e4c31 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -12,6 +12,7 @@ #include <asm/smp.h> #include <linux/uaccess.h> #include <asm/firmware.h> +#include <asm/dtl.h> #include <asm/lppaca.h> #include <asm/debugfs.h> #include <asm/plpar_wrappers.h> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 845342814edc..cb2d9a970b7b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -24,6 +24,7 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/spinlock.h> +#include <linux/crash_dump.h> #include <asm/eeh.h> #include <asm/eeh_event.h> @@ -32,6 +33,8 @@ #include <asm/ppc-pci.h> #include <asm/rtas.h> +static int pseries_eeh_get_pe_addr(struct pci_dn *pdn); + /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; @@ -52,8 +55,6 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) dev_dbg(&pdev->dev, "EEH: Setting up device\n"); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { - struct pci_dn *physfn_pdn; - pdn->device_id = pdev->device; pdn->vendor_id = pdev->vendor; pdn->class_code = pdev->class; @@ -63,23 +64,172 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) * completion from platform. */ pdn->last_allow_rc = 0; - physfn_pdn = pci_get_pdn(pdev->physfn); - pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; } #endif pseries_eeh_init_edev(pdn); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { + /* + * FIXME: This really should be handled by choosing the right + * parent PE in in pseries_eeh_init_edev(). + */ + struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8); - eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */ - eeh_add_to_parent_pe(edev); /* Add as VF PE type */ + eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */ + eeh_pe_tree_insert(edev, physfn_pe); /* Add as VF PE type */ } #endif eeh_probe_device(pdev); } + +/** + * pseries_eeh_get_config_addr - Retrieve config address + * + * Retrieve the assocated config address. Actually, there're 2 RTAS + * function calls dedicated for the purpose. We need implement + * it through the new function and then the old one. Besides, + * you should make sure the config address is figured out from + * FDT node before calling the function. + * + * It's notable that zero'ed return value means invalid PE config + * address. + */ +static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr) +{ + int ret = 0; + int rets[3]; + + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { + /* + * First of all, we need to make sure there has one PE + * associated with the device. Otherwise, PE address is + * meaningless. 
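The config addresses fed to these RTAS calls are plain (busno << 16) | (devfn << 8) packings, matching the expressions used elsewhere in this file. A standalone sketch of the arithmetic with hypothetical device numbers; the in-kernel helper is rtas_config_addr():

#include <assert.h>
#include <stdio.h>

/* Mirrors the (busno << 16) | (devfn << 8) packing used above; the real
 * helper also handles extended config space offsets. */
static unsigned int config_addr(unsigned int busno, unsigned int devfn,
				unsigned int reg)
{
	return (busno << 16) | (devfn << 8) | reg;
}

int main(void)
{
	/* Device 0000:42:03.1 -> devfn = (3 << 3) | 1 = 0x19. */
	unsigned int addr = config_addr(0x42, (3 << 3) | 1, 0);

	assert(addr == 0x421900);
	printf("config_addr = 0x%06x\n", addr);
	return 0;
}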
+ */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 1); + if (ret || (rets[0] == 0)) + return 0; + + /* Retrieve the associated PE config address */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return 0; + } + + return rets[0]; + } + + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return 0; + } + + return rets[0]; + } + + return ret; +} + +/** + * pseries_eeh_phb_reset - Reset the specified PHB + * @phb: PCI controller + * @config_adddr: the associated config address + * @option: reset option + * + * Reset the specified PHB/PE + */ +static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option) +{ + int ret; + + /* Reset PE through RTAS call */ + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + + /* If fundamental-reset not supported, try hot-reset */ + if (option == EEH_RESET_FUNDAMENTAL && + ret == -8) { + option = EEH_RESET_HOT; + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + } + + /* We need reset hold or settlement delay */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + msleep(EEH_PE_RST_HOLD_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + + return ret; +} + +/** + * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE + * @phb: PCI controller + * @config_adddr: the associated config address + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr) +{ + int ret; + /* Waiting 0.2s maximum before skipping configuration */ + int max_wait = 200; + + while (max_wait > 0) { + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid)); + + if (!ret) + return ret; + if (ret < 0) + break; + + /* + * If RTAS returns a delay value that's above 100ms, cut it + * down to 100ms in case firmware made a mistake. For more + * on how these delay values work see rtas_busy_delay_time + */ + if (ret > RTAS_EXTENDED_DELAY_MIN+2 && + ret <= RTAS_EXTENDED_DELAY_MAX) + ret = RTAS_EXTENDED_DELAY_MIN+2; + + max_wait -= rtas_busy_delay_time(ret); + + if (max_wait < 0) + break; + + rtas_busy_delay(ret); + } + + pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", + __func__, phb->global_number, config_addr, ret); + /* PAPR defines -3 as "Parameter Error" for this function: */ + if (ret == -3) + return -EINVAL; + else + return -EIO; +} + /* * Buffer for reporting slot-error-detail rtas calls. 
Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -96,6 +246,10 @@ static int eeh_error_buf_size; */ static int pseries_eeh_init(void) { + struct pci_controller *phb; + struct pci_dn *pdn; + int addr, config_addr; + /* figure out EEH RTAS function call tokens */ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); @@ -148,6 +302,22 @@ static int pseries_eeh_init(void) /* Set EEH machine dependent code */ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; + if (is_kdump_kernel() || reset_devices) { + pr_info("Issue PHB reset ...\n"); + list_for_each_entry(phb, &hose_list, list_node) { + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); + addr = (pdn->busno << 16) | (pdn->devfn << 8); + config_addr = pseries_eeh_get_config_addr(phb, addr); + /* invalid PE config addr */ + if (config_addr == 0) + continue; + + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); + pseries_eeh_phb_configure_bridge(phb, config_addr); + } + } + return 0; } @@ -221,6 +391,43 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) } /** + * pseries_eeh_pe_get_parent - Retrieve the parent PE + * @edev: EEH device + * + * The whole PEs existing in the system are organized as hierarchy + * tree. The function is used to retrieve the parent PE according + * to the parent EEH device. + */ +static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev) +{ + struct eeh_dev *parent; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + /* + * It might have the case for the indirect parent + * EEH device already having associated PE, but + * the direct parent EEH device doesn't have yet. + */ + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; + while (pdn) { + /* We're poking out of PCI territory */ + parent = pdn_to_eeh_dev(pdn); + if (!parent) + return NULL; + + if (parent->pe) + return parent->pe; + + pdn = pdn->parent; + } + + return NULL; +} + +/** * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn * * @pdn: PCI device node @@ -275,12 +482,11 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) * correctly reflects that current device is root port * or PCIe switch downstream port. */ - edev->class_code = pdn->class_code; edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP); edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); edev->mode &= 0xFFFFFF00; - if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, @@ -304,8 +510,10 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) if (ret) { eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret); } else { + struct eeh_pe *parent; + /* Retrieve PE address */ - edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); + edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn); pe.addr = edev->pe_config_addr; /* Some older systems (Power4) allow the ibm,set-eeh-option @@ -316,16 +524,19 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) enable = 1; - if (enable) { + /* + * This device doesn't support EEH, but it may have an + * EEH parent. 
In this case any error on the device will + * freeze the PE of it's upstream bridge, so added it to + * the upstream PE. + */ + parent = pseries_eeh_pe_get_parent(edev); + if (parent && !enable) + edev->pe_config_addr = parent->addr; + + if (enable || parent) { eeh_add_flag(EEH_ENABLED); - eeh_add_to_parent_pe(edev); - } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) && - (pdn_to_eeh_dev(pdn->parent))->pe) { - /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. - */ - edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr; - eeh_add_to_parent_pe(edev); + eeh_pe_tree_insert(edev, parent); } eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n", (enable ? "enabled" : "unsupported"), ret); @@ -435,8 +646,10 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) * It's notable that zero'ed return value means invalid PE config * address. */ -static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) +static int pseries_eeh_get_pe_addr(struct pci_dn *pdn) { + int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + unsigned long buid = pdn->phb->buid; int ret = 0; int rets[3]; @@ -447,18 +660,16 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) * meaningless. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 1); + config_addr, BUID_HI(buid), BUID_LO(buid), 1); if (ret || (rets[0] == 0)) return 0; /* Retrieve the associated PE config address */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 0); + config_addr, BUID_HI(buid), BUID_LO(buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pe->phb->global_number, pe->config_addr); + __func__, pdn->phb->global_number, config_addr); return 0; } @@ -467,11 +678,10 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 0); + config_addr, BUID_HI(buid), BUID_LO(buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pe->phb->global_number, pe->config_addr); + __func__, pdn->phb->global_number, config_addr); return 0; } @@ -569,35 +779,13 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) static int pseries_eeh_reset(struct eeh_pe *pe, int option) { int config_addr; - int ret; /* Figure out PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - /* Reset PE through RTAS call */ - ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), option); - - /* If fundamental-reset not supported, try hot-reset */ - if (option == EEH_RESET_FUNDAMENTAL && - ret == -8) { - option = EEH_RESET_HOT; - ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), option); - } - - /* We need reset hold or settlement delay */ - if (option == EEH_RESET_FUNDAMENTAL || - option == EEH_RESET_HOT) - msleep(EEH_PE_RST_HOLD_TIME); - else - msleep(EEH_PE_RST_SETTLE_TIME); - - return ret; + return pseries_eeh_phb_reset(pe->phb, config_addr, option); } /** @@ -641,102 +829,49 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE * @pe: EEH 
PE * - * The function will be called to reconfigure the bridges included - * in the specified PE so that the mulfunctional PE would be recovered - * again. */ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { int config_addr; - int ret; - /* Waiting 0.2s maximum before skipping configuration */ - int max_wait = 200; /* Figure out the PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - while (max_wait > 0) { - ret = rtas_call(ibm_configure_pe, 3, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid)); - - if (!ret) - return ret; - - /* - * If RTAS returns a delay value that's above 100ms, cut it - * down to 100ms in case firmware made a mistake. For more - * on how these delay values work see rtas_busy_delay_time - */ - if (ret > RTAS_EXTENDED_DELAY_MIN+2 && - ret <= RTAS_EXTENDED_DELAY_MAX) - ret = RTAS_EXTENDED_DELAY_MIN+2; - - max_wait -= rtas_busy_delay_time(ret); - - if (max_wait < 0) - break; - - rtas_busy_delay(ret); - } - - pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, ret); - return ret; + return pseries_eeh_phb_configure_bridge(pe->phb, config_addr); } /** * pseries_eeh_read_config - Read PCI config space - * @pdn: PCI device node - * @where: PCI address + * @edev: EEH device handle + * @where: PCI config space offset * @size: size to read * @val: return value * * Read config space from the speicifed device */ -static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + return rtas_read_config(pdn, where, size, val); } /** * pseries_eeh_write_config - Write PCI config space - * @pdn: PCI device node - * @where: PCI address + * @edev: EEH device handle + * @where: PCI config space offset * @size: size to write * @val: value to be written * * Write config space to the specified device */ -static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val) -{ - return rtas_write_config(pdn, where, size, val); -} - -static int pseries_eeh_restore_config(struct pci_dn *pdn) +static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - s64 ret = 0; - - if (!edev) - return -EEXIST; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); - /* - * FIXME: The MPS, error routing rules, timeout setting are worthy - * to be exported by firmware in extendible way. 
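
With this change both config-space accessors take the eeh_dev directly and derive the pci_dn internally via eeh_dev_to_pdn(), so callers no longer need to hold a pci_dn themselves. A short usage sketch of the reworked ops table entries (the wrapper function and variable names are illustrative, not part of the patch):

    /* Sketch only: driving the reworked accessors through the eeh_ops table. */
    static int example_enable_memory_decode(struct eeh_dev *edev)
    {
            u32 cmd;
            int rc;

            rc = eeh_ops->read_config(edev, PCI_COMMAND, 2, &cmd);
            if (rc)
                    return rc;

            /* same device handle, no pci_dn lookup needed at the call site */
            return eeh_ops->write_config(edev, PCI_COMMAND, 2, cmd | PCI_COMMAND_MEMORY);
    }
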
- */ - if (edev->physfn) - ret = eeh_restore_vf_config(pdn); - - if (ret) { - pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", - __func__, edev->pe_config_addr, ret); - return -EIO; - } - - return ret; + return rtas_write_config(pdn, where, size, val); } #ifdef CONFIG_PCI_IOV @@ -766,8 +901,8 @@ int pseries_send_allow_unfreeze(struct pci_dn *pdn, static int pseries_call_allow_unfreeze(struct eeh_dev *edev) { + int cur_vfs = 0, rc = 0, vf_index, bus, devfn, vf_pe_num; struct pci_dn *pdn, *tmp, *parent, *physfn_pdn; - int cur_vfs = 0, rc = 0, vf_index, bus, devfn; u16 *vf_pe_array; vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); @@ -800,8 +935,10 @@ static int pseries_call_allow_unfreeze(struct eeh_dev *edev) } } else { pdn = pci_get_pdn(edev->pdev); - vf_pe_array[0] = cpu_to_be16(pdn->pe_number); physfn_pdn = pci_get_pdn(edev->physfn); + + vf_pe_num = physfn_pdn->pe_num_map[edev->vf_index]; + vf_pe_array[0] = cpu_to_be16(vf_pe_num); rc = pseries_send_allow_unfreeze(physfn_pdn, vf_pe_array, 1); pdn->last_allow_rc = rc; @@ -812,10 +949,8 @@ static int pseries_call_allow_unfreeze(struct eeh_dev *edev) return rc; } -static int pseries_notify_resume(struct pci_dn *pdn) +static int pseries_notify_resume(struct eeh_dev *edev) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - if (!edev) return -EEXIST; @@ -835,7 +970,6 @@ static struct eeh_ops pseries_eeh_ops = { .init = pseries_eeh_init, .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, - .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, .reset = pseries_eeh_reset, .get_log = pseries_eeh_get_log, @@ -844,7 +978,7 @@ static struct eeh_ops pseries_eeh_ops = { .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, - .restore_config = pseries_eeh_restore_config, + .restore_config = NULL, /* NB: configure_bridge() does this */ #ifdef CONFIG_PCI_IOV .notify_resume = pseries_notify_resume #endif diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 3e49cc23a97a..4c7b7f5a2ebc 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -65,6 +65,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"}, {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, {FW_FEATURE_PAPR_SCM, "hcall-scm"}, + {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 3e8cbfe7a80f..7a974ed6b240 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -35,54 +35,10 @@ #include <asm/topology.h> #include "pseries.h" -#include "offline_states.h" /* This version can't take the spinlock, because it never returns */ static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; -static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = - CPU_STATE_OFFLINE; -static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; - -static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; - -static bool cede_offline_enabled __read_mostly = true; - -/* - * Enable/disable cede_offline when available. 
- */ -static int __init setup_cede_offline(char *str) -{ - return (kstrtobool(str, &cede_offline_enabled) == 0); -} - -__setup("cede_offline=", setup_cede_offline); - -enum cpu_state_vals get_cpu_current_state(int cpu) -{ - return per_cpu(current_state, cpu); -} - -void set_cpu_current_state(int cpu, enum cpu_state_vals state) -{ - per_cpu(current_state, cpu) = state; -} - -enum cpu_state_vals get_preferred_offline_state(int cpu) -{ - return per_cpu(preferred_offline_state, cpu); -} - -void set_preferred_offline_state(int cpu, enum cpu_state_vals state) -{ - per_cpu(preferred_offline_state, cpu) = state; -} - -void set_default_offline_state(int cpu) -{ - per_cpu(preferred_offline_state, cpu) = default_offline_state; -} - static void rtas_stop_self(void) { static struct rtas_args args; @@ -101,9 +57,7 @@ static void rtas_stop_self(void) static void pseries_mach_cpu_die(void) { - unsigned int cpu = smp_processor_id(); unsigned int hwcpu = hard_smp_processor_id(); - u8 cede_latency_hint = 0; local_irq_disable(); idle_task_exit(); @@ -112,49 +66,6 @@ static void pseries_mach_cpu_die(void) else xics_teardown_cpu(); - if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - set_cpu_current_state(cpu, CPU_STATE_INACTIVE); - if (ppc_md.suspend_disable_cpu) - ppc_md.suspend_disable_cpu(); - - cede_latency_hint = 2; - - get_lppaca()->idle = 1; - if (!lppaca_shared_proc(get_lppaca())) - get_lppaca()->donate_dedicated_cpu = 1; - - while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - while (!prep_irq_for_idle()) { - local_irq_enable(); - local_irq_disable(); - } - - extended_cede_processor(cede_latency_hint); - } - - local_irq_disable(); - - if (!lppaca_shared_proc(get_lppaca())) - get_lppaca()->donate_dedicated_cpu = 0; - get_lppaca()->idle = 0; - - if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { - unregister_slb_shadow(hwcpu); - - hard_irq_disable(); - /* - * Call to start_secondary_resume() will not return. - * Kernel stack will be reset and start_secondary() - * will be called to continue the online operation. - */ - start_secondary_resume(); - } - } - - /* Requested state is CPU_STATE_OFFLINE at this point */ - WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); - - set_cpu_current_state(cpu, CPU_STATE_OFFLINE); unregister_slb_shadow(hwcpu); rtas_stop_self(); @@ -196,33 +107,28 @@ static int pseries_cpu_disable(void) */ static void pseries_cpu_die(unsigned int cpu) { - int tries; int cpu_status = 1; unsigned int pcpu = get_hard_smp_processor_id(cpu); + unsigned long timeout = jiffies + msecs_to_jiffies(120000); - if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - cpu_status = 1; - for (tries = 0; tries < 5000; tries++) { - if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { - cpu_status = 0; - break; - } - msleep(1); - } - } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { + while (true) { + cpu_status = smp_query_cpu_stopped(pcpu); + if (cpu_status == QCSS_STOPPED || + cpu_status == QCSS_HARDWARE_ERROR) + break; - for (tries = 0; tries < 25; tries++) { - cpu_status = smp_query_cpu_stopped(pcpu); - if (cpu_status == QCSS_STOPPED || - cpu_status == QCSS_HARDWARE_ERROR) - break; - cpu_relax(); + if (time_after(jiffies, timeout)) { + pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n", + cpu, pcpu); + timeout = jiffies + msecs_to_jiffies(120000); } + + cond_resched(); } - if (cpu_status != 0) { - printk("Querying DEAD? 
cpu %i (%i) shows %i\n", - cpu, pcpu, cpu_status); + if (cpu_status == QCSS_HARDWARE_ERROR) { + pr_warn("CPU %i (hwid %i) reported error while dying\n", + cpu, pcpu); } /* Isolation and deallocation are definitely done by @@ -359,28 +265,14 @@ static int dlpar_offline_cpu(struct device_node *dn) if (get_hard_smp_processor_id(cpu) != thread) continue; - if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) + if (!cpu_online(cpu)) break; - if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { - set_preferred_offline_state(cpu, - CPU_STATE_OFFLINE); - cpu_maps_update_done(); - timed_topology_update(1); - rc = device_offline(get_cpu_device(cpu)); - if (rc) - goto out; - cpu_maps_update_begin(); - break; - } - - /* - * The cpu is in CPU_STATE_INACTIVE. - * Upgrade it's state to CPU_STATE_OFFLINE. - */ - set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); - __cpu_die(cpu); + cpu_maps_update_done(); + rc = device_offline(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); break; } if (cpu == num_possible_cpus()) { @@ -414,10 +306,7 @@ static int dlpar_online_cpu(struct device_node *dn) for_each_present_cpu(cpu) { if (get_hard_smp_processor_id(cpu) != thread) continue; - BUG_ON(get_cpu_current_state(cpu) - != CPU_STATE_OFFLINE); cpu_maps_update_done(); - timed_topology_update(1); find_and_online_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); if (rc) { @@ -854,7 +743,6 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) parent = of_find_node_by_path("/cpus"); if (!parent) { pr_warn("Could not find CPU root node in device tree\n"); - kfree(cpu_drcs); return -1; } @@ -896,25 +784,6 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) return rc; } -int dlpar_cpu_readd(int cpu) -{ - struct device_node *dn; - struct device *dev; - u32 drc_index; - int rc; - - dev = get_cpu_device(cpu); - dn = dev->of_node; - - rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); - - rc = dlpar_cpu_remove_by_index(drc_index); - if (!rc) - rc = dlpar_cpu_add(drc_index); - - return rc; -} - int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { u32 count, drc_index; @@ -1013,27 +882,8 @@ static struct notifier_block pseries_smp_nb = { .notifier_call = pseries_smp_notifier, }; -#define MAX_CEDE_LATENCY_LEVELS 4 -#define CEDE_LATENCY_PARAM_LENGTH 10 -#define CEDE_LATENCY_PARAM_MAX_LENGTH \ - (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) -#define CEDE_LATENCY_TOKEN 45 - -static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; - -static int parse_cede_parameters(void) -{ - memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); - return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - CEDE_LATENCY_TOKEN, - __pa(cede_parameters), - CEDE_LATENCY_PARAM_MAX_LENGTH); -} - static int __init pseries_cpu_hotplug_init(void) { - int cpu; int qcss_tok; #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE @@ -1056,16 +906,8 @@ static int __init pseries_cpu_hotplug_init(void) smp_ops->cpu_die = pseries_cpu_die; /* Processors can be added/removed only on LPAR */ - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_smp_nb); - cpu_maps_update_begin(); - if (cede_offline_enabled && parse_cede_parameters() == 0) { - default_offline_state = CPU_STATE_INACTIVE; - for_each_online_cpu(cpu) - set_default_offline_state(cpu); - } - cpu_maps_update_done(); - } return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c index b2cde1732301..5d545b78111f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -22,12 +22,10 @@ #include <asm/drmem.h> #include "pseries.h" -static bool rtas_hp_event; - unsigned long pseries_memory_block_size(void) { struct device_node *np; - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; + u64 memblock_size = MIN_MEMORY_BLOCK_SIZE; struct resource r; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -337,39 +335,19 @@ static int pseries_remove_mem_node(struct device_node *np) static bool lmb_is_removable(struct drmem_lmb *lmb) { - int i, scns_per_block; - bool rc = true; - unsigned long pfn, block_sz; - u64 phys_addr; - if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) return false; - block_sz = memory_block_size_bytes(); - scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; - phys_addr = lmb->base_addr; - #ifdef CONFIG_FA_DUMP /* * Don't hot-remove memory that falls in fadump boot memory area * and memory that is reserved for capturing old kernel memory. */ - if (is_fadump_memory_area(phys_addr, block_sz)) + if (is_fadump_memory_area(lmb->base_addr, memory_block_size_bytes())) return false; #endif - - for (i = 0; i < scns_per_block; i++) { - pfn = PFN_DOWN(phys_addr); - if (!pfn_in_present_section(pfn)) { - phys_addr += MIN_MEMORY_BLOCK_SIZE; - continue; - } - - rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION); - phys_addr += MIN_MEMORY_BLOCK_SIZE; - } - - return rc; + /* device_offline() will determine if we can actually remove this lmb */ + return true; } static int dlpar_add_lmb(struct drmem_lmb *); @@ -507,40 +485,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) return rc; } -static int dlpar_memory_readd_by_index(u32 drc_index) -{ - struct drmem_lmb *lmb; - int lmb_found; - int rc; - - pr_info("Attempting to update LMB, drc index %x\n", drc_index); - - lmb_found = 0; - for_each_drmem_lmb(lmb) { - if (lmb->drc_index == drc_index) { - lmb_found = 1; - rc = dlpar_remove_lmb(lmb); - if (!rc) { - rc = dlpar_add_lmb(lmb); - if (rc) - dlpar_release_drc(lmb->drc_index); - } - break; - } - } - - if (!lmb_found) - rc = -EINVAL; - - if (rc) - pr_info("Failed to update memory at %llx\n", - lmb->base_addr); - else - pr_info("Memory at %llx was updated\n", lmb->base_addr); - - return rc; -} - static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; @@ -637,10 +581,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) { return -EOPNOTSUPP; } -static int dlpar_memory_readd_by_index(u32 drc_index) -{ - return -EOPNOTSUPP; -} static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { @@ -923,21 +863,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) } break; - case PSERIES_HP_ELOG_ACTION_READD: - drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_readd_by_index(drc_index); - break; default: pr_err("Invalid action (%d) specified\n", hp_elog->action); rc = -EINVAL; break; } - if (!rc) { - rtas_hp_event = true; + if (!rc) rc = drmem_update_dt(); - rtas_hp_event = false; - } unlock_device_hotplug(); return rc; @@ -973,60 +906,6 @@ static int pseries_add_mem_node(struct device_node *np) return (ret < 0) ? 
-EINVAL : 0; } -static int pseries_update_drconf_memory(struct of_reconfig_data *pr) -{ - struct of_drconf_cell_v1 *new_drmem, *old_drmem; - unsigned long memblock_size; - u32 entries; - __be32 *p; - int i, rc = -EINVAL; - - if (rtas_hp_event) - return 0; - - memblock_size = pseries_memory_block_size(); - if (!memblock_size) - return -EINVAL; - - if (!pr->old_prop) - return 0; - - p = (__be32 *) pr->old_prop->value; - if (!p) - return -EINVAL; - - /* The first int of the property is the number of lmb's described - * by the property. This is followed by an array of of_drconf_cell - * entries. Get the number of entries and skip to the array of - * of_drconf_cell's. - */ - entries = be32_to_cpu(*p++); - old_drmem = (struct of_drconf_cell_v1 *)p; - - p = (__be32 *)pr->prop->value; - p++; - new_drmem = (struct of_drconf_cell_v1 *)p; - - for (i = 0; i < entries; i++) { - if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && - (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) { - rc = pseries_remove_memblock( - be64_to_cpu(old_drmem[i].base_addr), - memblock_size); - break; - } else if ((!(be32_to_cpu(old_drmem[i].flags) & - DRCONF_MEM_ASSIGNED)) && - (be32_to_cpu(new_drmem[i].flags) & - DRCONF_MEM_ASSIGNED)) { - rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr), - memblock_size); - rc = (rc < 0) ? -EINVAL : 0; - break; - } - } - return rc; -} - static int pseries_memory_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1040,10 +919,6 @@ static int pseries_memory_notifier(struct notifier_block *nb, case OF_RECONFIG_DETACH_NODE: err = pseries_remove_mem_node(rd->dn); break; - case OF_RECONFIG_UPDATE_PROPERTY: - if (!strcmp(rd->prop->name, "ibm,dynamic-memory")) - err = pseries_update_drconf_memory(rd); - break; } return notifier_from_errno(err); } diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index 267139b13530..96e18d3b2fcf 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -45,7 +45,7 @@ static int hvcs_convert(long to_convert) case H_LONG_BUSY_ORDER_10_SEC: case H_LONG_BUSY_ORDER_100_SEC: return -EBUSY; - case H_FUNCTION: /* fall through */ + case H_FUNCTION: default: return -EPERM; } diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index b91eb0929ed1..a6f101c958e8 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -47,6 +47,7 @@ #include <linux/stat.h> #include <linux/of_platform.h> #include <asm/ibmebus.h> +#include <asm/machdep.h> static struct device ibmebus_bus_device = { /* fake "parent" device */ .init_name = "ibmebus", @@ -464,4 +465,4 @@ static int __init ibmebus_bus_init(void) return 0; } -postcore_initcall(ibmebus_bus_init); +machine_postcore_initcall(pseries, ibmebus_bus_init); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index e4ed5317f117..baf24eacd268 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -21,10 +21,10 @@ #include <linux/cpuhotplug.h> #include <linux/workqueue.h> #include <linux/proc_fs.h> +#include <linux/pgtable.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/iommu.h> @@ -40,6 +40,7 @@ #include <asm/fadump.h> #include <asm/asm-prototypes.h> #include <asm/debugfs.h> +#include <asm/dtl.h> #include 
"pseries.h" @@ -1680,9 +1681,11 @@ static int pseries_lpar_register_process_table(unsigned long base, if (table_size) flags |= PROC_TABLE_NEW; - if (radix_enabled()) - flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; - else + if (radix_enabled()) { + flags |= PROC_TABLE_RADIX; + if (mmu_has_feature(MMU_FTR_GTSE)) + flags |= PROC_TABLE_GTSE; + } else flags |= PROC_TABLE_HPT_SLB; for (;;) { rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index b571285f6c14..d6f4162478a5 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -6,6 +6,9 @@ * Copyright (C) 2010 IBM Corporation */ + +#define pr_fmt(fmt) "mobility: " fmt + #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/kobject.h> @@ -64,6 +67,8 @@ static int delete_dt_node(__be32 phandle) if (!dn) return -ENOENT; + pr_debug("removing node %pOFfp\n", dn); + dlpar_detach_node(dn); of_node_put(dn); return 0; @@ -122,6 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, } if (!more) { + pr_debug("updating node %pOF property %s\n", dn, name); of_update_property(dn, new_prop); *prop = NULL; } @@ -240,33 +246,12 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) if (rc) dlpar_free_cc_nodes(dn); + pr_debug("added node %pOFfp\n", dn); + of_node_put(parent_dn); return rc; } -static void prrn_update_node(__be32 phandle) -{ - struct pseries_hp_errorlog hp_elog; - struct device_node *dn; - - /* - * If a node is found from a the given phandle, the phandle does not - * represent the drc index of an LMB and we can ignore. - */ - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (dn) { - of_node_put(dn); - return; - } - - hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM; - hp_elog.action = PSERIES_HP_ELOG_ACTION_READD; - hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; - hp_elog._drc_u.drc_index = phandle; - - handle_dlpar_errorlog(&hp_elog); -} - int pseries_devicetree_update(s32 scope) { char *rtas_buf; @@ -305,10 +290,6 @@ int pseries_devicetree_update(s32 scope) break; case UPDATE_DT_NODE: update_dt_node(phandle, scope); - - if (scope == PRRN_SCOPE) - prrn_update_node(phandle); - break; case ADD_DT_NODE: drc_index = *data++; @@ -371,6 +352,9 @@ void post_mobility_fixup(void) /* Possibly switch to a new RFI flush type */ pseries_setup_rfi_flush(); + /* Reinitialise system information for hv-24x7 */ + read_24x7_sys_info(); + return; } @@ -385,8 +369,6 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - stop_topology_update(); - do { rc = rtas_ibm_suspend_me(streamid); if (rc == -EAGAIN) @@ -398,8 +380,6 @@ static ssize_t migration_store(struct class *class, post_mobility_fixup(); - start_topology_update(); - return count; } @@ -424,11 +404,11 @@ static int __init mobility_sysfs_init(void) rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); if (rc) - pr_err("mobility: unable to create migration sysfs file (%d)\n", rc); + pr_err("unable to create migration sysfs file (%d)\n", rc); rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr); if (rc) - pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc); + pr_err("unable to create api_version sysfs file (%d)\n", rc); return 0; } diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h deleted file mode 100644 index 51414aee2862..000000000000 --- 
a/arch/powerpc/platforms/pseries/offline_states.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _OFFLINE_STATES_H_ -#define _OFFLINE_STATES_H_ - -/* Cpu offline states go here */ -enum cpu_state_vals { - CPU_STATE_OFFLINE, - CPU_STATE_INACTIVE, - CPU_STATE_ONLINE, - CPU_MAX_OFFLINE_STATES -}; - -#ifdef CONFIG_HOTPLUG_CPU -extern enum cpu_state_vals get_cpu_current_state(int cpu); -extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); -extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); -extern void set_default_offline_state(int cpu); -#else -static inline enum cpu_state_vals get_cpu_current_state(int cpu) -{ - return CPU_STATE_ONLINE; -} - -static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state) -{ -} - -static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state) -{ -} - -static inline void set_default_offline_state(int cpu) -{ -} -#endif - -extern enum cpu_state_vals get_preferred_offline_state(int cpu); -#endif diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f35592423380..a88a707a608a 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,16 +12,79 @@ #include <linux/libnvdimm.h> #include <linux/platform_device.h> #include <linux/delay.h> +#include <linux/seq_buf.h> +#include <linux/nd.h> #include <asm/plpar_wrappers.h> +#include <asm/papr_pdsm.h> +#include <asm/mce.h> #define BIND_ANY_ADDR (~0ul) #define PAPR_SCM_DIMM_CMD_MASK \ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ (1ul << ND_CMD_GET_CONFIG_DATA) | \ - (1ul << ND_CMD_SET_CONFIG_DATA)) - + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) + +/* DIMM health bitmap bitmap indicators */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are persisted from previous IPL */ +#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +/* SCM device memory life remaining is critically low */ +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) +/* SCM device is unable to persist memory contents in certain conditions */ +#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) +/* SCM device is encrypted */ +#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) +/* SCM device has been scrubbed and locked */ +#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define 
PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) +#define PAPR_SCM_PERF_STATS_VERSION 0x1 + +/* Struct holding a single performance metric */ +struct papr_scm_perf_stat { + u8 stat_id[8]; + __be64 stat_val; +} __packed; + +/* Struct exchanged between kernel and PHYP for fetching drc perf stats */ +struct papr_scm_perf_stats { + u8 eye_catcher[8]; + /* Should be PAPR_SCM_PERF_STATS_VERSION */ + __be32 stats_version; + /* Number of stats following */ + __be32 num_statistics; + /* zero or more performance matrics */ + struct papr_scm_perf_stat scm_statistic[]; +} __packed; + +/* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; struct device_node *dn; @@ -39,8 +102,24 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + struct list_head region_list; + + /* Protect dimm health data from concurrent read/writes */ + struct mutex health_mutex; + + /* Last time the health information of the dimm was updated */ + unsigned long lasthealth_jiffies; + + /* Health information for the dimm */ + u64 health_bitmap; + + /* length of the stat buffer as expected by phyp */ + size_t stat_buffer_len; }; +static LIST_HEAD(papr_nd_regions); +static DEFINE_MUTEX(papr_ndr_lock); + static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; @@ -144,6 +223,134 @@ err_out: return drc_pmem_bind(p); } +/* + * Query the Dimm performance stats from PHYP and copy them (if returned) to + * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast + * (num_stats + header) bytes. + * - If buff_stats == NULL the return value is the size in byes of the buffer + * needed to hold all supported performance-statistics. + * - If buff_stats != NULL and num_stats == 0 then we copy all known + * performance-statistics to 'buff_stat' and expect to be large enough to + * hold them. + * - if buff_stats != NULL and num_stats > 0 then copy the requested + * performance-statistics to buff_stats. + */ +static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, + struct papr_scm_perf_stats *buff_stats, + unsigned int num_stats) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + size_t size; + s64 rc; + + /* Setup the out buffer */ + if (buff_stats) { + memcpy(buff_stats->eye_catcher, + PAPR_SCM_PERF_STATS_EYECATCHER, 8); + buff_stats->stats_version = + cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); + buff_stats->num_statistics = + cpu_to_be32(num_stats); + + /* + * Calculate the buffer size based on num-stats provided + * or use the prefetched max buffer length + */ + if (num_stats) + /* Calculate size from the num_stats */ + size = sizeof(struct papr_scm_perf_stats) + + num_stats * sizeof(struct papr_scm_perf_stat); + else + size = p->stat_buffer_len; + } else { + /* In case of no out buffer ignore the size */ + size = 0; + } + + /* Do the HCALL asking PHYP for info */ + rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, + buff_stats ? 
virt_to_phys(buff_stats) : 0, + size); + + /* Check if the error was due to an unknown stat-id */ + if (rc == H_PARTIAL) { + dev_err(&p->pdev->dev, + "Unknown performance stats, Err:0x%016lX\n", ret[0]); + return -ENOENT; + } else if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query performance stats, Err:%lld\n", rc); + return -EIO; + + } else if (!size) { + /* Handle case where stat buffer size was requested */ + dev_dbg(&p->pdev->dev, + "Performance stats size %ld\n", ret[0]); + return ret[0]; + } + + /* Successfully fetched the requested stats from phyp */ + dev_dbg(&p->pdev->dev, + "Performance stats returned %d stats\n", + be32_to_cpu(buff_stats->num_statistics)); + return 0; +} + +/* + * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the + * health information. + */ +static int __drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + long rc; + + /* issue the hcall */ + rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); + if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query health information, Err:%ld\n", rc); + return -ENXIO; + } + + p->lasthealth_jiffies = jiffies; + p->health_bitmap = ret[0] & ret[1]; + + dev_dbg(&p->pdev->dev, + "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", + ret[0], ret[1]); + + return 0; +} + +/* Min interval in seconds for assuming stable dimm health */ +#define MIN_HEALTH_QUERY_INTERVAL 60 + +/* Query cached health info and if needed call drc_pmem_query_health */ +static int drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long cache_timeout; + int rc; + + /* Protect concurrent modifications to papr_scm_priv */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Jiffies offset for which the health data is assumed to be same */ + cache_timeout = p->lasthealth_jiffies + + msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); + + /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ + if (time_after(jiffies, cache_timeout)) + rc = __drc_pmem_query_health(p); + else + /* Assume cached health data is valid */ + rc = 0; + + mutex_unlock(&p->health_mutex); + return rc; +} static int papr_scm_meta_get(struct papr_scm_priv *p, struct nd_cmd_get_config_data_hdr *hdr) @@ -246,16 +453,299 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } +/* + * Do a sanity checks on the inputs args to dimm-control function and return + * '0' if valid. Validation of PDSM payloads happens later in + * papr_scm_service_pdsm. 
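
drc_pmem_query_stats() above is deliberately dual-purpose: with a NULL buffer it only reports how large a buffer PHYP needs, and with a buffer plus num_stats == 0 it copies out every statistic the platform knows about, using the prefetched p->stat_buffer_len as the size. A hedged sketch of chaining the two modes, mirroring what the probe path and the perf_stats sysfs attribute do later in this patch (the wrapper name is illustrative):

    /* Sketch only: assumes the drc_pmem_query_stats() contract described above. */
    static int example_dump_all_stats(struct papr_scm_priv *p)
    {
            struct papr_scm_perf_stats *stats;
            ssize_t len;
            int rc;

            /* Pass 1: NULL buffer -> return value is the required buffer size */
            len = drc_pmem_query_stats(p, NULL, 0);
            if (len <= 0)
                    return len;             /* error, or no statistics exported */
            p->stat_buffer_len = len;       /* the driver records this at probe time */

            /* Pass 2: num_stats == 0 -> PHYP fills the buffer with all known stats */
            stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
            if (!stats)
                    return -ENOMEM;

            rc = drc_pmem_query_stats(p, stats, 0);
            if (!rc)
                    dev_dbg(&p->pdev->dev, "dimm exports %u statistics\n",
                            be32_to_cpu(stats->num_statistics));

            kfree(stats);
            return rc;
    }
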
+ */ +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; + struct nd_cmd_pkg *nd_cmd; + struct papr_scm_priv *p; + enum papr_pdsm pdsm; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + /* get the provider data from struct nvdimm */ + p = nvdimm_provider_data(nvdimm); + + if (!test_bit(cmd, &cmd_mask)) { + dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); + return -EINVAL; + } + + /* For CMD_CALL verify pdsm request */ + if (cmd == ND_CMD_CALL) { + /* Verify the envelope and envelop size */ + if (!buf || + buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", + buf_len); + return -EINVAL; + } + + /* Verify that the nd_cmd_pkg.nd_family is correct */ + nd_cmd = (struct nd_cmd_pkg *)buf; + + if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { + dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", + nd_cmd->nd_family); + return -EINVAL; + } + + pdsm = (enum papr_pdsm)nd_cmd->nd_command; + + /* Verify if the pdsm command is valid */ + if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", + pdsm); + return -EINVAL; + } + + /* Have enough space to hold returned 'nd_pkg_pdsm' header */ + if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", + pdsm); + return -EINVAL; + } + } + + /* Let the command be further processed */ + return 0; +} + +static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc, size; + u64 statval; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + + /* Silently fail if fetching performance metrics isn't supported */ + if (!p->stat_buffer_len) + return 0; + + /* Allocate request buffer enough to hold single performance stat */ + size = sizeof(struct papr_scm_perf_stats) + + sizeof(struct papr_scm_perf_stat); + + stats = kzalloc(size, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + stat = &stats->scm_statistic[0]; + memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); + stat->stat_val = 0; + + /* Fetch the fuel gauge and populate it in payload */ + rc = drc_pmem_query_stats(p, stats, 1); + if (rc < 0) { + dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); + goto free_stats; + } + + statval = be64_to_cpu(stat->stat_val); + dev_dbg(&p->pdev->dev, + "Fetched fuel-gauge %llu", statval); + payload->health.extension_flags |= + PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; + payload->health.dimm_fuel_gauge = statval; + + rc = sizeof(struct nd_papr_pdsm_health); + +free_stats: + kfree(stats); + return rc; +} + +/* Fetch the DIMM health info and populate it in provided package. 
*/ +static int papr_pdsm_health(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + + /* Ensure dimm health mutex is taken preventing concurrent access */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + goto out; + + /* Always fetch upto date dimm health data ignoring cached values */ + rc = __drc_pmem_query_health(p); + if (rc) { + mutex_unlock(&p->health_mutex); + goto out; + } + + /* update health struct with various flags derived from health bitmap */ + payload->health = (struct nd_papr_pdsm_health) { + .extension_flags = 0, + .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), + .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), + .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), + .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), + .dimm_health = PAPR_PDSM_DIMM_HEALTHY, + }; + + /* Update field dimm_health based on health_bitmap flags */ + if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) + payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; + + /* struct populated hence can release the mutex now */ + mutex_unlock(&p->health_mutex); + + /* Populate the fuel gauge meter in the payload */ + papr_pdsm_fuel_gauge(p, payload); + + rc = sizeof(struct nd_papr_pdsm_health); + +out: + return rc; +} + +/* + * 'struct pdsm_cmd_desc' + * Identifies supported PDSMs' expected length of in/out payloads + * and pdsm service function. + * + * size_in : Size of input payload if any in the PDSM request. + * size_out : Size of output payload if any in the PDSM request. + * service : Service function for the PDSM request. Return semantics: + * rc < 0 : Error servicing PDSM and rc indicates the error. + * rc >=0 : Serviced successfully and 'rc' indicate number of + * bytes written to payload. + */ +struct pdsm_cmd_desc { + u32 size_in; + u32 size_out; + int (*service)(struct papr_scm_priv *dimm, + union nd_pdsm_payload *payload); +}; + +/* Holds all supported PDSMs' command descriptors */ +static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { + [PAPR_PDSM_MIN] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, + /* New PDSM command descriptors to be added below */ + + [PAPR_PDSM_HEALTH] = { + .size_in = 0, + .size_out = sizeof(struct nd_papr_pdsm_health), + .service = papr_pdsm_health, + }, + /* Empty */ + [PAPR_PDSM_MAX] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, +}; + +/* Given a valid pdsm cmd return its command descriptor else return NULL */ +static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) +{ + if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) + return &__pdsm_cmd_descriptors[cmd]; + + return NULL; +} + +/* + * For a given pdsm request call an appropriate service function. + * Returns errors if any while handling the pdsm command package. 
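
The size checks in is_cmd_valid() and in the dispatcher below all measure against the same envelope: an outer struct nd_cmd_pkg, then the fixed PDSM header (ND_PDSM_HDR_SIZE bytes of struct nd_pkg_pdsm), then the payload. A rough sketch of how a PAPR_PDSM_HEALTH request would be sized so that it passes those checks (the helper is illustrative and only the fields this patch validates are shown):

    /* Sketch only: lays out the envelope the validation code above expects. */
    static size_t example_size_pdsm_health(struct nd_cmd_pkg *pkg)
    {
            size_t payload = sizeof(struct nd_papr_pdsm_health);

            pkg->nd_family   = NVDIMM_FAMILY_PAPR;                /* checked in is_cmd_valid() */
            pkg->nd_command  = PAPR_PDSM_HEALTH;
            pkg->nd_size_out = ND_PDSM_HDR_SIZE + payload;        /* checked by the dispatcher */

            /* Total buffer handed to papr_scm_ndctl(): envelope + PDSM header + payload */
            return sizeof(*pkg) + ND_PDSM_HDR_SIZE + payload;
    }
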
+ */ +static int papr_scm_service_pdsm(struct papr_scm_priv *p, + struct nd_cmd_pkg *pkg) +{ + /* Get the PDSM header and PDSM command */ + struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; + enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; + const struct pdsm_cmd_desc *pdsc; + int rc; + + /* Fetch corresponding pdsm descriptor for validation and servicing */ + pdsc = pdsm_cmd_desc(pdsm); + + /* Validate pdsm descriptor */ + /* Ensure that reserved fields are 0 */ + if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", + pdsm); + return -EINVAL; + } + + /* If pdsm expects some input, then ensure that the size_in matches */ + if (pdsc->size_in && + pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", + pdsm, pkg->nd_size_in); + return -EINVAL; + } + + /* If pdsm wants to return data, then ensure that size_out matches */ + if (pdsc->size_out && + pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", + pdsm, pkg->nd_size_out); + return -EINVAL; + } + + /* Service the pdsm */ + if (pdsc->service) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); + + rc = pdsc->service(p, &pdsm_pkg->payload); + + if (rc < 0) { + /* error encountered while servicing pdsm */ + pdsm_pkg->cmd_status = rc; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } else { + /* pdsm serviced and 'rc' bytes written to payload */ + pdsm_pkg->cmd_status = 0; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; + } + } else { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", + pdsm); + pdsm_pkg->cmd_status = -ENOENT; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } + + return pdsm_pkg->cmd_status; +} + static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct nd_cmd_get_config_size *get_size_hdr; + struct nd_cmd_pkg *call_pkg = NULL; struct papr_scm_priv *p; + int rc; - /* Only dimm-specific calls are supported atm */ - if (!nvdimm) - return -EINVAL; + rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); + if (rc) { + pr_debug("Invalid cmd=0x%x. 
Err=%d\n", cmd, rc); + return rc; + } + + /* Use a local variable in case cmd_rc pointer is NULL */ + if (!cmd_rc) + cmd_rc = &rc; p = nvdimm_provider_data(nvdimm); @@ -277,7 +767,13 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, *cmd_rc = papr_scm_meta_set(p, buf); break; + case ND_CMD_CALL: + call_pkg = (struct nd_cmd_pkg *)buf; + *cmd_rc = papr_scm_service_pdsm(p, call_pkg); + break; + default: + dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; } @@ -286,6 +782,107 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t perf_stats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int index, rc; + struct seq_buf s; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + if (!p->stat_buffer_len) + return -ENOENT; + + /* Allocate the buffer for phyp where stats are written */ + stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + /* Ask phyp to return all dimm perf stats */ + rc = drc_pmem_query_stats(p, stats, 0); + if (rc) + goto free_stats; + /* + * Go through the returned output buffer and print stats and + * values. Since stat_id is essentially a char string of + * 8 bytes, simply use the string format specifier to print it. + */ + seq_buf_init(&s, buf, PAGE_SIZE); + for (index = 0, stat = stats->scm_statistic; + index < be32_to_cpu(stats->num_statistics); + ++index, ++stat) { + seq_buf_printf(&s, "%.8s = 0x%016llX\n", + stat->stat_id, + be64_to_cpu(stat->stat_val)); + } + +free_stats: + kfree(stats); + return rc ? rc : seq_buf_used(&s); +} +DEVICE_ATTR_ADMIN_RO(perf_stats); + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + struct seq_buf s; + u64 health; + int rc; + + rc = drc_pmem_query_health(p); + if (rc) + return rc; + + /* Copy health_bitmap locally, check masks & update out buffer */ + health = READ_ONCE(p->health_bitmap); + + seq_buf_init(&s, buf, PAGE_SIZE); + if (health & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (health & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (health & PAPR_PMEM_ENCRYPTED) + seq_buf_printf(&s, "encrypted "); + + if (health & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) + seq_buf_printf(&s, "scrubbed locked "); + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +DEVICE_ATTR_RO(flags); + +/* papr_scm specific dimm attributes */ +static struct attribute *papr_nd_attributes[] = { + &dev_attr_flags.attr, + &dev_attr_perf_stats.attr, + NULL, +}; + +static struct attribute_group papr_nd_attribute_group = { + .name = "papr", + .attrs = papr_nd_attributes, +}; + +static const struct attribute_group *papr_nd_attr_groups[] = { + &papr_nd_attribute_group, + NULL, +}; + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; @@ -293,6 +890,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_region_desc ndr_desc; unsigned long dimm_flags; int target_nid, online_nid; + ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -312,8 +910,8 @@ 
static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); - p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags, - PAPR_SCM_DIMM_CMD_MASK, 0, NULL); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); goto err; @@ -356,6 +954,20 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dev_info(dev, "Region registered with target node %d and online node %d", target_nid, online_nid); + mutex_lock(&papr_ndr_lock); + list_add_tail(&p->region_list, &papr_nd_regions); + mutex_unlock(&papr_ndr_lock); + + /* Try retriving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } else { + dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); + } + return 0; err: nvdimm_bus_unregister(p->bus); @@ -363,6 +975,68 @@ err: nvdimm_bus_unregister(p->bus); return -ENXIO; } +static void papr_scm_add_badblock(struct nd_region *region, + struct nvdimm_bus *bus, u64 phys_addr) +{ + u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { + pr_err("Bad block registration for 0x%llx failed\n", phys_addr); + return; + } + + pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); +} + +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct machine_check_event *evt = data; + struct papr_scm_priv *p; + u64 phys_addr; + bool found = false; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(&papr_nd_regions)) + return NOTIFY_DONE; + + /* + * The physical address obtained here is PAGE_SIZE aligned, so get the + * exact address from the effective address + */ + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + /* mce notifier is called from a process context, so mutex is safe */ + mutex_lock(&papr_ndr_lock); + list_for_each_entry(p, &papr_nd_regions, region_list) { + if (phys_addr >= p->res.start && phys_addr <= p->res.end) { + found = true; + break; + } + } + + if (found) + papr_scm_add_badblock(p->region, p->bus, phys_addr); + + mutex_unlock(&papr_ndr_lock); + + return found ? 
NOTIFY_OK : NOTIFY_DONE; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int papr_scm_probe(struct platform_device *pdev) { struct device_node *dn = pdev->dev.of_node; @@ -399,6 +1073,9 @@ static int papr_scm_probe(struct platform_device *pdev) if (!p) return -ENOMEM; + /* Initialize the dimm mutex */ + mutex_init(&p->health_mutex); + /* optional DT properties */ of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); @@ -460,6 +1137,10 @@ static int papr_scm_remove(struct platform_device *pdev) { struct papr_scm_priv *p = platform_get_drvdata(pdev); + mutex_lock(&papr_ndr_lock); + list_del(&p->region_list); + mutex_unlock(&papr_ndr_lock); + nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); kfree(p->bus_desc.provider_name); @@ -470,6 +1151,7 @@ static int papr_scm_remove(struct platform_device *pdev) static const struct of_device_id papr_scm_match[] = { { .compatible = "ibm,pmemory" }, + { .compatible = "ibm,pmemory-v2" }, { }, }; @@ -482,7 +1164,25 @@ static struct platform_driver papr_scm_driver = { }, }; -module_platform_driver(papr_scm_driver); +static int __init papr_scm_init(void) +{ + int ret; + + ret = platform_driver_register(&papr_scm_driver); + if (!ret) + mce_register_notifier(&mce_ue_nb); + + return ret; +} +module_init(papr_scm_init); + +static void __exit papr_scm_exit(void) +{ + mce_unregister_notifier(&mce_ue_nb); + platform_driver_unregister(&papr_scm_driver); +} +module_exit(papr_scm_exit); + MODULE_DEVICE_TABLE(of, papr_scm_match); MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index b3a38f5a6b68..f9ae17e8a0f4 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -34,7 +34,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pci_devs_phb_init_dynamic(phb); /* Create EEH devices for the PHB */ - eeh_dev_phb_init_dynamic(phb); + eeh_phb_pe_create(phb); if (dn->child) pseries_eeh_init_edev_recursive(PCI_DN(dn)); diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index f860a897a9e0..e1dc5d3254df 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -24,7 +24,6 @@ #include <asm/topology.h> #include "pseries.h" -#include "offline_states.h" static struct device_node *pmem_node; @@ -147,6 +146,12 @@ const struct of_device_id drc_pmem_match[] = { static int pseries_pmem_init(void) { + /* + * Only supported on POWER8 and above. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return 0; + pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory"); if (!pmem_node) return 0; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 1d1da639b8b7..13c86a292c6d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier) case EPOW_SHUTDOWN_ON_UPS: pr_emerg("Loss of system power detected. System is running on" " UPS/battery. Check RTAS error log for details\n"); - orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: @@ -395,16 +394,31 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) /* * Some versions of FWNMI place the buffer inside the 4kB page starting at * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. 
*/ #define VALID_FWNMI_BUFFER(A) \ - ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) static inline struct rtas_error_log *fwnmi_get_errlog(void) { return (struct rtas_error_log *)local_paca->mce_data_buf; } +static __be64 *fwnmi_get_savep(struct pt_regs *regs) +{ + unsigned long savep_ra; + + /* Mask top two bits */ + savep_ra = regs->gpr[3] & ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(savep_ra)) { + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + return NULL; + } + + return __va(savep_ra); +} + /* * Get the error information for errors coming through the * FWNMI vectors. The pt_regs' r3 will be updated to reflect @@ -422,19 +436,14 @@ static inline struct rtas_error_log *fwnmi_get_errlog(void) */ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { - unsigned long *savep; struct rtas_error_log *h; + __be64 *savep; - /* Mask top two bits */ - regs->gpr[3] &= ~(0x3UL << 62); - - if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { - printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + savep = fwnmi_get_savep(regs); + if (!savep) return NULL; - } - savep = __va(regs->gpr[3]); - regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ h = (struct rtas_error_log *)&savep[1]; /* Use the per cpu buffer from paca to store rtas error log */ @@ -458,7 +467,15 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) */ static void fwnmi_release_errinfo(void) { - int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + struct rtas_args rtas_args; + int ret; + + /* + * On pseries, the machine check stack is limited to under 4GB, so + * args can be on-stack. + */ + rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); + ret = be32_to_cpu(rtas_args.rets[0]); if (ret != 0) printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); } @@ -481,11 +498,21 @@ int pSeries_system_reset_exception(struct pt_regs *regs) #endif if (fwnmi_active) { - struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); - if (errhdr) { - /* XXX Should look at FWNMI information */ - } - fwnmi_release_errinfo(); + __be64 *savep; + + /* + * Firmware (PowerVM and KVM) saves r3 to a save area like + * machine check, which is not exactly what PAPR (2.9) + * suggests but there is no way to detect otherwise, so this + * is the interface now. + * + * System resets do not save any error log or require an + * "ibm,nmi-interlock" rtas call to release. + */ + + savep = fwnmi_get_savep(regs); + if (savep) + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ } if (smp_handle_nmi_ipi(regs)) diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 70c3013fdd07..81343908ed33 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -506,7 +506,7 @@ void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) fadump_conf->fadump_supported = 1; /* Firmware supports 64-bit value for size, align it to pagesize. 
*/ - fadump_conf->max_copy_size = _ALIGN_DOWN(U64_MAX, PAGE_SIZE); + fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE); /* * The 'ibm,kernel-dump' rtas node is present only if there is diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0c8421dd01ab..2f4ee0a90284 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -43,7 +43,6 @@ #include <asm/mmu.h> #include <asm/processor.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/pci-bridge.h> @@ -68,8 +67,10 @@ #include <asm/isa-bridge.h> #include <asm/security_features.h> #include <asm/asm-const.h> +#include <asm/idle.h> #include <asm/swiotlb.h> #include <asm/svm.h> +#include <asm/dtl.h> #include "pseries.h" #include "../../../../drivers/pci/pci.h" @@ -83,6 +84,7 @@ unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ +int ibm_nmi_interlock_token; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -113,9 +115,14 @@ static void __init fwnmi_init(void) struct slb_entry *slb_ptr; size_t size; #endif + int ibm_nmi_register_token; - int ibm_nmi_register = rtas_token("ibm,nmi-register"); - if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + ibm_nmi_register_token = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE) + return; + + ibm_nmi_interlock_token = rtas_token("ibm,nmi-interlock"); + if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE)) return; /* If the kernel's not linked at zero we point the firmware at low @@ -123,8 +130,8 @@ static void __init fwnmi_init(void) system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START; machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START; - if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr, - machine_check_addr)) + if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL, + system_reset_addr, machine_check_addr)) fwnmi_active = 1; /* @@ -317,6 +324,9 @@ static int alloc_dispatch_log_kmem_cache(void) } machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache); +DEFINE_PER_CPU(u64, idle_spurr_cycles); +DEFINE_PER_CPU(u64, idle_entry_purr_snap); +DEFINE_PER_CPU(u64, idle_entry_spurr_snap); static void pseries_lpar_idle(void) { /* @@ -328,7 +338,7 @@ static void pseries_lpar_idle(void) return; /* Indicate to hypervisor that we are idle. */ - get_lppaca()->idle = 1; + pseries_idle_prolog(); /* * Yield the processor to the hypervisor. We return if @@ -339,7 +349,7 @@ static void pseries_lpar_idle(void) */ cede_processor(); - get_lppaca()->idle = 0; + pseries_idle_epilog(); } /* @@ -349,7 +359,7 @@ static void pseries_lpar_idle(void) * to ever be a problem in practice we can move this into a kernel thread to * finish off the process later in boot. 
*/ -void pseries_enable_reloc_on_exc(void) +bool pseries_enable_reloc_on_exc(void) { long rc; unsigned int delay, total_delay = 0; @@ -360,11 +370,13 @@ void pseries_enable_reloc_on_exc(void) if (rc == H_P2) { pr_info("Relocation on exceptions not" " supported\n"); + return false; } else if (rc != H_SUCCESS) { pr_warn("Unable to enable relocation" " on exceptions: %ld\n", rc); + return false; } - break; + return true; } delay = get_longbusy_msecs(rc); @@ -373,7 +385,7 @@ void pseries_enable_reloc_on_exc(void) pr_warn("Warning: Giving up waiting to enable " "relocation on exceptions (%u msec)!\n", total_delay); - return; + return false; } mdelay(delay); @@ -736,6 +748,11 @@ static void __init pSeries_setup_arch(void) smp_init_pseries(); + if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE)) + if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) + panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n"); + + /* openpic global configuration register (64-bit format). */ /* openpic Interrupt Source Unit pointer (64-bit format). */ /* python0 facility area (mmio) (64-bit format) REAL address. */ @@ -762,8 +779,10 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); - if (lppaca_shared_proc(get_lppaca())) + if (lppaca_shared_proc(get_lppaca())) { static_branch_enable(&shared_processor); + pv_spinlocks_init(); + } ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; @@ -822,12 +841,15 @@ static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); } -static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx) +static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx) { /* PAPR says we can't set HYP */ dawrx &= ~DAWRX_HYP; - return plpar_set_watchpoint0(dawr, dawrx); + if (nr == 0) + return plpar_set_watchpoint0(dawr, dawrx); + else + return plpar_set_watchpoint1(dawr, dawrx); } #define CMO_CHARACTERISTICS_TOKEN 44 diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index ad61e90032da..92922491a81c 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -20,12 +20,12 @@ #include <linux/err.h> #include <linux/device.h> #include <linux/cpu.h> +#include <linux/pgtable.h> #include <asm/ptrace.h> #include <linux/atomic.h> #include <asm/irq.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/smp.h> @@ -44,8 +44,6 @@ #include <asm/svm.h> #include "pseries.h" -#include "offline_states.h" - /* * The Primary thread of each non-boot processor was started from the OF client @@ -108,10 +106,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) /* Fixup atomic count: it exited inside IRQ handler. */ task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; -#ifdef CONFIG_HOTPLUG_CPU - if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) - goto out; -#endif + /* * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. 
@@ -126,9 +121,6 @@ static inline int smp_startup_cpu(unsigned int lcpu) return 0; } -#ifdef CONFIG_HOTPLUG_CPU -out: -#endif return 1; } @@ -143,10 +135,6 @@ static void smp_setup_cpu(int cpu) vpa_init(cpu); cpumask_clear_cpu(cpu, of_spin_mask); -#ifdef CONFIG_HOTPLUG_CPU - set_cpu_current_state(cpu, CPU_STATE_ONLINE); - set_default_offline_state(cpu); -#endif } static int smp_pSeries_kick_cpu(int nr) @@ -163,20 +151,6 @@ static int smp_pSeries_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca_ptrs[nr]->cpu_start = 1; -#ifdef CONFIG_HOTPLUG_CPU - set_preferred_offline_state(nr, CPU_STATE_ONLINE); - - if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { - long rc; - unsigned long hcpuid; - - hcpuid = get_hard_smp_processor_id(nr); - rc = plpar_hcall_norets(H_PROD, hcpuid); - if (rc != H_SUCCESS) - printk(KERN_ERR "Error: Prod to wake up processor %d " - "Ret= %ld\n", nr, rc); - } -#endif return 0; } @@ -188,13 +162,16 @@ static int pseries_smp_prepare_cpu(int cpu) return 0; } -static void smp_pseries_cause_ipi(int cpu) +/* Cause IPI as setup by the interrupt controller (xics or xive) */ +static void (*ic_cause_ipi)(int cpu) __ro_after_init; + +/* Use msgsndp doorbells target is a sibling, else use interrupt controller */ +static void dbell_or_ic_cause_ipi(int cpu) { - /* POWER9 should not use this handler */ if (doorbell_try_core_ipi(cpu)) return; - icp_ops->cause_ipi(cpu); + ic_cause_ipi(cpu); } static int pseries_cause_nmi_ipi(int cpu) @@ -218,26 +195,49 @@ static int pseries_cause_nmi_ipi(int cpu) return 0; } -static __init void pSeries_smp_probe_xics(void) -{ - xics_smp_probe(); - - if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest()) - smp_ops->cause_ipi = smp_pseries_cause_ipi; - else - smp_ops->cause_ipi = icp_ops->cause_ipi; -} - static __init void pSeries_smp_probe(void) { if (xive_enabled()) - /* - * Don't use P9 doorbells when XIVE is enabled. IPIs - * using MMIOs should be faster - */ xive_smp_probe(); else - pSeries_smp_probe_xics(); + xics_smp_probe(); + + /* No doorbell facility, must use the interrupt controller for IPIs */ + if (!cpu_has_feature(CPU_FTR_DBELL)) + return; + + /* Doorbells can only be used for IPIs between SMT siblings */ + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + + if (is_kvm_guest()) { + /* + * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp + * faults to the hypervisor which then reads the instruction + * from guest memory, which tends to be slower than using XIVE. + */ + if (xive_enabled()) + return; + + /* + * XICS hcalls aren't as fast, so we can use msgsndp (which + * also helps exercise KVM emulation), however KVM can't + * emulate secure guests because it can't read the instruction + * out of their memory. + */ + if (is_secure_guest()) + return; + } + + /* + * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are + * gang scheduled on the same physical core, so doorbells are always + * faster than the interrupt controller, and they can be used by + * secure guests. 
+ */ + + ic_cause_ipi = smp_ops->cause_ipi; + smp_ops->cause_ipi = dbell_or_ic_cause_ipi; } static struct smp_ops_t pseries_smp_ops = { diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 0a24a5a185f0..81e0ac58d620 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -132,15 +132,11 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - stream_id = simple_strtoul(buf, NULL, 16); do { @@ -149,33 +145,14 @@ static ssize_t store_hibernate(struct device *dev, ssleep(1); } while (rc == -EAGAIN); - if (!rc) { - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, - cpu_online_mask); - rc = rtas_online_cpus_mask(offline_mask); - if (rc) { - pr_err("%s: Could not bring present CPUs online.\n", - __func__); - goto out; - } - - stop_topology_update(); + if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - start_topology_update(); - - /* Take down CPUs not online prior to suspend */ - if (!rtas_offline_cpus_mask(offline_mask)) - pr_warn("%s: Could not restore CPUs to offline " - "state.\n", __func__); - } stream_id = 0; if (!rc) rc = count; -out: - free_cpumask_var(offline_mask); + return rc; } diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 40c0637203d5..e6d7a344d9f2 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -11,6 +11,7 @@ #include <asm/svm.h> #include <asm/swiotlb.h> #include <asm/ultravisor.h> +#include <asm/dtl.h> static int __init init_svm(void) { diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 37f1f25ba804..0487b26f6f1a 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -31,6 +31,7 @@ #include <asm/tce.h> #include <asm/page.h> #include <asm/hvcall.h> +#include <asm/machdep.h> static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = "vio", @@ -1513,7 +1514,7 @@ static int __init vio_bus_init(void) return 0; } -postcore_initcall(vio_bus_init); +machine_postcore_initcall(pseries, vio_bus_init); static int __init vio_device_init(void) { @@ -1522,7 +1523,7 @@ static int __init vio_device_init(void) return 0; } -device_initcall(vio_device_init); +machine_device_initcall(pseries, vio_device_init); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1703,4 +1704,4 @@ static int __init vio_init(void) dma_debug_add_bus(&vio_bus_type); return 0; } -fs_initcall(vio_init); +machine_fs_initcall(pseries, vio_init); diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 7c6d8b14f440..348f59581052 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -2,11 +2,11 @@ KASAN_SANITIZE := n -targets += trampoline.o purgatory.ro kexec-purgatory.c +targets += trampoline_$(BITS).o purgatory.ro kexec-purgatory.c LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -$(obj)/purgatory.ro: $(obj)/trampoline.o FORCE +$(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE $(call if_changed,ld) quiet_cmd_bin2c = BIN2C $@ diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline_64.S index a5a83c3f53e6..d956b8a35fd1 100644 --- 
a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline_64.S @@ -10,6 +10,7 @@ */ #include <asm/asm-compat.h> +#include <asm/crashdump-ppc64.h> .machine ppc64 .balign 256 @@ -43,14 +44,39 @@ master: mr %r17,%r3 /* save cpu id to r17 */ mr %r15,%r4 /* save physical address in reg15 */ + /* Work out where we're running */ + bcl 20, 31, 0f +0: mflr %r18 + + /* + * Copy BACKUP_SRC_SIZE bytes from BACKUP_SRC_START to + * backup_start 8 bytes at a time. + * + * Use r3 = dest, r4 = src, r5 = size, r6 = count + */ + ld %r3, (backup_start - 0b)(%r18) + cmpdi %cr0, %r3, 0 + beq .Lskip_copy /* skip if there is no backup region */ + lis %r5, BACKUP_SRC_SIZE@h + ori %r5, %r5, BACKUP_SRC_SIZE@l + cmpdi %cr0, %r5, 0 + beq .Lskip_copy /* skip if copy size is zero */ + lis %r4, BACKUP_SRC_START@h + ori %r4, %r4, BACKUP_SRC_START@l + li %r6, 0 +.Lcopy_loop: + ldx %r0, %r6, %r4 + stdx %r0, %r6, %r3 + addi %r6, %r6, 8 + cmpld %cr0, %r6, %r5 + blt .Lcopy_loop + +.Lskip_copy: or %r3,%r3,%r3 /* ok now to high priority, lets boot */ lis %r6,0x1 mtctr %r6 /* delay a bit for slaves to catch up */ bdnz . /* before we overwrite 0-100 again */ - bl 0f /* Work out where we're running */ -0: mflr %r18 - /* load device-tree address */ ld %r3, (dt_offset - 0b)(%r18) mr %r16,%r3 /* save dt address in reg16 */ @@ -61,6 +87,10 @@ master: li %r4,28 STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */ 1: + /* Load opal base and entry values in r8 & r9 respectively */ + ld %r8,(opal_base - 0b)(%r18) + ld %r9,(opal_entry - 0b)(%r18) + /* load the kernel address */ ld %r4,(kernel - 0b)(%r18) @@ -89,7 +119,6 @@ master: rfid /* update MSR and start kernel */ - .balign 8 .globl kernel kernel: @@ -102,6 +131,23 @@ dt_offset: .8byte 0x0 .size dt_offset, . - dt_offset + .balign 8 + .globl backup_start +backup_start: + .8byte 0x0 + .size backup_start, . - backup_start + + .balign 8 + .globl opal_base +opal_base: + .8byte 0x0 + .size opal_base, . - opal_base + + .balign 8 + .globl opal_entry +opal_entry: + .8byte 0x0 + .size opal_entry, . 
- opal_entry .data .balign 8 diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index cb5a5bd2cef5..026b3f01a991 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -31,8 +31,6 @@ obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o obj-$(CONFIG_PPC_I8259) += i8259.o obj-$(CONFIG_IPIC) += ipic.o -obj-$(CONFIG_XILINX_VIRTEX) += xilinx_intc.o -obj-$(CONFIG_XILINX_PCI) += xilinx_pci.o obj-$(CONFIG_OF_RTC) += of_rtc.o obj-$(CONFIG_CPM) += cpm_common.o diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 07718b9a2c99..68538b8329f7 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -39,7 +39,6 @@ #include <asm/irq.h> #include <asm/mpc8260.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/cpm2.h> #include <asm/rheap.h> #include <asm/fs_pd.h> diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 71660bacb264..7dc1960f8bdb 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -68,6 +68,8 @@ static void udbg_putc_cpm(char c) void __init udbg_init_cpm(void) { #ifdef CONFIG_PPC_8xx + mmu_mapin_immr(); + cpm_udbg_txdesc = (u32 __iomem __force *) (CONFIG_PPC_EARLY_DEBUG_CPM_ADDR - PHYS_IMMR_BASE + VIRT_IMMR_BASE); diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c index f6c665dac725..a3aeaa5f0f1b 100644 --- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c +++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c @@ -15,7 +15,7 @@ #include <linux/slab.h> #include <linux/err.h> #include <linux/of_platform.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/fsl_85xx_cache_sram.h> #include "fsl_85xx_cache_ctlr.h" diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4a8874bc1057..040b9d01c079 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1066,10 +1066,10 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) - ret = probe_user_read(&inst, (void __user *)regs->nip, - sizeof(inst)); + ret = copy_from_user_nofault(&inst, + (void __user *)regs->nip, sizeof(inst)); else - ret = probe_kernel_address((void *)regs->nip, inst); + ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index a3a72b780e67..b0426f28946a 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -29,11 +29,11 @@ #include <linux/slab.h> #include <linux/syscore_ops.h> #include <linux/ratelimit.h> +#include <linux/pgtable.h> #include <asm/ptrace.h> #include <asm/signal.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/irq.h> #include <asm/machdep.h> #include <asm/mpic.h> diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 6aabc74688a6..4cf18000f07c 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -50,8 +50,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d) server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, - DEFAULT_PRIORITY); + call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, + server, DEFAULT_PRIORITY); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive irq %u server %x returned %d\n", @@ -60,7 
+60,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d) } /* Now unmask the interrupt (often a no-op) */ - call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq); + call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -91,7 +91,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) if (hw_irq == XICS_IPI) return; - call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq); + call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq); if (call_status != 0) { printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -99,8 +99,8 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq) } /* Have to set XIVE to 0xff to be able to remove a slot */ - call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, - xics_default_server, 0xff); + call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, + xics_default_server, 0xff); if (call_status != 0) { printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", __func__, hw_irq, call_status); @@ -131,7 +131,7 @@ static int ics_rtas_set_affinity(struct irq_data *d, if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return -1; - status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq); + status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", @@ -146,8 +146,8 @@ static int ics_rtas_set_affinity(struct irq_data *d, return -1; } - status = rtas_call(ibm_set_xive, 3, 1, NULL, - hw_irq, irq_server, xics_status[1]); + status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, + hw_irq, irq_server, xics_status[1]); if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", @@ -179,7 +179,7 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq) return -EINVAL; /* Check if RTAS knows about this interrupt */ - rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq); + rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq); if (rc) return -ENXIO; @@ -198,7 +198,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec) { int rc, status[2]; - rc = rtas_call(ibm_get_xive, 1, 3, status, vec); + rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec); if (rc) return -1; return status[0]; diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c deleted file mode 100644 index 4a86dcff3fcd..000000000000 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Interrupt controller driver for Xilinx Virtex FPGAs - * - * Copyright (C) 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - * - */ - -/* - * This is a driver for the interrupt controller typically found in - * Xilinx Virtex FPGA designs. - * - * The interrupt sense levels are hard coded into the FPGA design with - * typically a 1:1 relationship between irq lines and devices (no shared - * irq lines). Therefore, this driver does not attempt to handle edge - * and level interrupts differently. 
- */ -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/i8259.h> -#include <asm/irq.h> -#include <linux/irqchip.h> - -#if defined(CONFIG_PPC_I8259) -/* - * Support code for cascading to 8259 interrupt controllers - */ -static void xilinx_i8259_cascade(struct irq_desc *desc) -{ - struct irq_chip *chip = irq_desc_get_chip(desc); - unsigned int cascade_irq = i8259_irq(); - - if (cascade_irq) - generic_handle_irq(cascade_irq); - - /* Let xilinx_intc end the interrupt */ - chip->irq_unmask(&desc->irq_data); -} - -static void __init xilinx_i8259_setup_cascade(void) -{ - struct device_node *cascade_node; - int cascade_irq; - - /* Initialize i8259 controller */ - cascade_node = of_find_compatible_node(NULL, NULL, "chrp,iic"); - if (!cascade_node) - return; - - cascade_irq = irq_of_parse_and_map(cascade_node, 0); - if (!cascade_irq) { - pr_err("virtex_ml510: Failed to map cascade interrupt\n"); - goto out; - } - - i8259_init(cascade_node, 0); - irq_set_chained_handler(cascade_irq, xilinx_i8259_cascade); - - /* Program irq 7 (usb/audio), 14/15 (ide) to level sensitive */ - /* This looks like a dirty hack to me --gcl */ - outb(0xc0, 0x4d0); - outb(0xc0, 0x4d1); - - out: - of_node_put(cascade_node); -} -#else -static inline void xilinx_i8259_setup_cascade(void) { return; } -#endif /* defined(CONFIG_PPC_I8259) */ - -/* - * Initialize master Xilinx interrupt controller - */ -void __init xilinx_intc_init_tree(void) -{ - irqchip_init(); - xilinx_i8259_setup_cascade(); -} diff --git a/arch/powerpc/sysdev/xilinx_pci.c b/arch/powerpc/sysdev/xilinx_pci.c deleted file mode 100644 index fea5667699ed..000000000000 --- a/arch/powerpc/sysdev/xilinx_pci.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * PCI support for Xilinx plbv46_pci soft-core which can be used on - * Xilinx Virtex ML410 / ML510 boards. - * - * Copyright 2009 Roderick Colenbrander - * Copyright 2009 Secret Lab Technologies Ltd. - * - * The pci bridge fixup code was copied from ppc4xx_pci.c and was written - * by Benjamin Herrenschmidt. - * Copyright 2007 Ben. Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <linux/ioport.h> -#include <linux/of.h> -#include <linux/pci.h> -#include <mm/mmu_decl.h> -#include <asm/io.h> -#include <asm/xilinx_pci.h> - -#define XPLB_PCI_ADDR 0x10c -#define XPLB_PCI_DATA 0x110 -#define XPLB_PCI_BUS 0x114 - -#define PCI_HOST_ENABLE_CMD PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY - -static const struct of_device_id xilinx_pci_match[] = { - { .compatible = "xlnx,plbv46-pci-1.03.a", }, - {} -}; - -/** - * xilinx_pci_fixup_bridge - Block Xilinx PHB configuration. 
- */ -static void xilinx_pci_fixup_bridge(struct pci_dev *dev) -{ - struct pci_controller *hose; - int i; - - if (dev->devfn || dev->bus->self) - return; - - hose = pci_bus_to_host(dev->bus); - if (!hose) - return; - - if (!of_match_node(xilinx_pci_match, hose->dn)) - return; - - /* Hide the PCI host BARs from the kernel as their content doesn't - * fit well in the resource management - */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; - } - - dev_info(&dev->dev, "Hiding Xilinx plb-pci host bridge resources %s\n", - pci_name(dev)); -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, xilinx_pci_fixup_bridge); - -/** - * xilinx_pci_exclude_device - Don't do config access for non-root bus - * - * This is a hack. Config access to any bus other than bus 0 does not - * currently work on the ML510 so we prevent it here. - */ -static int -xilinx_pci_exclude_device(struct pci_controller *hose, u_char bus, u8 devfn) -{ - return (bus != 0); -} - -/** - * xilinx_pci_init - Find and register a Xilinx PCI host bridge - */ -void __init xilinx_pci_init(void) -{ - struct pci_controller *hose; - struct resource r; - void __iomem *pci_reg; - struct device_node *pci_node; - - pci_node = of_find_matching_node(NULL, xilinx_pci_match); - if(!pci_node) - return; - - if (of_address_to_resource(pci_node, 0, &r)) { - pr_err("xilinx-pci: cannot resolve base address\n"); - return; - } - - hose = pcibios_alloc_controller(pci_node); - if (!hose) { - pr_err("xilinx-pci: pcibios_alloc_controller() failed\n"); - return; - } - - /* Setup config space */ - setup_indirect_pci(hose, r.start + XPLB_PCI_ADDR, - r.start + XPLB_PCI_DATA, - PPC_INDIRECT_TYPE_SET_CFG_TYPE); - - /* According to the xilinx plbv46_pci documentation the soft-core starts - * a self-init when the bus master enable bit is set. Without this bit - * set the pci bus can't be scanned. - */ - early_write_config_word(hose, 0, 0, PCI_COMMAND, PCI_HOST_ENABLE_CMD); - - /* Set the max latency timer to 255 */ - early_write_config_byte(hose, 0, 0, PCI_LATENCY_TIMER, 0xff); - - /* Set the max bus number to 255 */ - pci_reg = of_iomap(pci_node, 0); - out_8(pci_reg + XPLB_PCI_BUS, 0xff); - iounmap(pci_reg); - - /* Nothing past the root bridge is working right now. By default - * exclude config access to anything except bus 0 */ - if (!ppc_md.pci_exclude_device) - ppc_md.pci_exclude_device = xilinx_pci_exclude_device; - - /* Register the host bridge with the linux kernel! 
*/ - pci_process_bridge_OF_ranges(hose, pci_node, 1); - - pr_info("xilinx-pci: Registered PCI host bridge\n"); -} diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index b294f70f1a67..f591be9f01f4 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/msi.h> +#include <linux/vmalloc.h> #include <asm/debugfs.h> #include <asm/prom.h> @@ -196,6 +197,9 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) { u64 val; + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + offset |= XIVE_ESB_LD_ST_MO; + /* Handle HW errata */ if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; @@ -1017,12 +1021,16 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { if (xd->eoi_mmio) { + unmap_kernel_range((unsigned long)xd->eoi_mmio, + 1u << xd->esb_shift); iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) xd->trig_mmio = NULL; xd->eoi_mmio = NULL; } if (xd->trig_mmio) { + unmap_kernel_range((unsigned long)xd->trig_mmio, + 1u << xd->esb_shift); iounmap(xd->trig_mmio); xd->trig_mmio = NULL; } @@ -1656,7 +1664,8 @@ DEFINE_SHOW_ATTRIBUTE(xive_core_debug); int xive_core_debug_init(void) { - debugfs_create_file("xive", 0400, powerpc_debugfs_root, - NULL, &xive_core_debug_fops); + if (xive_enabled()) + debugfs_create_file("xive", 0400, powerpc_debugfs_root, + NULL, &xive_core_debug_fops); return 0; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 5218fdc4b29a..cb58ec7ce77a 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/cpumask.h> #include <linux/mm.h> +#include <linux/kmemleak.h> #include <asm/machdep.h> #include <asm/prom.h> @@ -280,12 +281,12 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) } #endif /* CONFIG_SMP */ -u32 xive_native_alloc_irq(void) +u32 xive_native_alloc_irq_on_chip(u32 chip_id) { s64 rc; for (;;) { - rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP); + rc = opal_xive_allocate_irq(chip_id); if (rc != OPAL_BUSY) break; msleep(OPAL_BUSY_DELAY_MS); @@ -294,7 +295,7 @@ u32 xive_native_alloc_irq(void) return 0; return rc; } -EXPORT_SYMBOL_GPL(xive_native_alloc_irq); +EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip); void xive_native_free_irq(u32 irq) { @@ -647,6 +648,7 @@ static bool xive_native_provision_pages(void) pr_err("Failed to allocate provisioning page\n"); return false; } + kmemleak_ignore(p); opal_xive_donate_page(chip, __pa(p)); } return true; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 7ab5c6780997..1e3674d7ea7b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -27,6 +27,8 @@ #include <asm/xive.h> #include <asm/xive-regs.h> #include <asm/hvcall.h> +#include <asm/svm.h> +#include <asm/ultravisor.h> #include "xive-internal.h" @@ -502,6 +504,9 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = -EIO; } else { q->qpage = qpage; + if (is_secure_guest()) + uv_share_page(PHYS_PFN(qpage_phys), + 1 << xive_alloc_order(order)); } fail: return rc; @@ -535,6 +540,8 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); + if (is_secure_guest()) + uv_unshare_page(PHYS_PFN(__pa(q->qpage)), 1 << 
alloc_order); free_pages((unsigned long)q->qpage, alloc_order); q->qpage = NULL; } @@ -761,7 +768,7 @@ static const u8 *get_vec5_feature(unsigned int index) return vec5 + index; } -static bool xive_spapr_disabled(void) +static bool __init xive_spapr_disabled(void) { const u8 *vec5_xive; diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh index ad9e57209aa4..e32d3162e5ed 100644 --- a/arch/powerpc/tools/head_check.sh +++ b/arch/powerpc/tools/head_check.sh @@ -31,8 +31,10 @@ # level entry code (boot, interrupt vectors, etc) until r2 is set up. This # could cause the kernel to die in early boot. -# Turn this on if you want more debug output: -# set -x +# Allow for verbose output +if [ "$V" = "1" ]; then + set -x +fi if [ $# -lt 2 ]; then echo "$0 [path to nm] [path to vmlinux]" 1>&2 @@ -44,7 +46,7 @@ nm="$1" vmlinux="$2" # gcc-4.6-era toolchain make _stext an A (absolute) symbol rather than T -$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" -m4 > .tmp_symbols.txt +$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" > .tmp_symbols.txt vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1) diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 77114755dc6f..6e6a30aea3ed 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -31,7 +31,10 @@ grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]] grep -v '\<__start_initialization_multiplatform>' | grep -v -e 'b.\?.\?ctr' | grep -v -e 'b.\?.\?lr' | -sed 's/://' | +sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ + -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ + -e 's/[[:space:]]0x/ /' \ + -e 's/://' | awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' ) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 6f9cccea54f3..eb25d7554ffd 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -7,8 +7,7 @@ UBSAN_SANITIZE := n KASAN_SANITIZE := n # Disable ftrace for the entire directory -ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) +ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE) ifdef CONFIG_CC_IS_CLANG # clang stores addresses on the stack causing the frame size to blow @@ -18,7 +17,7 @@ endif ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y += xmon.o nonstdio.o spr_access.o +obj-y += xmon.o nonstdio.o spr_access.o xmon_bpts.o ifdef CONFIG_XMON_DISASSEMBLY obj-y += ppc-dis.o ppc-opc.o diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 7af840c0fc93..df7bca00f5ec 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -35,7 +35,6 @@ #include <asm/machdep.h> #include <asm/xmon.h> #include <asm/processor.h> -#include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/plpar_wrappers.h> @@ -54,6 +53,7 @@ #include <asm/firmware.h> #include <asm/code-patching.h> #include <asm/sections.h> +#include <asm/inst.h> #ifdef CONFIG_PPC64 #include <asm/hvcall.h> @@ -62,6 +62,7 @@ #include "nonstdio.h" #include "dis-asm.h" +#include "xmon_bpts.h" #ifdef CONFIG_SMP static cpumask_t cpus_in_xmon = CPU_MASK_NONE; @@ -98,7 +99,7 @@ static long *xmon_fault_jmp[NR_CPUS]; /* Breakpoint stuff */ struct bpt { unsigned long address; - unsigned int instr[2]; + struct ppc_inst *instr; atomic_t ref_count; int enabled; 
unsigned long pad; @@ -109,9 +110,8 @@ struct bpt { #define BP_TRAP 2 #define BP_DABR 4 -#define NBPTS 256 static struct bpt bpts[NBPTS]; -static struct bpt dabr; +static struct bpt dabr[HBP_NUM_MAX]; static struct bpt *iabr; static unsigned bpinstr = 0x7fe00008; /* trap */ @@ -121,6 +121,7 @@ static unsigned bpinstr = 0x7fe00008; /* trap */ static int cmds(struct pt_regs *); static int mread(unsigned long, void *, int); static int mwrite(unsigned long, void *, int); +static int mread_instr(unsigned long, struct ppc_inst *); static int handle_fault(struct pt_regs *); static void byterev(unsigned char *, int); static void memex(void); @@ -326,11 +327,6 @@ static inline void sync(void) asm volatile("sync; isync"); } -static inline void store_inst(void *p) -{ - asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p)); -} - static inline void cflush(void *p) { asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p)); @@ -485,6 +481,13 @@ static inline int unrecoverable_excp(struct pt_regs *regs) #endif } +static void xmon_touch_watchdogs(void) +{ + touch_softlockup_watchdog_sync(); + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); +} + static int xmon_core(struct pt_regs *regs, int fromipi) { int cmd = 0; @@ -706,13 +709,13 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = at_breakpoint(regs->nip); if (bp != NULL) { - int stepped = emulate_step(regs, bp->instr[0]); + int stepped = emulate_step(regs, ppc_inst_read(bp->instr)); if (stepped == 0) { regs->nip = (unsigned long) &bp->instr[0]; atomic_inc(&bp->ref_count); } else if (stepped < 0) { printf("Couldn't single-step %s instruction\n", - (IS_RFID(bp->instr[0])? "rfid": "mtmsrd")); + IS_RFID(ppc_inst_read(bp->instr))? "rfid": "mtmsrd"); } } } @@ -722,7 +725,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) else insert_cpu_bpts(); - touch_nmi_watchdog(); + xmon_touch_watchdogs(); local_irq_restore(flags); return cmd != 'X' && cmd != EOF; @@ -761,8 +764,8 @@ static int xmon_bpt(struct pt_regs *regs) /* Are we at the trap at bp->instr[1] for some bp? 
*/ bp = in_breakpoint_table(regs->nip, &offset); - if (bp != NULL && offset == 4) { - regs->nip = bp->address + 4; + if (bp != NULL && (offset == 4 || offset == 8)) { + regs->nip = bp->address + offset; atomic_dec(&bp->ref_count); return 1; } @@ -787,10 +790,17 @@ static int xmon_sstep(struct pt_regs *regs) static int xmon_break_match(struct pt_regs *regs) { + int i; + if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT)) return 0; - if (dabr.enabled == 0) - return 0; + for (i = 0; i < nr_wp_slots(); i++) { + if (dabr[i].enabled) + goto found; + } + return 0; + +found: xmon_core(regs, 0); return 1; } @@ -859,15 +869,13 @@ static struct bpt *in_breakpoint_table(unsigned long nip, unsigned long *offp) { unsigned long off; - off = nip - (unsigned long) bpts; - if (off >= sizeof(bpts)) + off = nip - (unsigned long)bpt_table; + if (off >= sizeof(bpt_table)) return NULL; - off %= sizeof(struct bpt); - if (off != offsetof(struct bpt, instr[0]) - && off != offsetof(struct bpt, instr[1])) + *offp = off & (BPT_SIZE - 1); + if (off & 3) return NULL; - *offp = off - offsetof(struct bpt, instr[0]); - return (struct bpt *) (nip - off); + return bpts + (off / BPT_SIZE); } static struct bpt *new_breakpoint(unsigned long a) @@ -882,8 +890,7 @@ static struct bpt *new_breakpoint(unsigned long a) for (bp = bpts; bp < &bpts[NBPTS]; ++bp) { if (!bp->enabled && atomic_read(&bp->ref_count) == 0) { bp->address = a; - bp->instr[1] = bpinstr; - store_inst(&bp->instr[1]); + bp->instr = (void *)(bpt_table + ((bp - bpts) * BPT_WORDS)); return bp; } } @@ -895,47 +902,75 @@ static struct bpt *new_breakpoint(unsigned long a) static void insert_bpts(void) { int i; - struct bpt *bp; + struct ppc_inst instr, instr2; + struct bpt *bp, *bp2; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0) continue; - if (mread(bp->address, &bp->instr[0], 4) != 4) { + if (!mread_instr(bp->address, &instr)) { printf("Couldn't read instruction at %lx, " "disabling breakpoint there\n", bp->address); bp->enabled = 0; continue; } - if (IS_MTMSRD(bp->instr[0]) || IS_RFID(bp->instr[0])) { + if (IS_MTMSRD(instr) || IS_RFID(instr)) { printf("Breakpoint at %lx is on an mtmsrd or rfid " "instruction, disabling it\n", bp->address); bp->enabled = 0; continue; } - store_inst(&bp->instr[0]); + /* + * Check the address is not a suffix by looking for a prefix in + * front of it. + */ + if (mread_instr(bp->address - 4, &instr2) == 8) { + printf("Breakpoint at %lx is on the second word of a prefixed instruction, disabling it\n", + bp->address); + bp->enabled = 0; + continue; + } + /* + * We might still be a suffix - if the prefix has already been + * replaced by a breakpoint we won't catch it with the above + * test. 
+ */ + bp2 = at_breakpoint(bp->address - 4); + if (bp2 && ppc_inst_prefixed(ppc_inst_read(bp2->instr))) { + printf("Breakpoint at %lx is on the second word of a prefixed instruction, disabling it\n", + bp->address); + bp->enabled = 0; + continue; + } + + patch_instruction(bp->instr, instr); + patch_instruction(ppc_inst_next(bp->instr, &instr), + ppc_inst(bpinstr)); if (bp->enabled & BP_CIABR) continue; - if (patch_instruction((unsigned int *)bp->address, - bpinstr) != 0) { + if (patch_instruction((struct ppc_inst *)bp->address, + ppc_inst(bpinstr)) != 0) { printf("Couldn't write instruction at %lx, " "disabling breakpoint there\n", bp->address); bp->enabled &= ~BP_TRAP; continue; } - store_inst((void *)bp->address); } } static void insert_cpu_bpts(void) { + int i; struct arch_hw_breakpoint brk; - if (dabr.enabled) { - brk.address = dabr.address; - brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - brk.len = DABR_MAX_LEN; - __set_breakpoint(&brk); + for (i = 0; i < nr_wp_slots(); i++) { + if (dabr[i].enabled) { + brk.address = dabr[i].address; + brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; + brk.len = 8; + __set_breakpoint(i, &brk); + } } if (iabr) @@ -946,20 +981,18 @@ static void remove_bpts(void) { int i; struct bpt *bp; - unsigned instr; + struct ppc_inst instr; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP) continue; - if (mread(bp->address, &instr, 4) == 4 - && instr == bpinstr + if (mread_instr(bp->address, &instr) + && ppc_inst_equal(instr, ppc_inst(bpinstr)) && patch_instruction( - (unsigned int *)bp->address, bp->instr[0]) != 0) + (struct ppc_inst *)bp->address, ppc_inst_read(bp->instr)) != 0) printf("Couldn't remove breakpoint at %lx\n", bp->address); - else - store_inst((void *)bp->address); } } @@ -1164,13 +1197,13 @@ static int do_step(struct pt_regs *regs) */ static int do_step(struct pt_regs *regs) { - unsigned int instr; + struct ppc_inst instr; int stepped; force_enable_xmon(); /* check we are in 64-bit kernel mode, translation enabled */ if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) { - if (mread(regs->nip, &instr, 4) == 4) { + if (mread_instr(regs->nip, &instr)) { stepped = emulate_step(regs, instr); if (stepped < 0) { printf("Couldn't single-step %s instruction\n", @@ -1178,7 +1211,7 @@ static int do_step(struct pt_regs *regs) return 0; } if (stepped > 0) { - regs->trap = 0xd00 | (regs->trap & 1); + set_trap(regs, 0xd00); printf("stepped to "); xmon_print_symbol(regs->nip, " ", "\n"); ppc_inst_dump(regs->nip, 1, 0); @@ -1330,14 +1363,14 @@ csum(void) */ static long check_bp_loc(unsigned long addr) { - unsigned int instr; + struct ppc_inst instr; addr &= ~3; if (!is_kernel_addr(addr)) { printf("Breakpoints may only be placed at kernel addresses\n"); return 0; } - if (!mread(addr, &instr, sizeof(instr))) { + if (!mread_instr(addr, &instr)) { printf("Can't read instruction at address %lx\n", addr); return 0; } @@ -1349,6 +1382,35 @@ static long check_bp_loc(unsigned long addr) return 1; } +static int find_free_data_bpt(void) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!dabr[i].enabled) + return i; + } + printf("Couldn't find free breakpoint register\n"); + return -1; +} + +static void print_data_bpts(void) +{ + int i; + + for (i = 0; i < nr_wp_slots(); i++) { + if (!dabr[i].enabled) + continue; + + printf(" data "REG" [", dabr[i].address); + if (dabr[i].enabled & 1) + printf("r"); + if (dabr[i].enabled & 2) + printf("w"); + printf("]\n"); + 
} +} + static char *breakpoint_help_string = "Breakpoint command usage:\n" "b show breakpoints\n" @@ -1382,6 +1444,9 @@ bpt_cmds(void) printf("Hardware data breakpoint not supported on this cpu\n"); break; } + i = find_free_data_bpt(); + if (i < 0) + break; mode = 7; cmd = inchar(); if (cmd == 'r') @@ -1390,15 +1455,15 @@ bpt_cmds(void) mode = 6; else termch = cmd; - dabr.address = 0; - dabr.enabled = 0; - if (scanhex(&dabr.address)) { - if (!is_kernel_addr(dabr.address)) { + dabr[i].address = 0; + dabr[i].enabled = 0; + if (scanhex(&dabr[i].address)) { + if (!is_kernel_addr(dabr[i].address)) { printf(badaddr); break; } - dabr.address &= ~HW_BRK_TYPE_DABR; - dabr.enabled = mode | BP_DABR; + dabr[i].address &= ~HW_BRK_TYPE_DABR; + dabr[i].enabled = mode | BP_DABR; } force_enable_xmon(); @@ -1437,7 +1502,9 @@ bpt_cmds(void) for (i = 0; i < NBPTS; ++i) bpts[i].enabled = 0; iabr = NULL; - dabr.enabled = 0; + for (i = 0; i < nr_wp_slots(); i++) + dabr[i].enabled = 0; + printf("All breakpoints cleared\n"); break; } @@ -1471,14 +1538,7 @@ bpt_cmds(void) if (xmon_is_ro || !scanhex(&a)) { /* print all breakpoints */ printf(" type address\n"); - if (dabr.enabled) { - printf(" data "REG" [", dabr.address); - if (dabr.enabled & 1) - printf("r"); - if (dabr.enabled & 2) - printf("w"); - printf("]\n"); - } + print_data_bpts(); for (bp = bpts; bp < &bpts[NBPTS]; ++bp) { if (!bp->enabled) continue; @@ -1540,6 +1600,7 @@ const char *getvecname(unsigned long vec) case 0x1300: ret = "(Instruction Breakpoint)"; break; case 0x1500: ret = "(Denormalisation)"; break; case 0x1700: ret = "(Altivec Assist)"; break; + case 0x3000: ret = "(System Call Vectored)"; break; default: ret = ""; } return ret; @@ -1776,7 +1837,7 @@ static void prregs(struct pt_regs *fp) #endif printf("pc = "); xmon_print_symbol(fp->nip, " ", "\n"); - if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) { + if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) { printf("cfar= "); xmon_print_symbol(fp->orig_gpr3, " ", "\n"); } @@ -1808,7 +1869,7 @@ static void cacheflush(void) catch_memory_errors = 1; sync(); - if (cmd != 'i') { + if (cmd != 'i' || IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES) cflush((void *) adrs); } else { @@ -1938,8 +1999,13 @@ static void dump_207_sprs(void) printf("hfscr = %.16lx dhdes = %.16lx rpr = %.16lx\n", mfspr(SPRN_HFSCR), mfspr(SPRN_DHDES), mfspr(SPRN_RPR)); - printf("dawr = %.16lx dawrx = %.16lx ciabr = %.16lx\n", - mfspr(SPRN_DAWR), mfspr(SPRN_DAWRX), mfspr(SPRN_CIABR)); + printf("dawr0 = %.16lx dawrx0 = %.16lx\n", + mfspr(SPRN_DAWR0), mfspr(SPRN_DAWRX0)); + if (nr_wp_slots() > 1) { + printf("dawr1 = %.16lx dawrx1 = %.16lx\n", + mfspr(SPRN_DAWR1), mfspr(SPRN_DAWRX1)); + } + printf("ciabr = %.16lx\n", mfspr(SPRN_CIABR)); #endif } @@ -1964,6 +2030,18 @@ static void dump_300_sprs(void) #endif } +static void dump_310_sprs(void) +{ +#ifdef CONFIG_PPC64 + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return; + + printf("mmcr3 = %.16lx, sier2 = %.16lx, sier3 = %.16lx\n", + mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3)); + +#endif +} + static void dump_one_spr(int spr, bool show_unimplemented) { unsigned long val; @@ -2018,6 +2096,7 @@ static void super_regs(void) dump_206_sprs(); dump_207_sprs(); dump_300_sprs(); + dump_310_sprs(); return; } @@ -2130,6 +2209,25 @@ mwrite(unsigned long adrs, void *buf, int size) return n; } +static int +mread_instr(unsigned long adrs, struct ppc_inst *instr) +{ + volatile int n; + + n = 0; + if (setjmp(bus_error_jmp) == 0) { + 
catch_memory_errors = 1; + sync(); + *instr = ppc_inst_read((struct ppc_inst *)adrs); + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + n = ppc_inst_len(*instr); + } + catch_memory_errors = 0; + return n; +} + static int fault_type; static int fault_except; static char *fault_chars[] = { "--", "**", "##" }; @@ -2856,12 +2954,11 @@ generic_inst_dump(unsigned long adr, long count, int praddr, { int nr, dotted; unsigned long first_adr; - unsigned int inst, last_inst = 0; - unsigned char val[4]; + struct ppc_inst inst, last_inst = ppc_inst(0); dotted = 0; - for (first_adr = adr; count > 0; --count, adr += 4) { - nr = mread(adr, val, 4); + for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) { + nr = mread_instr(adr, &inst); if (nr == 0) { if (praddr) { const char *x = fault_chars[fault_type]; @@ -2869,8 +2966,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, } break; } - inst = GETWORD(val); - if (adr > first_adr && inst == last_inst) { + if (adr > first_adr && ppc_inst_equal(inst, last_inst)) { if (!dotted) { printf(" ...\n"); dotted = 1; @@ -2880,9 +2976,12 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %.8x", adr, inst); + printf(REG" %s", adr, ppc_inst_as_str(inst)); printf("\t"); - dump_func(inst, adr); + if (!ppc_inst_prefixed(inst)) + dump_func(ppc_inst_val(inst), adr); + else + dump_func(ppc_inst_as_u64(inst), adr); printf("\n"); } return adr - first_adr; @@ -3107,8 +3206,8 @@ static void show_task(struct task_struct *tsk) (tsk->exit_state & EXIT_DEAD) ? 'E' : (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; - printf("%px %016lx %6d %6d %c %2d %s\n", tsk, - tsk->thread.ksp, + printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk, + tsk->thread.ksp, tsk->thread.regs, tsk->pid, rcu_dereference(tsk->parent)->pid, state, task_cpu(tsk), tsk->comm); @@ -3135,7 +3234,8 @@ static void show_pte(unsigned long addr) unsigned long tskv = 0; struct task_struct *tsk = NULL; struct mm_struct *mm; - pgd_t *pgdp, *pgdir; + pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -3159,28 +3259,26 @@ static void show_pte(unsigned long addr) catch_memory_errors = 1; sync(); - if (mm == &init_mm) { + if (mm == &init_mm) pgdp = pgd_offset_k(addr); - pgdir = pgd_offset_k(0); - } else { + else pgdp = pgd_offset(mm, addr); - pgdir = pgd_offset(mm, 0); - } - if (pgd_none(*pgdp)) { - printf("no linux page table for address\n"); + p4dp = p4d_offset(pgdp, addr); + + if (p4d_none(*p4dp)) { + printf("No valid P4D\n"); return; } - printf("pgd @ 0x%px\n", pgdir); - - if (pgd_is_leaf(*pgdp)) { - format_pte(pgdp, pgd_val(*pgdp)); + if (p4d_is_leaf(*p4dp)) { + format_pte(p4dp, p4d_val(*p4dp)); return; } - printf("pgdp @ 0x%px = 0x%016lx\n", pgdp, pgd_val(*pgdp)); - pudp = pud_offset(pgdp, addr); + printf("p4dp @ 0x%px = 0x%016lx\n", p4dp, p4d_val(*p4dp)); + + pudp = pud_offset(p4dp, addr); if (pud_none(*pudp)) { printf("No valid PUD\n"); @@ -3231,7 +3329,7 @@ static void show_tasks(void) unsigned long tskv; struct task_struct *tsk = NULL; - printf(" task_struct ->thread.ksp PID PPID S P CMD\n"); + printf(" task_struct ->thread.ksp ->thread.regs PID PPID S P CMD\n"); if (scanhex(&tskv)) tsk = (struct task_struct *)tskv; @@ -3842,7 +3940,7 @@ static void sysrq_handle_xmon(int key) xmon_init(0); } -static struct sysrq_key_op sysrq_xmon_op = { +static const struct sysrq_key_op sysrq_xmon_op = { .handler = sysrq_handle_xmon, .help_msg = "xmon(x)", .action_msg = "Entering 
xmon", @@ -3869,10 +3967,9 @@ static void clear_all_bpt(void) bpts[i].enabled = 0; /* Clear any data or iabr breakpoints */ - if (iabr || dabr.enabled) { - iabr = NULL; - dabr.enabled = 0; - } + iabr = NULL; + for (i = 0; i < nr_wp_slots(); i++) + dabr[i].enabled = 0; } #ifdef CONFIG_DEBUG_FS @@ -4181,7 +4278,7 @@ static int do_spu_cmd(void) subcmd = inchar(); if (isxdigit(subcmd) || subcmd == '\n') termch = subcmd; - /* fall through */ + fallthrough; case 'f': scanhex(&num); if (num >= XMON_NUM_SPUS || !spu_info[num].spu) { diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S new file mode 100644 index 000000000000..69726814cd27 --- /dev/null +++ b/arch/powerpc/xmon/xmon_bpts.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/ppc_asm.h> +#include <asm/asm-compat.h> +#include <asm/asm-offsets.h> +#include "xmon_bpts.h" + +/* Prefixed instructions can not cross 64 byte boundaries */ +.align 6 +.global bpt_table +bpt_table: + .space NBPTS * BPT_SIZE diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h new file mode 100644 index 000000000000..57e6fb03de48 --- /dev/null +++ b/arch/powerpc/xmon/xmon_bpts.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef XMON_BPTS_H +#define XMON_BPTS_H + +#define NBPTS 256 +#ifndef __ASSEMBLY__ +#include <asm/inst.h> +#define BPT_SIZE (sizeof(struct ppc_inst) * 2) +#define BPT_WORDS (BPT_SIZE / sizeof(struct ppc_inst)) + +extern unsigned int bpt_table[NBPTS * BPT_WORDS]; +#endif /* __ASSEMBLY__ */ + +#endif /* XMON_BPTS_H */ |