diff options
173 files changed, 4326 insertions, 2462 deletions
diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst index dabee3729e5a..56490c4c0c07 100644 --- a/Documentation/powerpc/syscall64-abi.rst +++ b/Documentation/powerpc/syscall64-abi.rst @@ -109,6 +109,16 @@ auxiliary vector. scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC. +ptrace +------ +When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains +the system call type that can be used to distinguish between sc and scv 0 +system calls, and the different register conventions can be accounted for. + +If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was +performed with the sc instruction, if it is 0x3000 then the system call was +performed with the scv 0 instruction. + vsyscall ======== diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 088dd2afcfe4..9bf52e292eda 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -140,7 +140,9 @@ config PPC select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 + select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) + select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -266,6 +268,7 @@ config PPC select PPC_DAWR if PPC64 select RTC_LIB select SPARSE_IRQ + select STRICT_KERNEL_RWX if STRICT_MODULE_RWX select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select VIRT_TO_BUS if !PPC64 @@ -289,6 +292,7 @@ config PANIC_TIMEOUT config COMPAT bool "Enable support for 32bit binaries" depends on PPC64 + depends on !CC_IS_CLANG || CLANG_VERSION >= 120000 default y if !CPU_LITTLE_ENDIAN select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION @@ -426,7 +430,7 @@ source "kernel/Kconfig.hz" config MATH_EMULATION bool "Math emulation" - depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE + depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT select PPC_FPU_REGS help Some PowerPC chips designed for embedded applications do not have diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 3212d076ac6a..712c5e8768ce 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -376,6 +376,16 @@ ppc64_book3e_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc32_randconfig +ppc32_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/32-bit.config \ + -f $(srctree)/Makefile randconfig + +PHONY += ppc64_randconfig +ppc64_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/64-bit.config \ + -f $(srctree)/Makefile randconfig + define archhelp @echo '* zImage - Build default images selected by kernel config' @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 2b8da923ceca..e312ea802aa6 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -163,6 +163,8 @@ src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c +src-plat-$(CONFIG_PPC_MICROWATT) += fixed-head.S microwatt.c + src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) src-boot := $(src-wlib) $(src-plat) empty.c @@ -227,7 +229,7 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE hostprogs := addnote hack-coff mktree -targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) +targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds @@ -355,6 +357,8 @@ image-$(CONFIG_MVME5100) += dtbImage.mvme5100 # Board port in arch/powerpc/platform/amigaone/Kconfig image-$(CONFIG_AMIGAONE) += cuImage.amigaone +image-$(CONFIG_PPC_MICROWATT) += dtbImage.microwatt + # For 32-bit powermacs, build the COFF and miboot images # as well as the ELF images. ifdef CONFIG_PPC32 diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c index 6098b879ac97..977eb15a6d17 100644 --- a/arch/powerpc/boot/decompress.c +++ b/arch/powerpc/boot/decompress.c @@ -99,8 +99,8 @@ static void print_err(char *s) * partial_decompress - decompresses part or all of a compressed buffer * @inbuf: input buffer * @input_size: length of the input buffer - * @outbuf: input buffer - * @output_size: length of the input buffer + * @outbuf: output buffer + * @output_size: length of the output buffer * @skip number of output bytes to ignore * * This function takes compressed data from inbuf, decompresses and write it to diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c index 5d91036ad626..58fbcfcc98c9 100644 --- a/arch/powerpc/boot/devtree.c +++ b/arch/powerpc/boot/devtree.c @@ -13,6 +13,7 @@ #include "string.h" #include "stdio.h" #include "ops.h" +#include "of.h" void dt_fixup_memory(u64 start, u64 size) { @@ -23,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size) root = finddevice("/"); if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0) naddr = 2; + else + naddr = be32_to_cpu(naddr); if (naddr < 1 || naddr > 2) fatal("Can't cope with #address-cells == %d in /\n\r", naddr); if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0) nsize = 1; + else + nsize = be32_to_cpu(nsize); if (nsize < 1 || nsize > 2) fatal("Can't cope with #size-cells == %d in /\n\r", nsize); i = 0; if (naddr == 2) - memreg[i++] = start >> 32; - memreg[i++] = start & 0xffffffff; + memreg[i++] = cpu_to_be32(start >> 32); + memreg[i++] = cpu_to_be32(start & 0xffffffff); if (nsize == 2) - memreg[i++] = size >> 32; - memreg[i++] = size & 0xffffffff; + memreg[i++] = cpu_to_be32(size >> 32); + memreg[i++] = cpu_to_be32(size & 0xffffffff); memory = finddevice("/memory"); if (! memory) { @@ -45,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size) setprop_str(memory, "device_type", "memory"); } - printf("Memory <- <0x%x", memreg[0]); + printf("Memory <- <0x%x", be32_to_cpu(memreg[0])); for (i = 1; i < (naddr + nsize); i++) - printf(" 0x%x", memreg[i]); + printf(" 0x%x", be32_to_cpu(memreg[i])); printf("> (%ldMB)\n\r", (unsigned long)(size >> 20)); setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32)); @@ -65,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus) printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus)); while ((devp = find_node_by_devtype(devp, "cpu"))) { - setprop_val(devp, "clock-frequency", cpu); - setprop_val(devp, "timebase-frequency", tb); + setprop_val(devp, "clock-frequency", cpu_to_be32(cpu)); + setprop_val(devp, "timebase-frequency", cpu_to_be32(tb)); if (bus > 0) - setprop_val(devp, "bus-frequency", bus); + setprop_val(devp, "bus-frequency", cpu_to_be32(bus)); } timebase_period_ns = 1000000000 / tb; @@ -80,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq) if (devp) { printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq)); - setprop_val(devp, "clock-frequency", freq); + setprop_val(devp, "clock-frequency", cpu_to_be32(freq)); } } @@ -133,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize) { if (getprop(node, "#address-cells", naddr, 4) != 4) *naddr = 2; + else + *naddr = be32_to_cpu(*naddr); if (getprop(node, "#size-cells", nsize, 4) != 4) *nsize = 1; + else + *nsize = be32_to_cpu(*nsize); } static void copy_val(u32 *dest, u32 *src, int naddr) @@ -163,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr) int i, carry = 0; for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) { - u64 tmp = (u64)reg[i] + add[i] + carry; + u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry; carry = tmp >> 32; - reg[i] = (u32)tmp; + reg[i] = cpu_to_be32((u32)tmp); } return !carry; @@ -180,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize) u32 end; for (i = 0; i < MAX_ADDR_CELLS; i++) { - if (reg[i] < range[i]) + if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i])) return 0; - if (reg[i] > range[i]) + if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i])) break; } for (i = 0; i < MAX_ADDR_CELLS; i++) { - end = range[i] + rangesize[i]; + end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]); - if (reg[i] < end) + if (be32_to_cpu(reg[i]) < end) break; - if (reg[i] > end) + if (be32_to_cpu(reg[i]) > end) return 0; } @@ -240,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, return 0; dt_get_reg_format(parent, &naddr, &nsize); - if (nsize > 2) return 0; @@ -252,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, copy_val(last_addr, prop_buf + offset, naddr); - ret_size = prop_buf[offset + naddr]; + ret_size = be32_to_cpu(prop_buf[offset + naddr]); if (nsize == 2) { ret_size <<= 32; - ret_size |= prop_buf[offset + naddr + 1]; + ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]); } for (;;) { @@ -278,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, offset = find_range(last_addr, prop_buf, prev_naddr, naddr, prev_nsize, buflen / 4); - if (offset < 0) return 0; @@ -296,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, if (naddr > 2) return 0; - ret_addr = ((u64)last_addr[2] << 32) | last_addr[3]; - + ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]); if (sizeof(void *) == 4 && (ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL || ret_addr + ret_size > 0x100000000ULL)) @@ -350,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat) int dt_get_virtual_reg(void *node, void **addr, int nres) { unsigned long xaddr; - int n; + int n, i; n = getprop(node, "virtual-reg", addr, nres * 4); - if (n > 0) + if (n > 0) { + for (i = 0; i < n/4; i ++) + ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]); return n / 4; + } for (n = 0; n < nres; n++) { if (!dt_xlate_reg(node, n, &xaddr, NULL)) diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts new file mode 100644 index 000000000000..974abbdda249 --- /dev/null +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -0,0 +1,138 @@ +/dts-v1/; + +/ { + #size-cells = <0x02>; + #address-cells = <0x02>; + model-name = "microwatt"; + compatible = "microwatt-soc"; + + aliases { + serial0 = &UART0; + }; + + reserved-memory { + #size-cells = <0x02>; + #address-cells = <0x02>; + ranges; + }; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x00000000 0x00000000 0x10000000>; + }; + + cpus { + #size-cells = <0x00>; + #address-cells = <0x01>; + + ibm,powerpc-cpu-features { + display-name = "Microwatt"; + isa = <3000>; + device_type = "cpu-features"; + compatible = "ibm,powerpc-cpu-features"; + + mmu-radix { + isa = <3000>; + usable-privilege = <2>; + }; + + little-endian { + isa = <2050>; + usable-privilege = <3>; + hwcap-bit-nr = <1>; + }; + + cache-inhibited-large-page { + isa = <2040>; + usable-privilege = <2>; + }; + + fixed-point-v3 { + isa = <3000>; + usable-privilege = <3>; + }; + + no-execute { + isa = <2010>; + usable-privilege = <2>; + }; + + floating-point { + hwcap-bit-nr = <27>; + isa = <0>; + usable-privilege = <3>; + }; + }; + + PowerPC,Microwatt@0 { + i-cache-sets = <2>; + ibm,dec-bits = <64>; + reservation-granule-size = <64>; + clock-frequency = <100000000>; + timebase-frequency = <100000000>; + i-tlb-sets = <1>; + ibm,ppc-interrupt-server#s = <0>; + i-cache-block-size = <64>; + d-cache-block-size = <64>; + d-cache-sets = <2>; + i-tlb-size = <64>; + cpu-version = <0x990000>; + status = "okay"; + i-cache-size = <0x1000>; + ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>; + tlb-size = <0>; + tlb-sets = <0>; + device_type = "cpu"; + d-tlb-size = <128>; + d-tlb-sets = <2>; + reg = <0>; + general-purpose; + 64-bit; + d-cache-size = <0x1000>; + ibm,chip-id = <0>; + }; + }; + + soc@c0000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&ICS>; + + ranges = <0 0 0xc0000000 0x40000000>; + + interrupt-controller@4000 { + compatible = "openpower,xics-presentation", "ibm,ppc-xicp"; + ibm,interrupt-server-ranges = <0x0 0x1>; + reg = <0x4000 0x100>; + }; + + ICS: interrupt-controller@5000 { + compatible = "openpower,xics-sources"; + interrupt-controller; + interrupt-ranges = <0x10 0x10>; + reg = <0x5000 0x100>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + }; + + UART0: serial@2000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0x2000 0x8>; + clock-frequency = <100000000>; + current-speed = <115200>; + reg-shift = <2>; + fifo-size = <16>; + interrupts = <0x10 0x1>; + }; + }; + + chosen { + bootargs = ""; + ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 40 00 40]; + stdout-path = &UART0; + }; +}; diff --git a/arch/powerpc/boot/microwatt.c b/arch/powerpc/boot/microwatt.c new file mode 100644 index 000000000000..ca9d83617fc1 --- /dev/null +++ b/arch/powerpc/boot/microwatt.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <stddef.h> +#include "stdio.h" +#include "types.h" +#include "io.h" +#include "ops.h" + +BSS_STACK(8192); + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5) +{ + unsigned long heapsize = 16*1024*1024 - (unsigned long)_end; + + /* + * Disable interrupts and turn off MSR_RI, since we'll + * shortly be overwriting the interrupt vectors. + */ + __asm__ volatile("mtmsrd %0,1" : : "r" (0)); + + simple_alloc_init(_end, heapsize, 32, 64); + fdt_init(_dtb_start); + serial_console_init(); +} diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c index b0da4466d419..f16d2be1d0f3 100644 --- a/arch/powerpc/boot/ns16550.c +++ b/arch/powerpc/boot/ns16550.c @@ -15,6 +15,7 @@ #include "stdio.h" #include "io.h" #include "ops.h" +#include "of.h" #define UART_DLL 0 /* Out: Divisor Latch Low */ #define UART_DLM 1 /* Out: Divisor Latch High */ @@ -58,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp) int n; u32 reg_offset; - if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) + if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) { + printf("virt reg parse fail...\r\n"); return -1; + } n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); if (n == sizeof(reg_offset)) - reg_base += reg_offset; + reg_base += be32_to_cpu(reg_offset); n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); if (n != sizeof(reg_shift)) reg_shift = 0; + else + reg_shift = be32_to_cpu(reg_shift); scdp->open = ns16550_open; scdp->putc = ns16550_putc; diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cdb796b76e2e..1f4676bab786 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -342,6 +342,11 @@ gamecube|wii) link_address='0x600000' platformo="$object/$platform-head.o $object/$platform.o" ;; +microwatt) + link_address='0x500000' + platformo="$object/fixed-head.o $object/$platform.o" + binary=y + ;; treeboot-currituck) link_address='0x1000000' ;; diff --git a/arch/powerpc/boot/zImage.ps3.lds.S b/arch/powerpc/boot/zImage.ps3.lds.S index 7b2ff2eaa73a..d0ffb493614d 100644 --- a/arch/powerpc/boot/zImage.ps3.lds.S +++ b/arch/powerpc/boot/zImage.ps3.lds.S @@ -8,7 +8,7 @@ SECTIONS .kernel:vmlinux.bin : { *(.kernel:vmlinux.bin) } _vmlinux_end = .; - . = ALIGN(4096); + . = ALIGN(8); _dtb_start = .; .kernel:dtb : { *(.kernel:dtb) } _dtb_end = .; diff --git a/arch/powerpc/configs/32-bit.config b/arch/powerpc/configs/32-bit.config new file mode 100644 index 000000000000..ad6546850c68 --- /dev/null +++ b/arch/powerpc/configs/32-bit.config @@ -0,0 +1 @@ +# CONFIG_PPC64 is not set diff --git a/arch/powerpc/configs/64-bit.config b/arch/powerpc/configs/64-bit.config new file mode 100644 index 000000000000..0fe6406929e2 --- /dev/null +++ b/arch/powerpc/configs/64-bit.config @@ -0,0 +1 @@ +CONFIG_PPC64=y diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig new file mode 100644 index 000000000000..a08b739123da --- /dev/null +++ b/arch/powerpc/configs/microwatt_defconfig @@ -0,0 +1,98 @@ +# CONFIG_SWAP is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_TICK_CPU_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB_MERGE_DEFAULT is not set +CONFIG_PPC64=y +# CONFIG_PPC_KUEP is not set +# CONFIG_PPC_KUAP is not set +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_NR_IRQS=64 +CONFIG_PANIC_TIMEOUT=10 +# CONFIG_PPC_POWERNV is not set +# CONFIG_PPC_PSERIES is not set +CONFIG_PPC_MICROWATT=y +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set +CONFIG_CPU_FREQ=y +CONFIG_HZ_100=y +# CONFIG_PPC_MEM_KEYS is not set +# CONFIG_SECCOMP is not set +# CONFIG_MQ_IOSCHED_KYBER is not set +# CONFIG_COREDUMP is not set +# CONFIG_COMPACTION is not set +# CONFIG_MIGRATION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_INET=y +CONFIG_INET_UDP_DIAG=y +CONFIG_INET_RAW_DIAG=y +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_PARTITIONED_MASTER=y +CONFIG_MTD_SPI_NOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_NETDEVICES=y +# CONFIG_WLAN is not set +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_NVRAM is not set +CONFIG_RANDOM_TRUST_CPU=y +CONFIG_SPI=y +CONFIG_SPI_DEBUG=y +CONFIG_SPI_BITBANG=y +CONFIG_SPI_SPIDEV=y +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_IOMMU_SUPPORT is not set +# CONFIG_NVMEM is not set +CONFIG_EXT4_FS=y +# CONFIG_FILE_LOCKING is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_CRYPTO_HW is not set +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set +CONFIG_PRINTK_TIME=y +# CONFIG_SYMBOLIC_ERRNAME is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_MISC is not set +# CONFIG_SCHED_DEBUG is not set +# CONFIG_FTRACE is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_PPC_DISABLE_WERROR=y +CONFIG_XMON=y +CONFIG_XMON_DEFAULT=y +# CONFIG_XMON_DEFAULT_RO_MODE is not set +# CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 949ff9ccda5e..d21f266cea9a 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -57,3 +57,28 @@ CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y +CONFIG_PPC_16K_PAGES=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_FS=y +CONFIG_PPC_PTDUMP=y +CONFIG_MODULES=y +CONFIG_SPI=y +CONFIG_SPI_FSL_SPI=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_DEV_TALITOS=y +CONFIG_8xx_GPIO=y +CONFIG_WATCHDOG=y +CONFIG_8xxx_WDT=y +CONFIG_SMC_UCODE_PATCH=y +CONFIG_ADVANCED_OPTIONS=y +CONFIG_PIN_TLB=y +CONFIG_PERF_EVENTS=y +CONFIG_MATH_EMULATION=y +CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y +CONFIG_STRICT_KERNEL_RWX=y +CONFIG_IPV6=y +CONFIG_BPF_JIT=y +CONFIG_DEBUG_VM_PGTABLE=y +CONFIG_BDI_SWITCH=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008 diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 7ae29cfb06c0..f0e687236484 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -46,6 +46,8 @@ # define SMPWMB eieio #endif +/* clang defines this macro for a builtin, which will not work with runtime patching */ +#undef __lwsync #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") #define dma_rmb() __lwsync() #define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h deleted file mode 100644 index 2a0a467d2985..000000000000 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_BOOK3S_32_HASH_H -#define _ASM_POWERPC_BOOK3S_32_HASH_H -#ifdef __KERNEL__ - -/* - * The "classic" 32-bit implementation of the PowerPC MMU uses a hash - * table containing PTEs, together with a set of 16 segment registers, - * to define the virtual to physical address mapping. - * - * We use the hash table as an extended TLB, i.e. a cache of currently - * active mappings. We maintain a two-level page table tree, much - * like that used by the i386, for the sake of the Linux memory - * management code. Low-level assembler code in hash_low_32.S - * (procedure hash_page) is responsible for extracting ptes from the - * tree and putting them into the hash table when necessary, and - * updating the accessed and modified bits in the page table tree. - */ - -#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ -#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ -#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ -#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ -#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ -#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ -#define _PAGE_DIRTY 0x080 /* C: page changed */ -#define _PAGE_ACCESSED 0x100 /* R: page referenced */ -#define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ -#define _PAGE_SPECIAL 0x800 /* software: Special page */ - -#ifdef CONFIG_PTE_64BIT -/* We never clear the high word of the pte */ -#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) -#else -#define _PTE_NONE_MASK _PAGE_HASHPTE -#endif - -#define _PMD_PRESENT 0 -#define _PMD_PRESENT_MASK (PAGE_MASK) -#define _PMD_BAD (~PAGE_MASK) - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 1670dfe9d4f1..64201125a287 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,35 +7,104 @@ #ifndef __ASSEMBLY__ +#include <linux/jump_label.h> + +extern struct static_key_false disable_kuap_key; +extern struct static_key_false disable_kuep_key; + +static __always_inline bool kuap_is_disabled(void) +{ + return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key); +} + +static __always_inline bool kuep_is_disabled(void) +{ + return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key); +} + +static inline void kuep_lock(void) +{ + if (kuep_is_disabled()) + return; + + update_user_segments(mfsr(0) | SR_NX); +} + +static inline void kuep_unlock(void) +{ + if (kuep_is_disabled()) + return; + + update_user_segments(mfsr(0) & ~SR_NX); +} + #ifdef CONFIG_PPC_KUAP #include <linux/sched.h> -static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) +#define KUAP_NONE (~0UL) +#define KUAP_ALL (~1UL) + +static inline void kuap_lock_one(unsigned long addr) { - addr &= 0xf0000000; /* align addr to start of segment */ - barrier(); /* make sure thread.kuap is updated before playing with SRs */ - while (addr < end) { - mtsr(sr, addr); - sr += 0x111; /* next VSID */ - sr &= 0xf0ffffff; /* clear VSID overflow */ - addr += 0x10000000; /* address of next segment */ - } + mtsr(mfsr(addr) | SR_KS, addr); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_unlock_one(unsigned long addr) +{ + mtsr(mfsr(addr) & ~SR_KS, addr); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_lock_all(void) +{ + update_user_segments(mfsr(0) | SR_KS); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_unlock_all(void) +{ + update_user_segments(mfsr(0) & ~SR_KS); isync(); /* Context sync required after mtsr() */ } +void kuap_lock_all_ool(void); +void kuap_unlock_all_ool(void); + +static inline void kuap_lock(unsigned long addr, bool ool) +{ + if (likely(addr != KUAP_ALL)) + kuap_lock_one(addr); + else if (!ool) + kuap_lock_all(); + else + kuap_lock_all_ool(); +} + +static inline void kuap_unlock(unsigned long addr, bool ool) +{ + if (likely(addr != KUAP_ALL)) + kuap_unlock_one(addr); + else if (!ool) + kuap_unlock_all(); + else + kuap_unlock_all_ool(); +} + static inline void kuap_save_and_lock(struct pt_regs *regs) { unsigned long kuap = current->thread.kuap; - u32 addr = kuap & 0xf0000000; - u32 end = kuap << 28; + + if (kuap_is_disabled()) + return; regs->kuap = kuap; - if (unlikely(!kuap)) + if (unlikely(kuap == KUAP_NONE)) return; - current->thread.kuap = 0; - kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */ + current->thread.kuap = KUAP_NONE; + kuap_lock(kuap, false); } static inline void kuap_user_restore(struct pt_regs *regs) @@ -44,22 +113,22 @@ static inline void kuap_user_restore(struct pt_regs *regs) static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { - u32 addr = regs->kuap & 0xf0000000; - u32 end = regs->kuap << 28; + if (kuap_is_disabled()) + return; current->thread.kuap = regs->kuap; - if (unlikely(regs->kuap == kuap)) - return; - - kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ + kuap_unlock(regs->kuap, false); } static inline unsigned long kuap_get_and_assert_locked(void) { unsigned long kuap = current->thread.kuap; - WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); + if (kuap_is_disabled()) + return KUAP_NONE; + + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE); return kuap; } @@ -72,84 +141,78 @@ static inline void kuap_assert_locked(void) static __always_inline void allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { - u32 addr, end; + if (kuap_is_disabled()) + return; BUILD_BUG_ON(!__builtin_constant_p(dir)); - BUILD_BUG_ON(dir & ~KUAP_READ_WRITE); if (!(dir & KUAP_WRITE)) return; - addr = (__force u32)to; - - if (unlikely(addr >= TASK_SIZE || !size)) - return; - - end = min(addr + size, TASK_SIZE); - - current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); - kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ + current->thread.kuap = (__force u32)to; + kuap_unlock_one((__force u32)to); } -static __always_inline void prevent_user_access(void __user *to, const void __user *from, - u32 size, unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { - u32 addr, end; - - BUILD_BUG_ON(!__builtin_constant_p(dir)); + u32 kuap = current->thread.kuap; - if (dir & KUAP_CURRENT_WRITE) { - u32 kuap = current->thread.kuap; - - if (unlikely(!kuap)) - return; + if (kuap_is_disabled()) + return; - addr = kuap & 0xf0000000; - end = kuap << 28; - } else if (dir & KUAP_WRITE) { - addr = (__force u32)to; - end = min(addr + size, TASK_SIZE); + BUILD_BUG_ON(!__builtin_constant_p(dir)); - if (unlikely(addr >= TASK_SIZE || !size)) - return; - } else { + if (!(dir & KUAP_WRITE)) return; - } - current->thread.kuap = 0; - kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* set Ks */ + current->thread.kuap = KUAP_NONE; + kuap_lock(kuap, true); } static inline unsigned long prevent_user_access_return(void) { unsigned long flags = current->thread.kuap; - unsigned long addr = flags & 0xf0000000; - unsigned long end = flags << 28; - void __user *to = (__force void __user *)addr; - if (flags) - prevent_user_access(to, to, end - addr, KUAP_READ_WRITE); + if (kuap_is_disabled()) + return KUAP_NONE; + + if (flags != KUAP_NONE) { + current->thread.kuap = KUAP_NONE; + kuap_lock(flags, true); + } return flags; } static inline void restore_user_access(unsigned long flags) { - unsigned long addr = flags & 0xf0000000; - unsigned long end = flags << 28; - void __user *to = (__force void __user *)addr; + if (kuap_is_disabled()) + return; - if (flags) - allow_user_access(to, to, end - addr, KUAP_READ_WRITE); + if (flags != KUAP_NONE) { + current->thread.kuap = flags; + kuap_unlock(flags, true); + } } static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - unsigned long begin = regs->kuap & 0xf0000000; - unsigned long end = regs->kuap << 28; + unsigned long kuap = regs->kuap; + + if (kuap_is_disabled()) + return false; + + if (!is_write || kuap == KUAP_ALL) + return false; + if (kuap == KUAP_NONE) + return true; + + /* If faulting address doesn't match unlocked segment, unlock all */ + if ((kuap ^ address) & 0xf0000000) + regs->kuap = KUAP_ALL; - return is_write && (address < begin || address >= end); + return false; } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index b85f8e114a9c..f5be185cbdf8 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -67,6 +67,16 @@ struct ppc_bat { #ifndef __ASSEMBLY__ /* + * This macro defines the mapping from contexts to VSIDs (virtual + * segment IDs). We use a skew on both the context and the high 4 bits + * of the 32-bit virtual address (the "effective segment ID") in order + * to spread out the entries in the MMU hash table. Note, if this + * function is changed then hash functions will have to be + * changed to correspond. + */ +#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) + +/* * Hardware Page Table Entry * Note that the xpn and x bitfields are used only by processors that * support extended addressing; otherwise, those bits are reserved. @@ -102,6 +112,37 @@ extern s32 patch__hash_page_B, patch__hash_page_C; extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2; extern s32 patch__flush_hash_B; +#include <asm/reg.h> +#include <asm/task_size_32.h> + +static __always_inline void update_user_segment(u32 n, u32 val) +{ + if (n << 28 < TASK_SIZE) + mtsr(val + n * 0x111, n << 28); +} + +static __always_inline void update_user_segments(u32 val) +{ + val &= 0xf0ffffff; + + update_user_segment(0, val); + update_user_segment(1, val); + update_user_segment(2, val); + update_user_segment(3, val); + update_user_segment(4, val); + update_user_segment(5, val); + update_user_segment(6, val); + update_user_segment(7, val); + update_user_segment(8, val); + update_user_segment(9, val); + update_user_segment(10, val); + update_user_segment(11, val); + update_user_segment(12, val); + update_user_segment(13, val); + update_user_segment(14, val); + update_user_segment(15, val); +} + #endif /* !__ASSEMBLY__ */ /* We happily ignore the smaller BATs on 601, we don't actually use diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 83c65845a1a9..609c80f67194 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -4,7 +4,43 @@ #include <asm-generic/pgtable-nopmd.h> -#include <asm/book3s/32/hash.h> +/* + * The "classic" 32-bit implementation of the PowerPC MMU uses a hash + * table containing PTEs, together with a set of 16 segment registers, + * to define the virtual to physical address mapping. + * + * We use the hash table as an extended TLB, i.e. a cache of currently + * active mappings. We maintain a two-level page table tree, much + * like that used by the i386, for the sake of the Linux memory + * management code. Low-level assembler code in hash_low_32.S + * (procedure hash_page) is responsible for extracting ptes from the + * tree and putting them into the hash table when necessary, and + * updating the accessed and modified bits in the page table tree. + */ + +#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ +#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ +#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ +#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ +#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ +#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ +#define _PAGE_DIRTY 0x080 /* C: page changed */ +#define _PAGE_ACCESSED 0x100 /* R: page referenced */ +#define _PAGE_EXEC 0x200 /* software: exec allowed */ +#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_SPECIAL 0x800 /* software: Special page */ + +#ifdef CONFIG_PTE_64BIT +/* We never clear the high word of the pte */ +#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) +#else +#define _PTE_NONE_MASK _PAGE_HASHPTE +#endif + +#define _PMD_PRESENT 0 +#define _PMD_PRESENT_MASK (PAGE_MASK) +#define _PMD_BAD (~PAGE_MASK) /* And here we include common definitions */ diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 9700da3a4093..a1cc73a88710 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -398,8 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user #endif /* !CONFIG_PPC_KUAP */ -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void prevent_user_access(unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); if (static_branch_unlikely(&uaccess_flush_key)) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index a666d561b44d..4d9941b2fe51 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -232,6 +232,9 @@ extern unsigned long __pmd_frag_size_shift; #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) +#define MAX_PTRS_PER_PGD (1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \ + H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE)) + /* PMD_SHIFT determines what a second-level page table entry can map */ #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PMD_SIZE (1UL << PMD_SHIFT) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index d5da7ddbf0fc..350de8f90250 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -91,7 +91,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, } #define HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { #ifdef __powerpc64__ u64 res = (__force u64)csum; diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index f1d029bf906e..a95f63788c6b 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -23,13 +23,13 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); -int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); -int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_cond_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); -int patch_branch(struct ppc_inst *addr, unsigned long target, int flags); -int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); -int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); +int patch_branch(u32 *addr, unsigned long target, int flags); +int patch_instruction(u32 *addr, struct ppc_inst instr); +int raw_patch_instruction(u32 *addr, struct ppc_inst instr); static inline unsigned long patch_site_addr(s32 *site) { @@ -38,18 +38,18 @@ static inline unsigned long patch_site_addr(s32 *site) static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) { - return patch_instruction((struct ppc_inst *)patch_site_addr(site), instr); + return patch_instruction((u32 *)patch_site_addr(site), instr); } static inline int patch_branch_site(s32 *site, unsigned long target, int flags) { - return patch_branch((struct ppc_inst *)patch_site_addr(site), target, flags); + return patch_branch((u32 *)patch_site_addr(site), target, flags); } static inline int modify_instruction(unsigned int *addr, unsigned int clr, unsigned int set) { - return patch_instruction((struct ppc_inst *)addr, ppc_inst((*addr & ~clr) | set)); + return patch_instruction(addr, ppc_inst((*addr & ~clr) | set)); } static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set) @@ -59,10 +59,8 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int instr_is_relative_branch(struct ppc_inst instr); int instr_is_relative_link_branch(struct ppc_inst instr); -int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr); -unsigned long branch_target(const struct ppc_inst *instr); -int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, - const struct ppc_inst *src); +unsigned long branch_target(const u32 *instr); +int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); extern bool is_conditional_branch(struct ppc_inst instr); #ifdef CONFIG_PPC_BOOK3E_64 void __patch_exception(int exc, unsigned long addr); @@ -73,9 +71,9 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 (PPC_INST_ADDIS | __PPC_RT(R2)) -#define ADDIS_R2_R12 (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12)) -#define ADDI_R2_R2 (PPC_INST_ADDI | __PPC_RT(R2) | __PPC_RA(R2)) +#define LIS_R2 (PPC_RAW_LIS(_R2, 0)) +#define ADDIS_R2_R12 (PPC_RAW_ADDIS(_R2, _R12, 0)) +#define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0)) static inline unsigned long ppc_function_entry(void *func) @@ -180,12 +178,10 @@ static inline unsigned long ppc_kallsyms_lookup_name(const char *name) #define R2_STACK_OFFSET 40 #endif -#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \ - ___PPC_RA(__REG_R1) | R2_STACK_OFFSET) +#define PPC_INST_LD_TOC PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET) /* usually preceded by a mflr r0 */ -#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \ - ___PPC_RA(__REG_R1) | PPC_LR_STKOFF) +#define PPC_INST_STD_LR PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF) #endif /* CONFIG_PPC64 */ #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 7e4b2cef40c2..9bcf345cb208 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -294,6 +294,13 @@ #define H_RESIZE_HPT_COMMIT 0x370 #define H_REGISTER_PROC_TBL 0x37C #define H_SIGNAL_SYS_RESET 0x380 +#define H_ALLOCATE_VAS_WINDOW 0x388 +#define H_MODIFY_VAS_WINDOW 0x38C +#define H_DEALLOCATE_VAS_WINDOW 0x390 +#define H_QUERY_VAS_WINDOW 0x394 +#define H_QUERY_VAS_CAPABILITIES 0x398 +#define H_QUERY_NX_CAPABILITIES 0x39C +#define H_GET_NX_FAULT 0x3A0 #define H_INT_GET_SOURCE_INFO 0x3A8 #define H_INT_SET_SOURCE_CONFIG 0x3AC #define H_INT_GET_SOURCE_CONFIG 0x3B0 @@ -393,6 +400,9 @@ #define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7 +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8 +#define H_CPU_BEHAV_NO_STF_BARRIER (1ull << 54) // IBM bit 9 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 268d3bd073c8..b11c0e2f9639 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -8,17 +8,17 @@ #define ___get_user_instr(gu_op, dest, ptr) \ ({ \ - long __gui_ret = 0; \ - unsigned long __gui_ptr = (unsigned long)ptr; \ + long __gui_ret; \ + u32 __user *__gui_ptr = (u32 __user *)ptr; \ struct ppc_inst __gui_inst; \ unsigned int __prefix, __suffix; \ - __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + \ + __chk_user_ptr(ptr); \ + __gui_ret = gu_op(__prefix, __gui_ptr); \ if (__gui_ret == 0) { \ if ((__prefix >> 26) == OP_PREFIX) { \ - __gui_ret = gu_op(__suffix, \ - (unsigned int __user *)__gui_ptr + 1); \ - __gui_inst = ppc_inst_prefix(__prefix, \ - __suffix); \ + __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ } else { \ __gui_inst = ppc_inst(__prefix); \ } \ @@ -29,14 +29,15 @@ }) #else /* !CONFIG_PPC64 */ #define ___get_user_instr(gu_op, dest, ptr) \ - gu_op((dest).val, (u32 __user *)(ptr)) +({ \ + __chk_user_ptr(ptr); \ + gu_op((dest).val, (u32 __user *)(ptr)); \ +}) #endif /* CONFIG_PPC64 */ -#define get_user_instr(x, ptr) \ - ___get_user_instr(get_user, x, ptr) +#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) -#define __get_user_instr(x, ptr) \ - ___get_user_instr(__get_user, x, ptr) +#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr) /* * Instruction data type for POWER @@ -59,9 +60,9 @@ static inline int ppc_inst_primary_opcode(struct ppc_inst x) return ppc_inst_val(x) >> 26; } -#ifdef CONFIG_PPC64 -#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) +#define ppc_inst(x) ((struct ppc_inst){ .val = (x) }) +#ifdef CONFIG_PPC64 #define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) static inline u32 ppc_inst_suffix(struct ppc_inst x) @@ -69,68 +70,43 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x) return x.suffix; } -static inline bool ppc_inst_prefixed(struct ppc_inst x) -{ - return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; -} +#else +#define ppc_inst_prefix(x, y) ppc_inst(x) -static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +static inline u32 ppc_inst_suffix(struct ppc_inst x) { - return ppc_inst_prefix(swab32(ppc_inst_val(x)), - swab32(ppc_inst_suffix(x))); + return 0; } -static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) -{ - u32 val, suffix; - - val = *(u32 *)ptr; - if ((val >> 26) == OP_PREFIX) { - suffix = *((u32 *)ptr + 1); - return ppc_inst_prefix(val, suffix); - } else { - return ppc_inst(val); - } -} +#endif /* CONFIG_PPC64 */ -static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +static inline struct ppc_inst ppc_inst_read(const u32 *ptr) { - return *(u64 *)&x == *(u64 *)&y; + if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX) + return ppc_inst_prefix(*ptr, *(ptr + 1)); + else + return ppc_inst(*ptr); } -#else - -#define ppc_inst(x) ((struct ppc_inst){ .val = x }) - -#define ppc_inst_prefix(x, y) ppc_inst(x) - static inline bool ppc_inst_prefixed(struct ppc_inst x) { - return false; -} - -static inline u32 ppc_inst_suffix(struct ppc_inst x) -{ - return 0; + return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX; } static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) { - return ppc_inst(swab32(ppc_inst_val(x))); -} - -static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) -{ - return *ptr; + return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); } static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) { - return ppc_inst_val(x) == ppc_inst_val(y); + if (ppc_inst_val(x) != ppc_inst_val(y)) + return false; + if (!ppc_inst_prefixed(x)) + return true; + return ppc_inst_suffix(x) == ppc_inst_suffix(y); } -#endif /* CONFIG_PPC64 */ - static inline int ppc_inst_len(struct ppc_inst x) { return ppc_inst_prefixed(x) ? 8 : 4; @@ -140,13 +116,13 @@ static inline int ppc_inst_len(struct ppc_inst x) * Return the address of the next instruction, if the instruction @value was * located at @location. */ -static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *value) +static inline u32 *ppc_inst_next(u32 *location, u32 *value) { struct ppc_inst tmp; tmp = ppc_inst_read(value); - return location + ppc_inst_len(tmp); + return (void *)location + ppc_inst_len(tmp); } static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) @@ -178,6 +154,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src); +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src); #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 2d5c6bec2b4f..93ce3ec25387 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -50,7 +50,7 @@ l_yes: 1098: nop; \ .pushsection __jump_table, "aw"; \ .long 1098b - ., LABEL - .; \ - FTR_ENTRY_LONG KEY; \ + FTR_ENTRY_LONG KEY - .; \ .popsection #endif diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index ec96232529ac..1df763002726 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -5,14 +5,6 @@ #define KUAP_READ 1 #define KUAP_WRITE 2 #define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) -/* - * For prevent_user_access() only. - * Use the current saved situation instead of the to/from/size params. - * Used on book3s/32 - */ -#define KUAP_CURRENT_READ 4 -#define KUAP_CURRENT_WRITE 8 -#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kup.h> @@ -46,10 +38,7 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ -#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) -void kuep_lock(void); -void kuep_unlock(void); -#else +#ifndef CONFIG_PPC_BOOK3S_32 static inline void kuep_lock(void) { } static inline void kuep_unlock(void) { } #endif @@ -83,8 +72,7 @@ static inline unsigned long kuap_get_and_assert_locked(void) #ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } +static inline void prevent_user_access(unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -96,53 +84,53 @@ static __always_inline void setup_kup(void) setup_kuap(disable_kuap); } -static inline void allow_read_from_user(const void __user *from, unsigned long size) +static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) { barrier_nospec(); allow_user_access(NULL, from, size, KUAP_READ); } -static inline void allow_write_to_user(void __user *to, unsigned long size) +static __always_inline void allow_write_to_user(void __user *to, unsigned long size) { allow_user_access(to, NULL, size, KUAP_WRITE); } -static inline void allow_read_write_user(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) { barrier_nospec(); allow_user_access(to, from, size, KUAP_READ_WRITE); } -static inline void prevent_read_from_user(const void __user *from, unsigned long size) +static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size, KUAP_READ); + prevent_user_access(KUAP_READ); } -static inline void prevent_write_to_user(void __user *to, unsigned long size) +static __always_inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size, KUAP_WRITE); + prevent_user_access(KUAP_WRITE); } -static inline void prevent_read_write_user(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void prevent_read_write_user(void __user *to, const void __user *from, + unsigned long size) { - prevent_user_access(to, from, size, KUAP_READ_WRITE); + prevent_user_access(KUAP_READ_WRITE); } -static inline void prevent_current_access_user(void) +static __always_inline void prevent_current_access_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT); + prevent_user_access(KUAP_READ_WRITE); } -static inline void prevent_current_read_from_user(void) +static __always_inline void prevent_current_read_from_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ); + prevent_user_access(KUAP_READ); } -static inline void prevent_current_write_to_user(void) +static __always_inline void prevent_current_write_to_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE); + prevent_user_access(KUAP_WRITE); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 607168b1aef4..27016b98ecb2 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -220,7 +220,7 @@ enum { #elif defined(CONFIG_44x) #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x #endif -#if defined(CONFIG_E200) || defined(CONFIG_E500) +#ifdef CONFIG_E500 #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E #endif @@ -324,7 +324,6 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) } #endif /* !CONFIG_DEBUG_VM */ -#ifdef CONFIG_PPC_RADIX_MMU static inline bool radix_enabled(void) { return mmu_has_feature(MMU_FTR_TYPE_RADIX); @@ -334,17 +333,6 @@ static inline bool early_radix_enabled(void) { return early_mmu_has_feature(MMU_FTR_TYPE_RADIX); } -#else -static inline bool radix_enabled(void) -{ - return false; -} - -static inline bool early_radix_enabled(void) -{ - return false; -} -#endif #ifdef CONFIG_STRICT_KERNEL_RWX static inline bool strict_kernel_rwx_enabled(void) @@ -357,6 +345,11 @@ static inline bool strict_kernel_rwx_enabled(void) return false; } #endif + +static inline bool strict_module_rwx_enabled(void) +{ + return IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && strict_kernel_rwx_enabled(); +} #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index db186c539d37..9ba6b585337f 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -57,7 +57,6 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, static inline void mm_iommu_init(struct mm_struct *mm) { } #endif extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); -extern void set_context(unsigned long id, pgd_t *pgd); #ifdef CONFIG_PPC_BOOK3S_64 extern void radix__switch_mmu_context(struct mm_struct *prev, diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 295ef5639609..882a0bc7887a 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -9,10 +9,22 @@ #ifndef __ASSEMBLY__ +#include <linux/jump_label.h> + #include <asm/reg.h> +extern struct static_key_false disable_kuap_key; + +static __always_inline bool kuap_is_disabled(void) +{ + return static_branch_unlikely(&disable_kuap_key); +} + static inline void kuap_save_and_lock(struct pt_regs *regs) { + if (kuap_is_disabled()) + return; + regs->kuap = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); } @@ -23,12 +35,20 @@ static inline void kuap_user_restore(struct pt_regs *regs) static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, regs->kuap); } static inline unsigned long kuap_get_and_assert_locked(void) { - unsigned long kuap = mfspr(SPRN_MD_AP); + unsigned long kuap; + + if (kuap_is_disabled()) + return MD_APG_INIT; + + kuap = mfspr(SPRN_MD_AP); if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16); @@ -38,25 +58,35 @@ static inline unsigned long kuap_get_and_assert_locked(void) static inline void kuap_assert_locked(void) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled()) kuap_get_and_assert_locked(); } static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, MD_APG_INIT); } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void prevent_user_access(unsigned long dir) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, MD_APG_KUAP); } static inline unsigned long prevent_user_access_return(void) { - unsigned long flags = mfspr(SPRN_MD_AP); + unsigned long flags; + + if (kuap_is_disabled()) + return MD_APG_INIT; + + flags = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); @@ -65,12 +95,18 @@ static inline unsigned long prevent_user_access_return(void) static inline void restore_user_access(unsigned long flags) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, flags); } static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { + if (kuap_is_disabled()) + return false; + return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 2d92a39d8f2e..43ceca128531 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -113,6 +113,7 @@ typedef struct { /* patch sites */ extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I; +extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ec18ac818e3a..ecc8d792a431 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -149,11 +149,9 @@ struct paca_struct { #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S - mm_context_id_t mm_ctx_id; #ifdef CONFIG_PPC_MM_SLICES unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; - unsigned long mm_ctx_slb_addr_limit; #else u16 mm_ctx_user_psize; u16 mm_ctx_sllp; diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index c6a676714f04..b1e6c9ee3206 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,6 +41,10 @@ struct mm_struct; #ifndef __ASSEMBLY__ +#ifndef MAX_PTRS_PER_PGD +#define MAX_PTRS_PER_PGD PTRS_PER_PGD +#endif + /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) @@ -72,6 +76,7 @@ extern unsigned long empty_zero_page[]; extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +void poking_init(void); extern unsigned long ioremap_bot; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ac41776661e9..bede76dd3db7 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -76,6 +76,40 @@ #define __REGA0_R30 30 #define __REGA0_R31 31 +/* For use with PPC_RAW_() macros */ +#define _R0 0 +#define _R1 1 +#define _R2 2 +#define _R3 3 +#define _R4 4 +#define _R5 5 +#define _R6 6 +#define _R7 7 +#define _R8 8 +#define _R9 9 +#define _R10 10 +#define _R11 11 +#define _R12 12 +#define _R13 13 +#define _R14 14 +#define _R15 15 +#define _R16 16 +#define _R17 17 +#define _R18 18 +#define _R19 19 +#define _R20 20 +#define _R21 21 +#define _R22 22 +#define _R23 23 +#define _R24 24 +#define _R25 25 +#define _R26 26 +#define _R27 27 +#define _R28 28 +#define _R29 29 +#define _R30 30 +#define _R31 31 + #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) #define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0) @@ -222,13 +256,11 @@ #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_SYNC 0x7c0004ac #define PPC_INST_SYNC_MASK 0xfc0007fe -#define PPC_INST_ISYNC 0x4c00012c #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MTMSRD 0x7c000164 -#define PPC_INST_NOP 0x60000000 #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 @@ -241,10 +273,10 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe -#define PPC_INST_SC 0x44000002 #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e +#define PPC_INST_SETB 0x7c000100 #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a #define PPC_INST_TRECHKPT 0x7c0007dd @@ -252,18 +284,9 @@ #define PPC_INST_TSR 0x7c0005dd #define PPC_INST_LD 0xe8000000 #define PPC_INST_STD 0xf8000000 -#define PPC_INST_MFLR 0x7c0802a6 -#define PPC_INST_MTCTR 0x7c0903a6 -#define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 -#define PPC_INST_BLR 0x4e800020 -#define PPC_INST_BCTR 0x4e800420 -#define PPC_INST_BCTRL 0x4e800421 #define PPC_INST_DIVD 0x7c0003d2 -#define PPC_INST_RLDICR 0x78000004 -#define PPC_INST_ORI 0x60000000 -#define PPC_INST_ORIS 0x64000000 #define PPC_INST_BRANCH 0x48000000 #define PPC_INST_BL 0x48000001 #define PPC_INST_BRANCH_COND 0x40800000 @@ -323,6 +346,8 @@ #define PPC_LO(v) ((v) & 0xffff) #define PPC_HI(v) (((v) >> 16) & 0xffff) #define PPC_HA(v) PPC_HI((v) + 0x8000) +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff) /* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a @@ -383,6 +408,10 @@ #define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) #define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21)) #define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21)) +#define PPC_RAW_SC() (0x44000002) +#define PPC_RAW_SYNC() (0x7c0004ac) +#define PPC_RAW_ISYNC() (0x4c00012c) + /* * Define what the VSX XX1 form instructions will look like, then add * the 128 bit load store instructions based on that. @@ -404,10 +433,10 @@ #define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i)) #define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_RAW_PLXVP(xtp, i, a, pr) \ - ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))) -#define PPC_RAW_PSTXVP(xsp, i, a, pr) \ - ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))) +#define PPC_RAW_PLXVP_P(xtp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) +#define PPC_RAW_PLXVP_S(xtp, i, a, pr) (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i)) +#define PPC_RAW_PSTXVP_P(xsp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) +#define PPC_RAW_PSTXVP_S(xsp, i, a, pr) (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i)) #define PPC_RAW_NAP (0x4c000364) #define PPC_RAW_SLEEP (0x4c0003a4) #define PPC_RAW_WINKLE (0x4c0003e4) @@ -445,16 +474,17 @@ #define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define PPC_RAW_NOP() (PPC_INST_NOP) -#define PPC_RAW_BLR() (PPC_INST_BLR) +#define PPC_RAW_NOP() PPC_RAW_ORI(0, 0, 0) +#define PPC_RAW_BLR() (0x4e800020) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) -#define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) -#define PPC_RAW_BCTR() (PPC_INST_BCTR) -#define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) -#define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_MFLR(t) (0x7c0802a6 | ___PPC_RT(t)) +#define PPC_RAW_BCTR() (0x4e800420) +#define PPC_RAW_BCTRL() (0x4e800421) +#define PPC_RAW_MTCTR(r) (0x7c0903a6 | ___PPC_RT(r)) +#define PPC_RAW_ADDI(d, a, i) (0x38000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) -#define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIS(d, a, i) (0x3c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) @@ -499,8 +529,8 @@ #define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) -#define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) -#define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORI(d, a, i) (0x60000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORIS(d, a, i) (0x64000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) @@ -519,7 +549,7 @@ (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) #define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) #define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb)) -#define PPC_RAW_RLDICR(d, a, i, me) (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) +#define PPC_RAW_RLDICR(d, a, i, me) (0x78000004 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) /* slwi = rlwinm Rx, Ry, n, 0, 31-n */ #define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i)) @@ -533,6 +563,8 @@ #define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) #define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr)) +#define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) +#define PPC_RAW_EIEIO() (0x7c0006ac) /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 7bf8a15af224..f348e564f7dd 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -276,7 +276,15 @@ struct thread_struct { #define SPEFSCR_INIT #endif -#ifdef CONFIG_PPC32 +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) +#define INIT_THREAD { \ + .ksp = INIT_SP, \ + .pgdir = swapper_pg_dir, \ + .kuap = ~0UL, /* KUAP_NONE */ \ + .fpexc_mode = MSR_FE0 | MSR_FE1, \ + SPEFSCR_INIT \ +} +#elif defined(CONFIG_PPC32) #define INIT_THREAD { \ .ksp = INIT_SP, \ .pgdir = swapper_pg_dir, \ @@ -339,17 +347,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define spin_end() HMT_medium() -#define spin_until_cond(cond) \ -do { \ - if (unlikely(!(cond))) { \ - spin_begin(); \ - do { \ - spin_cpu_relax(); \ - } while (!(cond)); \ - spin_end(); \ - } \ -} while (0) - #endif /* Check that a certain kernel stack pointer is valid in task_struct p */ diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index e646c7f218bc..8a0d8fb35328 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -71,6 +71,7 @@ struct ps3_dma_region_ops; * @bus_addr: The 'translated' bus address of the region. * @len: The length in bytes of the region. * @offset: The offset from the start of memory of the region. + * @dma_mask: Device dma_mask. * @ioid: The IOID of the device who owns this region * @chunk_list: Opaque variable used by the ioc page manager. * @region_ops: struct ps3_dma_region_ops - dma region operations @@ -85,6 +86,7 @@ struct ps3_dma_region { enum ps3_dma_region_type region_type; unsigned long len; unsigned long offset; + u64 dma_mask; /* driver variables (set by ps3_dma_region_create) */ unsigned long bus_addr; @@ -232,7 +234,7 @@ enum lv1_result { static inline const char* ps3_result(int result) { -#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) +#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) || defined(CONFIG_PS3_VERBOSE_RESULT) switch (result) { case LV1_SUCCESS: return "LV1_SUCCESS (0)"; diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 33fa5dd8ee6a..714a35f0d425 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) pgd_t *pgdir = init_mm.pgd; return __find_linux_pte(pgdir, ea, NULL, hshift); } + +/* + * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a + * physical address, without taking locks. This can be used in real-mode. + */ +static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr) +{ + pte_t *ptep; + phys_addr_t pa; + int hugepage_shift; + + /* + * init_mm does not free page tables, and does not do THP. It may + * have huge pages from huge vmalloc / ioremap etc. + */ + ptep = find_init_mm_pte(addr, &hugepage_shift); + if (WARN_ON(!ptep)) + return 0; + + pa = PFN_PHYS(pte_pfn(*ptep)); + + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + + pa |= addr & ((1ul << hugepage_shift) - 1); + + return pa; +} + /* * This is what we should always use. Any other lockless page table lookup needs * careful audit against THP split. diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 9c9ab2746168..e06d61b668d4 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PTRACE_H #define _ASM_POWERPC_PTRACE_H +#include <linux/err.h> #include <uapi/asm/ptrace.h> #include <asm/asm-const.h> @@ -47,7 +48,7 @@ struct pt_regs unsigned long result; }; }; - +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP) union { struct { #ifdef CONFIG_PPC64 @@ -67,6 +68,7 @@ struct pt_regs }; unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; +#endif }; #endif @@ -152,25 +154,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); long do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#define kernel_stack_pointer(regs) ((regs)->gpr[1]) -static inline int is_syscall_success(struct pt_regs *regs) -{ - return !(regs->ccr & 0x10000000); -} - -static inline long regs_return_value(struct pt_regs *regs) -{ - if (is_syscall_success(regs)) - return regs->gpr[3]; - else - return -regs->gpr[3]; -} - -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->gpr[3] = rc; -} - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else @@ -235,6 +218,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs) regs->trap |= 0x1; } +#define kernel_stack_pointer(regs) ((regs)->gpr[1]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return !IS_ERR_VALUE((unsigned long)regs->gpr[3]); + else + return !(regs->ccr & 0x10000000); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return regs->gpr[3]; + + if (is_syscall_success(regs)) + return regs->gpr[3]; + else + return -regs->gpr[3]; +} + +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gpr[3] = rc; +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7c81d3e563b2..be85cf156a1f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -393,6 +393,7 @@ #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ #define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */ +#define SPRN_TRIG2 0x372 #define SPRN_PMCR 0x374 /* Power Management Control Register */ #define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */ @@ -1435,8 +1436,6 @@ static inline void mtsr(u32 val, u32 idx) } #endif -#define proc_trap() asm volatile("trap") - extern unsigned long current_stack_frame(void); register unsigned long current_stack_pointer asm("r1"); @@ -1447,16 +1446,6 @@ extern void scom970_write(unsigned int address, unsigned long value); struct pt_regs; extern void ppc_save_regs(struct pt_regs *regs); - -static inline void update_power8_hid0(unsigned long hid0) -{ - /* - * The HID0 update on Power8 should at the very least be - * preceded by a SYNC instruction followed by an ISYNC - * instruction - */ - asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); -} #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_REG_H */ diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index b774a4477d5f..792eefaf230b 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -92,6 +92,9 @@ static inline bool security_ftr_enabled(u64 feature) // The L1-D cache should be flushed after user accesses from the kernel #define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull +// The STF flush should be executed on privilege state switch +#define SEC_FTR_STF_BARRIER 0x0000000000010000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ @@ -99,6 +102,7 @@ static inline bool security_ftr_enabled(u64 feature) SEC_FTR_BNDS_CHK_SPEC_BAR | \ SEC_FTR_L1D_FLUSH_ENTRY | \ SEC_FTR_L1D_FLUSH_UACCESS | \ + SEC_FTR_STF_BARRIER | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h new file mode 100644 index 000000000000..b040094f7920 --- /dev/null +++ b/arch/powerpc/include/asm/set_memory.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SET_MEMORY_H +#define _ASM_POWERPC_SET_MEMORY_H + +#define SET_MEMORY_RO 0 +#define SET_MEMORY_RW 1 +#define SET_MEMORY_NX 2 +#define SET_MEMORY_X 3 + +int change_memory_attr(unsigned long addr, int numpages, long action); + +static inline int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RO); +} + +static inline int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RW); +} + +static inline int set_memory_nx(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NX); +} + +static inline int set_memory_x(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_X); +} + +int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); + +#endif diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index fd1b518eed17..ba0f88f3a30d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - /* - * If the system call failed, - * regs->gpr[3] contains a positive ERRORCODE. - */ - return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + if (trap_is_scv(regs)) { + unsigned long error = regs->gpr[3]; + + return IS_ERR_VALUE(error) ? error : 0; + } else { + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + } } static inline long syscall_get_return_value(struct task_struct *task, @@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - /* - * In the general case it's not obvious that we must deal with CCR - * here, as the syscall exit path will also do that for us. However - * there are some places, eg. the signal code, which check ccr to - * decide if the value in r3 is actually an error. - */ - if (error) { - regs->ccr |= 0x10000000L; - regs->gpr[3] = error; + if (trap_is_scv(regs)) { + regs->gpr[3] = (long) error ?: val; } else { - regs->ccr &= ~0x10000000L; - regs->gpr[3] = val; + /* + * In the general case it's not obvious that we must deal with + * CCR here, as the syscall exit path will also do that for us. + * However there are some places, eg. the signal code, which + * check ccr to decide if the value in r3 is actually an error. + */ + if (error) { + regs->ccr |= 0x10000000L; + regs->gpr[3] = error; + } else { + regs->ccr &= ~0x10000000L; + regs->gpr[3] = val; + } } } diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 5bf65f5d44a9..fe683371336f 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -24,8 +24,8 @@ typedef ppc_opcode_t uprobe_opcode_t; struct arch_uprobe { union { - struct ppc_inst insn; - struct ppc_inst ixol; + u32 insn[2]; + u32 ixol[2]; }; }; diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index e33f80b0ea81..57573d9c1e09 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -5,8 +5,10 @@ #ifndef _ASM_POWERPC_VAS_H #define _ASM_POWERPC_VAS_H - -struct vas_window; +#include <linux/sched/mm.h> +#include <linux/mmu_context.h> +#include <asm/icswx.h> +#include <uapi/asm/vas-api.h> /* * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25 @@ -49,6 +51,64 @@ enum vas_cop_type { }; /* + * User space VAS windows are opened by tasks and take references + * to pid and mm until windows are closed. + * Stores pid, mm, and tgid for each window. + */ +struct vas_user_win_ref { + struct pid *pid; /* PID of owner */ + struct pid *tgid; /* Thread group ID of owner */ + struct mm_struct *mm; /* Linux process mm_struct */ +}; + +/* + * Common VAS window struct on PowerNV and PowerVM + */ +struct vas_window { + u32 winid; + u32 wcreds_max; /* Window credits */ + enum vas_cop_type cop; + struct vas_user_win_ref task_ref; + char *dbgname; + struct dentry *dbgdir; +}; + +/* + * User space window operations used for powernv and powerVM + */ +struct vas_user_win_ops { + struct vas_window * (*open_win)(int vas_id, u64 flags, + enum vas_cop_type); + u64 (*paste_addr)(struct vas_window *); + int (*close_win)(struct vas_window *); +}; + +static inline void put_vas_user_win_ref(struct vas_user_win_ref *ref) +{ + /* Drop references to pid, tgid, and mm */ + put_pid(ref->pid); + put_pid(ref->tgid); + if (ref->mm) + mmdrop(ref->mm); +} + +static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref) +{ + mm_context_add_vas_window(ref->mm); + /* + * Even a process that has no foreign real address mapping can + * use an unpaired COPY instruction (to no real effect). Issue + * CP_ABORT to clear any pending COPY and prevent a covert + * channel. + * + * __switch_to() will issue CP_ABORT on future context switches + * if process / thread has any open VAS window (Use + * current->mm->context.vas_windows). + */ + asm volatile(PPC_CP_ABORT); +} + +/* * Receive window attributes specified by the (in-kernel) owner of window. */ struct vas_rx_win_attr { @@ -100,6 +160,7 @@ struct vas_tx_win_attr { bool rx_win_ord_mode; }; +#ifdef CONFIG_PPC_POWERNV /* * Helper to map a chip id to VAS id. * For POWER9, this is a 1:1 mapping. In the future this maybe a 1:N @@ -162,6 +223,43 @@ int vas_copy_crb(void *crb, int offset); */ int vas_paste_crb(struct vas_window *win, int offset, bool re); +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name); +void vas_unregister_api_powernv(void); +#endif + +#ifdef CONFIG_PPC_PSERIES + +/* VAS Capabilities */ +#define VAS_GZIP_QOS_FEAT 0x1 +#define VAS_GZIP_DEF_FEAT 0x2 +#define VAS_GZIP_QOS_FEAT_BIT PPC_BIT(VAS_GZIP_QOS_FEAT) /* Bit 1 */ +#define VAS_GZIP_DEF_FEAT_BIT PPC_BIT(VAS_GZIP_DEF_FEAT) /* Bit 2 */ + +/* NX Capabilities */ +#define VAS_NX_GZIP_FEAT 0x1 +#define VAS_NX_GZIP_FEAT_BIT PPC_BIT(VAS_NX_GZIP_FEAT) /* Bit 1 */ + +/* + * These structs are used to retrieve overall VAS capabilities that + * the hypervisor provides. + */ +struct hv_vas_all_caps { + __be64 descriptor; + __be64 feat_type; +} __packed __aligned(0x1000); + +struct vas_all_caps { + u64 descriptor; + u64 feat_type; +}; + +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result); +int vas_register_api_pseries(struct module *mod, + enum vas_cop_type cop_type, const char *name); +void vas_unregister_api_pseries(void); +#endif + /* * Register / unregister coprocessor type to VAS API which will be exported * to user space. Applications can use this API to open / close window @@ -171,7 +269,12 @@ int vas_paste_crb(struct vas_window *win, int offset, bool re); * used for others in future. */ int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, - const char *name); + const char *name, + const struct vas_user_win_ops *vops); void vas_unregister_coproc_api(void); +int get_vas_user_win_ref(struct vas_user_win_ref *task_ref); +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref); +void vas_dump_crb(struct coprocessor_request_block *crb); #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 8e903b3f9c24..d9cf192368ad 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -65,8 +65,12 @@ struct icp_ops { extern const struct icp_ops *icp_ops; +#ifdef CONFIG_PPC_ICS_NATIVE /* Native ICS */ extern int ics_native_init(void); +#else +static inline int ics_native_init(void) { return -ENODEV; } +#endif /* RTAS ICS */ #ifdef CONFIG_PPC_ICS_RTAS diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h index ebd4b2424785..7c81301ecdba 100644 --- a/arch/powerpc/include/uapi/asm/vas-api.h +++ b/arch/powerpc/include/uapi/asm/vas-api.h @@ -13,11 +13,15 @@ #define VAS_MAGIC 'v' #define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr) +/* Flags to VAS TX open window ioctl */ +/* To allocate a window with QoS credit, otherwise use default credit */ +#define VAS_TX_WIN_FLAG_QOS_CREDIT 0x0000000000000001 + struct vas_tx_win_open_attr { __u32 version; __s16 vas_id; /* specific instance of vas or -1 for default */ __u16 reserved1; - __u64 flags; /* Future use */ + __u64 flags; __u64 reserved2[6]; }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index aa267d173ded..31881ef3641d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -86,11 +86,7 @@ int main(void) OFFSET(PACA_CANARY, paca_struct, canary); #endif #endif - OFFSET(MMCONTEXTID, mm_struct, context.id); -#ifdef CONFIG_PPC64 - DEFINE(SIGSEGV, SIGSEGV); - DEFINE(NMI_MASK, NMI_MASK); -#else +#ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -119,7 +115,6 @@ int main(void) #ifdef CONFIG_ALTIVEC OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr); OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area); - OFFSET(THREAD_VRSAVE, thread_struct, vrsave); OFFSET(THREAD_USED_VR, thread_struct, used_vr); OFFSET(VRSTATE_VSCR, thread_vr_state, vscr); OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec); @@ -150,22 +145,15 @@ int main(void) #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); - OFFSET(THREAD_SPEFSCR, thread_struct, spefscr); OFFSET(THREAD_USED_SPE, thread_struct, used_spe); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0); -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu); #endif #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif -#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) - OFFSET(KUAP, thread_struct, kuap); -#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); @@ -185,19 +173,12 @@ int main(void) sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); - OFFSET(TI_PREEMPT, thread_info, preempt_count); #ifdef CONFIG_PPC64 OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size); - OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page); - OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size); - OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size); - OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page); /* paca */ - DEFINE(PACA_SIZE, sizeof(struct paca_struct)); OFFSET(PACAPACAINDEX, paca_struct, paca_index); OFFSET(PACAPROCSTART, paca_struct, cpu_start); OFFSET(PACAKSAVE, paca_struct, kstack); @@ -212,15 +193,6 @@ int main(void) OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3S - OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); - OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); - OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit); - DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); -#endif /* CONFIG_PPC_MM_SLICES */ -#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACAPGD, paca_struct, pgd); @@ -241,21 +213,9 @@ int main(void) #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S_64 - OFFSET(PACASLBCACHE, paca_struct, slb_cache); - OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); - OFFSET(PACASTABRR, paca_struct, stab_rr); - OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); -#else - OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp); -#endif /* CONFIG_PPC_MM_SLICES */ OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXNMI, paca_struct, exnmi); -#ifdef CONFIG_PPC_PSERIES - OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); -#endif OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); @@ -264,9 +224,7 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); #endif - OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); - OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); #endif /* CONFIG_PPC_BOOK3S_64 */ OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 @@ -343,10 +301,6 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#ifdef CONFIG_PPC_KUAP - STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); -#endif - #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) @@ -368,10 +322,6 @@ int main(void) #endif #endif -#ifndef CONFIG_PPC64 - OFFSET(MM_PGD, mm_struct, pgd); -#endif /* ! CONFIG_PPC64 */ - /* About the CPU features table */ OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features); OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup); @@ -404,13 +354,6 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE))); -#else - DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); -#endif - DEFINE(PTE_SIZE, sizeof(pte_t)); - #ifdef CONFIG_KVM OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); @@ -482,11 +425,9 @@ int main(void) OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); - OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); OFFSET(KVM_RADIX, kvm, arch.radix); - OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); @@ -514,7 +455,6 @@ int main(void) OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); - OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); @@ -525,7 +465,6 @@ int main(void) OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); - OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar); OFFSET(VCPU_SIER, kvm_vcpu, arch.sier); @@ -645,10 +584,8 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); - HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -756,7 +693,6 @@ int main(void) #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); - DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); #ifdef CONFIG_PPC_8xx DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 735e89337398..5693e1c67c2b 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -35,7 +35,7 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { - struct ppc_inst *p = (struct ppc_inst *)addr; + u32 *p = (u32 *)addr; /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we @@ -45,8 +45,8 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. */ - patch_instruction(p, ppc_inst(PPC_INST_NOP)); - patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); + patch_instruction(p, ppc_inst(PPC_RAW_NOP())); + patch_branch(p + 1, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f24cd53ff26e..3bbdcc86d01b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) */ static inline unsigned long eeh_token_to_phys(unsigned long token) { - pte_t *ptep; - unsigned long pa; - int hugepage_shift; - - /* - * We won't find hugepages here(this is iomem). Hence we are not - * worried about _PAGE_SPLITTING/collapse. Also we will not hit - * page table free, because of init_mm. - */ - ptep = find_init_mm_pte(token, &hugepage_shift); - if (!ptep) - return token; - - pa = pte_pfn(*ptep); - - /* On radix we can do hugepage mappings for io, so handle that */ - if (!hugepage_shift) - hugepage_shift = PAGE_SHIFT; - - pa <<= PAGE_SHIFT; - pa |= token & ((1ul << hugepage_shift) - 1); - return pa; + return ppc_find_vmap_phys(token); } /* diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9160285cb2f4..6c09ebb853ec 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -176,28 +176,6 @@ _GLOBAL(_switch) /* r3-r12 are caller saved -- Cort */ SAVE_NVGPRS(r1) stw r0,_NIP(r1) /* Return to switch caller */ - mfmsr r11 - li r0,MSR_FP /* Disable floating-point */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - oris r0,r0,MSR_VEC@h /* Disable altivec */ - mfspr r12,SPRN_VRSAVE /* save vrsave register value */ - stw r12,THREAD+THREAD_VRSAVE(r2) -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - oris r0,r0,MSR_SPE@h /* Disable SPE */ - mfspr r12,SPRN_SPEFSCR /* save spefscr register value */ - stw r12,THREAD+THREAD_SPEFSCR(r2) -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - and. r0,r0,r11 /* FP or altivec or SPE enabled? */ - beq+ 1f - andc r11,r11,r0 - mtmsr r11 - isync -1: stw r11,_MSR(r1) mfcr r10 stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ @@ -218,19 +196,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) mr r3,r2 addi r2,r4,-THREAD /* Update current */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_VRSAVE(r2) - mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_SPEFSCR(r2) - mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - lwz r0,_CCR(r1) mtcrf 0xFF,r0 /* r3-r12 are destroyed -- Cort */ diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 2ed14d4a47f5..93b0f3ec8fb0 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -38,9 +38,9 @@ static int __init early_init_dt_scan_epapr(unsigned long node, for (i = 0; i < (len / 4); i++) { struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); - patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst); + patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst); + patch_instruction(epapr_ev_idle_start + i, inst); #endif } diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index e1360b88b6cb..7d72ee5ab387 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -701,39 +701,3 @@ _GLOBAL(abort) mfspr r13,SPRN_DBCR0 oris r13,r13,DBCR0_RST_SYSTEM@h mtspr SPRN_DBCR0,r13 - -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif - sync - mtspr SPRN_PID,r3 - isync /* Need an isync to flush shadow */ - /* TLBs after changing PID */ - blr - -/* We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align 12 - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 5c106ac36626..ddc978a2d381 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -532,6 +532,10 @@ finish_tlb_load_44x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_44x_kuep +#endif 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. @@ -743,6 +747,10 @@ finish_tlb_load_47x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_47x_kuep +#endif 1: tlbwe r11,r13,2 /* Done...restore registers and get out of here. @@ -780,20 +788,6 @@ _GLOBAL(__fixup_440A_mcheck) sync blr -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - /* * Init CPU state. This is called at boot time or for secondary CPUs * to setup initial TLB entries, setup IVORs, etc... @@ -1239,34 +1233,8 @@ head_start_common: isync blr -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align PAGE_SHIFT - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) - -/* - * To support >32-bit physical addresses, we use an 8KB pgdir. - */ - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 - #ifdef CONFIG_SMP + .data .align 12 temp_boot_stack: .space 1024 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ece7f97bafff..79f2d1e61abd 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -997,23 +997,3 @@ start_here_common: 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the bss, which is page-aligned. - */ - .section ".bss" -/* - * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K. - * We will need to find a better way to fix this - */ - .align 16 - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7d445e4342c0..9bdb95f5694f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -786,28 +786,3 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ - .data - .globl sdata -sdata: - .globl empty_zero_page - .align PAGE_SHIFT -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE table pointers, usually the kernel and current user - * pointer to their respective root page table (pgdir). - */ - .globl abatron_pteptrs -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 065178f19a3d..e8d861e48c25 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -518,8 +518,6 @@ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION li r0,1 mfspr r1,SPRN_SPRG_603_LRU @@ -531,9 +529,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -607,9 +611,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception @@ -924,12 +934,6 @@ _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif li r4, 0 3: mtsrin r3, r4 addi r3, r3, 0x111 /* increment VSID */ @@ -1024,58 +1028,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) rfi /* - * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); - * - * Set up the segment registers for a new context. - */ -_ENTRY(switch_mmu_context) - lwz r3,MMCONTEXTID(r4) - cmpwi cr0,r3,0 - blt- 4f - mulli r3,r3,897 /* multiply context by skew factor */ - rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r0,NUM_USER_SEGMENTS - mtctr r0 - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - lwz r4, MM_PGD(r4) - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif -BEGIN_MMU_FTR_SECTION -#ifndef CONFIG_BDI_SWITCH - lwz r4, MM_PGD(r4) -#endif - tophys(r4, r4) - rlwinm r4, r4, 4, 0xffff01ff - mtspr SPRN_SDR1, r4 -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) - li r4,0 - isync -3: - mtsrin r3,r4 - addi r3,r3,0x111 /* next VSID */ - rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b - sync - isync - blr -4: trap - EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 - blr -EXPORT_SYMBOL(switch_mmu_context) - -/* * An undocumented "feature" of 604e requires that the v bit * be cleared before changing BAT values. * @@ -1256,61 +1208,4 @@ setup_usbgecko_bat: blr #endif -#ifdef CONFIG_8260 -/* Jump into the system reset for the rom. - * We first disable the MMU, and then jump to the ROM reset address. - * - * r3 is the board info structure, r4 is the location for starting. - * I use this for building a small kernel that can load other kernels, - * rather than trying to write or rely on a rom monitor that can tftp load. - */ - .globl m8260_gorom -m8260_gorom: - mfmsr r0 - rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ - sync - mtmsr r0 - sync - mfspr r11, SPRN_HID0 - lis r10, 0 - ori r10,r10,HID0_ICE|HID0_DCE - andc r11, r11, r10 - mtspr SPRN_HID0, r11 - isync - li r5, MSR_ME|MSR_RI - lis r6,2f@h - addis r6,r6,-KERNELBASE@h - ori r6,r6,2f@l - mtspr SPRN_SRR0,r6 - mtspr SPRN_SRR1,r5 - isync - sync - rfi -2: - mtlr r4 - blr -#endif - - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ .data - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index a1a5c3f10dc4..0f9642f36b49 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -985,20 +985,6 @@ _GLOBAL(abort) mtspr SPRN_DBCR0,r13 isync -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start @@ -1226,26 +1212,3 @@ _GLOBAL(restore_to_as0) */ 3: mr r3,r5 bl _start - -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align 12 - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 51bbaae94ccc..c877f074d174 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) #ifdef CONFIG_PPC_INDIRECT_MMIO struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { - unsigned hugepage_shift; struct iowa_bus *bus; int token; @@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) bus = &iowa_busses[token - 1]; else { unsigned long vaddr, paddr; - pte_t *ptep; vaddr = (unsigned long)PCI_FIX_ADDR(addr); if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) return NULL; - /* - * We won't find huge pages here (iomem). Also can't hit - * a page table free due to init_mm - */ - ptep = find_init_mm_pte(vaddr, &hugepage_shift); - if (ptep == NULL) - paddr = 0; - else { - WARN_ON(hugepage_shift); - paddr = pte_pfn(*ptep) << PAGE_SHIFT; - } + + paddr = ppc_find_vmap_phys(vaddr); + bus = iowa_pci_find(vaddr, paddr); if (bus == NULL) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 57d6b85e9b96..2af89a5e379f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; - size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - size_io = IOMMU_PAGE_ALIGN(size_io, tbl); - nio_pages = size_io >> tbl->it_page_shift; - io_order = get_iommu_order(size_io, tbl); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); - unsigned int nio_pages = size_io >> tbl->it_page_shift; + unsigned int nio_pages; + size = PAGE_ALIGN(size); + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index ce87dc5ea23c..5277cf582c16 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,10 +11,10 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry); + u32 *addr = (u32 *)jump_entry_code(entry); if (type == JUMP_LABEL_JMP) patch_branch(addr, jump_entry_target(entry), 0); else - patch_instruction(addr, ppc_inst(PPC_INST_NOP)); + patch_instruction(addr, ppc_inst(PPC_RAW_NOP())); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7dd2ad3603ad..c48565762698 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -417,11 +417,10 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { + u32 instr, *addr = (u32 *)bpt->bpt_addr; int err; - unsigned int instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, (unsigned *) addr); + err = get_kernel_nofault(instr, addr); if (err) return err; @@ -429,7 +428,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return -EFAULT; - *(unsigned int *)bpt->saved_instr = instr; + *(u32 *)bpt->saved_instr = instr; return 0; } @@ -438,7 +437,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr = *(unsigned int *)bpt->saved_instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; + u32 *addr = (u32 *)bpt->bpt_addr; err = patch_instruction(addr, ppc_inst(instr)); if (err) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 01ab2163659e..8ac248cb518a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,11 +19,13 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> +#include <linux/moduleloader.h> #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> #include <asm/inst.h> +#include <asm/set_memory.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -103,12 +105,26 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (!page) + return NULL; + + if (strict_module_rwx_enabled()) { + set_memory_ro((unsigned long)page, 1); + set_memory_x((unsigned long)page, 1); + } + return page; +} + int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *prev; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); - struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1)); + struct ppc_inst insn = ppc_inst_read(p->addr); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -116,15 +132,15 @@ int arch_prepare_kprobe(struct kprobe *p) } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); ret = -EINVAL; - } else if (ppc_inst_prefixed(prefix)) { + } else if ((unsigned long)p->addr & ~PAGE_MASK && + ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } preempt_disable(); prev = get_kprobe(p->addr - 1); preempt_enable_no_resched(); - if (prev && - ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) { + if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } @@ -138,7 +154,7 @@ int arch_prepare_kprobe(struct kprobe *p) } if (!ret) { - patch_instruction((struct ppc_inst *)p->ainsn.insn, insn); + patch_instruction(p->ainsn.insn, insn); p->opcode = ppc_inst_val(insn); } @@ -149,13 +165,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); + patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode)); + patch_instruction(p->addr, ppc_inst(p->opcode)); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -228,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); @@ -439,7 +455,7 @@ int kprobe_post_handler(struct pt_regs *regs) if (!cur || user_mode(regs)) return 0; - len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn)); + len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn)); /* make sure we got here for instruction we have a kprobe on */ if (((unsigned long)cur->ainsn.insn + len) != regs->nip) return 0; diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 667104d4c455..c2f55fe7092d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -463,7 +463,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, pfn = addr_to_pfn(regs, regs->nip); if (pfn != ULONG_MAX) { instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); - instr = ppc_inst_read((struct ppc_inst *)instr_addr); + instr = ppc_inst_read((u32 *)instr_addr); if (!analyse_instr(&op, &tmp, instr)) { pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; @@ -481,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, return -1; } -static int mce_handle_ierror(struct pt_regs *regs, +static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, const struct mce_ierror_table table[], struct mce_error_info *mce_err, uint64_t *addr, uint64_t *phys_addr) { - uint64_t srr1 = regs->msr; int handled = 0; int i; @@ -695,19 +694,19 @@ static long mce_handle_ue_error(struct pt_regs *regs, } static long mce_handle_error(struct pt_regs *regs, + unsigned long srr1, const struct mce_derror_table dtable[], const struct mce_ierror_table itable[]) { struct mce_error_info mce_err = { 0 }; uint64_t addr, phys_addr = ULONG_MAX; - uint64_t srr1 = regs->msr; long handled; if (SRR1_MC_LOADSTORE(srr1)) handled = mce_handle_derror(regs, dtable, &mce_err, &addr, &phys_addr); else - handled = mce_handle_ierror(regs, itable, &mce_err, &addr, + handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr, &phys_addr); if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) @@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) /* P7 DD1 leaves top bits of DSISR undefined */ regs->dsisr &= 0x0000ffff; - return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p7_derror_table, mce_p7_ierror_table); } long __machine_check_early_realmode_p8(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { + unsigned long srr1 = regs->msr; + /* * On POWER9 DD2.1 and below, it's possible to get a machine check * caused by a paste instruction where only DSISR bit 25 is set. This @@ -746,10 +749,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) return 1; - return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + ((srr1 & 0x081c0000) == 0x08140000 || + (srr1 & 0x081c0000) == 0x08180000)) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p9_derror_table, mce_p9_ierror_table); } long __machine_check_early_realmode_p10(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); + unsigned long srr1 = regs->msr; + + /* + * Async machine check due to bad real address from store comes with + * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in + * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table + * so it will find the cause (which describes it correctly as a store + * error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + (srr1 & 0x081c0000) == 0x08140000) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p10_derror_table, mce_p10_ierror_table); } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 6a076bef2932..39ab15419592 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -388,9 +388,3 @@ _GLOBAL(start_secondary_resume) bl start_secondary b . #endif /* CONFIG_SMP */ - -/* - * This routine is just here to keep GCC happy - sigh... - */ -_GLOBAL(__main) - blr diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 3f35c8d20be7..ed04a3ba66fe 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -92,12 +92,14 @@ int module_finalize(const Elf_Ehdr *hdr, static __always_inline void * __module_alloc(unsigned long size, unsigned long start, unsigned long end) { + pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + /* * Don't do huge page allocations for modules yet until more testing * is done. STRICT_MODULE_RWX may require extra work to support this * too. */ - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC, + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index c27b8687b82a..f417afc08d33 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -145,10 +145,9 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val))) + if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val))) return 0; - if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | - PPC_LO(val))) + if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))) return 0; return 1; } @@ -175,16 +174,10 @@ static uint32_t do_plt_call(void *location, entry++; } - /* - * lis r12, sym@ha - * addi r12, r12, sym@l - * mtctr r12 - * bctr - */ - entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val); - entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val); - entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12); - entry->jump[3] = PPC_INST_BCTR; + entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); + entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); + entry->jump[2] = PPC_RAW_MTCTR(_R12); + entry->jump[3] = PPC_RAW_BCTR(); pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ae2b188365b1..6baa676e7cb6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -122,27 +122,19 @@ struct ppc64_stub_entry * the stub, but it's significantly shorter to put these values at the * end of the stub code, and patch the stub address (32-bits relative * to the TOC ptr, r2) into the stub. - * - * addis r11,r2, <high> - * addi r11,r11, <low> - * std r2,R2_STACK_OFFSET(r1) - * ld r12,32(r11) - * ld r2,40(r11) - * mtctr r12 - * bctr */ static u32 ppc64_stub_insns[] = { - PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2), - PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11), + PPC_RAW_ADDIS(_R11, _R2, 0), + PPC_RAW_ADDI(_R11, _R11, 0), /* Save current r2 value in magic place on the stack. */ - PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET, - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32, + PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET), + PPC_RAW_LD(_R12, _R11, 32), #ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ - PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40, + PPC_RAW_LD(_R2, _R11, 40), #endif - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* Count how many different 24-bit relocations (different symbol, @@ -336,21 +328,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #ifdef CONFIG_MPROFILE_KERNEL -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -/* - * ld r12,PACATOC(r13) - * addis r12,r12,<high> - * addi r12,r12,<low> - * mtctr r12 - * bctr - */ static u32 stub_insns[] = { - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, - PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), + PPC_RAW_ADDIS(_R12, _R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* @@ -507,7 +490,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 1; - if (*instruction != PPC_INST_NOP) { + if (*instruction != PPC_RAW_NOP()) { pr_err("%s: Expected nop after call, got %08x at %pS\n", me->name, *instruction, instruction); return 0; @@ -696,21 +679,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * ld r2, ...(r12) * add r2, r2, r12 */ - if ((((uint32_t *)location)[0] & ~0xfffc) != - (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12))) + if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0)) break; - if (((uint32_t *)location)[1] != - (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12))) + if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12)) break; /* * If found, replace it with: * addis r2, r12, (.TOC.-func)@ha * addi r2, r2, (.TOC.-func)@l */ - ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) | - __PPC_RA(R12) | PPC_HA(value); - ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) | - __PPC_RA(R2) | PPC_LO(value); + ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value)); + ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value)); break; case R_PPC64_REL16_HA: diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index cdf87086fa33..2b8fe40069ad 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -18,24 +18,16 @@ #include <asm/ppc-opcode.h> #include <asm/inst.h> -#define TMPL_CALL_HDLR_IDX \ - (optprobe_template_call_handler - optprobe_template_entry) -#define TMPL_EMULATE_IDX \ - (optprobe_template_call_emulate - optprobe_template_entry) -#define TMPL_RET_IDX \ - (optprobe_template_ret - optprobe_template_entry) -#define TMPL_OP_IDX \ - (optprobe_template_op_address - optprobe_template_entry) -#define TMPL_INSN_IDX \ - (optprobe_template_insn - optprobe_template_entry) -#define TMPL_END_IDX \ - (optprobe_template_end - optprobe_template_entry) - -DEFINE_INSN_CACHE_OPS(ppc_optinsn); +#define TMPL_CALL_HDLR_IDX (optprobe_template_call_handler - optprobe_template_entry) +#define TMPL_EMULATE_IDX (optprobe_template_call_emulate - optprobe_template_entry) +#define TMPL_RET_IDX (optprobe_template_ret - optprobe_template_entry) +#define TMPL_OP_IDX (optprobe_template_op_address - optprobe_template_entry) +#define TMPL_INSN_IDX (optprobe_template_insn - optprobe_template_entry) +#define TMPL_END_IDX (optprobe_template_end - optprobe_template_entry) static bool insn_page_in_use; -static void *__ppc_alloc_insn_page(void) +void *alloc_optinsn_page(void) { if (insn_page_in_use) return NULL; @@ -43,20 +35,11 @@ static void *__ppc_alloc_insn_page(void) return &optinsn_slot; } -static void __ppc_free_insn_page(void *page __maybe_unused) +void free_optinsn_page(void *page) { insn_page_in_use = false; } -struct kprobe_insn_cache kprobe_ppc_optinsn_slots = { - .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex), - .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages), - /* insn_size initialized later */ - .alloc = __ppc_alloc_insn_page, - .free = __ppc_free_insn_page, - .nr_garbage = 0, -}; - /* * Check if we can optimize this probe. Returns NIP post-emulation if this can * be optimized and 0 otherwise. @@ -66,6 +49,7 @@ static unsigned long can_optimize(struct kprobe *p) struct pt_regs regs; struct instruction_op op; unsigned long nip = 0; + unsigned long addr = (unsigned long)p->addr; /* * kprobe placed for kretprobe during boot time @@ -73,7 +57,7 @@ static unsigned long can_optimize(struct kprobe *p) * So further checks can be skipped. */ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) - return (unsigned long)p->addr + sizeof(kprobe_opcode_t); + return addr + sizeof(kprobe_opcode_t); /* * We only support optimizing kernel addresses, but not @@ -81,11 +65,11 @@ static unsigned long can_optimize(struct kprobe *p) * * FIXME: Optimize kprobes placed in module addresses. */ - if (!is_kernel_addr((unsigned long)p->addr)) + if (!is_kernel_addr(addr)) return 0; memset(®s, 0, sizeof(struct pt_regs)); - regs.nip = (unsigned long)p->addr; + regs.nip = addr; regs.trap = 0x0; regs.msr = MSR_KERNEL; @@ -100,9 +84,8 @@ static unsigned long can_optimize(struct kprobe *p) * Ensure that the instruction is not a conditional branch, * and that can be emulated. */ - if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) && - analyse_instr(&op, ®s, - ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) { + if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) && + analyse_instr(&op, ®s, ppc_inst_read(p->ainsn.insn)) == 1) { emulate_update_regs(®s, &op); nip = regs.nip; } @@ -136,19 +119,15 @@ NOKPROBE_SYMBOL(optimized_callback); void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { if (op->optinsn.insn) { - free_ppc_optinsn_slot(op->optinsn.insn, 1); + free_optinsn_slot(op->optinsn.insn, 1); op->optinsn.insn = NULL; } } static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); - addr++; - - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } /* @@ -157,34 +136,11 @@ static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t * */ static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { - /* lis reg,(op)@highest */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | - ((val >> 48) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@higher */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 32) & 0xffff))); - addr++; - - /* rldicr reg,reg,32,31 */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | - ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); - addr++; - - /* oris reg,reg,(op)@h */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 16) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@l */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | (val & 0xffff))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) @@ -198,19 +154,18 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; - kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; + unsigned long op_callback_addr, emulate_step_addr; + kprobe_opcode_t *buff; long b_offset; unsigned long nip, size; int rc, i; - kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; - nip = can_optimize(p); if (!nip) return -EILSEQ; /* Allocate instruction slot for detour buffer */ - buff = get_ppc_optinsn_slot(); + buff = get_optinsn_slot(); if (!buff) return -ENOMEM; @@ -228,8 +183,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; /* Check if the return address is also within 32MB range */ - b_offset = (unsigned long)(buff + TMPL_RET_IDX) - - (unsigned long)nip; + b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip; if (!is_offset_in_branch_range(b_offset)) goto error; @@ -238,8 +192,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); pr_devel("Copying template to %p, size %lu\n", buff, size); for (i = 0; i < size; i++) { - rc = patch_instruction((struct ppc_inst *)(buff + i), - ppc_inst(*(optprobe_template_entry + i))); + rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i))); if (rc < 0) goto error; } @@ -253,51 +206,44 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 2. branch to optimized_callback() and emulate_step() */ - op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback"); - emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step"); + op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback"); + emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step"); if (!op_callback_addr || !emulate_step_addr) { WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n"); goto error; } - rc = create_branch(&branch_op_callback, - (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), - (unsigned long)op_callback_addr, - BRANCH_SET_LINK); + rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX, + op_callback_addr, BRANCH_SET_LINK); - rc |= create_branch(&branch_emulate_step, - (struct ppc_inst *)(buff + TMPL_EMULATE_IDX), - (unsigned long)emulate_step_addr, - BRANCH_SET_LINK); + rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX, + emulate_step_addr, BRANCH_SET_LINK); if (rc) goto error; - patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), - branch_op_callback); - patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX), - branch_emulate_step); + patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); + patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); /* * 3. load instruction to be emulated into relevant register, and */ - temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + temp = ppc_inst_read(p->ainsn.insn); patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline */ - patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0); + patch_branch(buff + TMPL_RET_IDX, nip, 0); - flush_icache_range((unsigned long)buff, - (unsigned long)(&buff[TMPL_END_IDX])); + flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); op->optinsn.insn = buff; return 0; error: - free_ppc_optinsn_slot(buff, 0); + free_optinsn_slot(buff, 0); return -ERANGE; } @@ -328,12 +274,9 @@ void arch_optimize_kprobes(struct list_head *oplist) * Backup instructions which will be replaced * by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr, - RELATIVEJUMP_SIZE); - create_branch(&instr, - (struct ppc_inst *)op->kp.addr, - (unsigned long)op->optinsn.insn, 0); - patch_instruction((struct ppc_inst *)op->kp.addr, instr); + memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); + create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0); + patch_instruction(op->kp.addr, instr); list_del_init(&op->list); } } @@ -343,8 +286,7 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op) arch_arm_kprobe(&op->kp); } -void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) +void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list) { struct optimized_kprobe *op; struct optimized_kprobe *tmp; @@ -355,8 +297,7 @@ void arch_unoptimize_kprobes(struct list_head *oplist, } } -int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 7f5aae3c387d..9bd30cac852b 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -346,10 +346,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; - get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); - get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context); memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), LOW_SLICE_ARRAY_SZ); memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 89e34aa273e2..4e593fc2c66d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1129,6 +1129,10 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_ALTIVEC)) t->vrsave = mfspr(SPRN_VRSAVE); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE)) + t->spefscr = mfspr(SPRN_SPEFSCR); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) t->dscr = mfspr(SPRN_DSCR); @@ -1159,6 +1163,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, old_thread->vrsave != new_thread->vrsave) mtspr(SPRN_VRSAVE, new_thread->vrsave); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE) && + old_thread->spefscr != new_thread->spefscr) + mtspr(SPRN_SPEFSCR, new_thread->spefscr); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) { u64 dscr = get_paca()->dscr_default; @@ -1736,6 +1745,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + p->thread.kuap = KUAP_NONE; +#endif setup_ksp_vsid(p, sp); @@ -2121,8 +2133,9 @@ unsigned long get_wchan(struct task_struct *p) static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *tsk, unsigned long *stack, - const char *loglvl) +void __no_sanitize_address show_stack(struct task_struct *tsk, + unsigned long *stack, + const char *loglvl) { unsigned long sp, ip, lr, newsp; int count = 0; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 41ed7e33d897..523b31685c4c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -701,13 +701,12 @@ static int __init prom_setprop(phandle node, const char *nodename, } /* We can't use the standard versions because of relocation headaches. */ -#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ - || ('a' <= (c) && (c) <= 'f') \ - || ('A' <= (c) && (c) <= 'F')) +#define prom_isxdigit(c) \ + (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F')) -#define isdigit(c) ('0' <= (c) && (c) <= '9') -#define islower(c) ('a' <= (c) && (c) <= 'z') -#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) +#define prom_isdigit(c) ('0' <= (c) && (c) <= '9') +#define prom_islower(c) ('a' <= (c) && (c) <= 'z') +#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) static unsigned long prom_strtoul(const char *cp, const char **endp) { @@ -716,14 +715,14 @@ static unsigned long prom_strtoul(const char *cp, const char **endp) if (*cp == '0') { base = 8; cp++; - if (toupper(*cp) == 'X') { + if (prom_toupper(*cp) == 'X') { cp++; base = 16; } } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + while (prom_isxdigit(*cp) && + (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) { result = result * base + value; cp++; } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index c17d1c9362b5..cc51fa52e783 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -300,9 +300,7 @@ static void stf_barrier_enable(bool enable) void setup_stf_barrier(void) { enum stf_barrier_type type; - bool enable, hv; - - hv = cpu_has_feature(CPU_FTR_HVMODE); + bool enable; /* Default to fallback in case fw-features are not available */ if (cpu_has_feature(CPU_FTR_ARCH_300)) @@ -315,8 +313,7 @@ void setup_stf_barrier(void) type = STF_BARRIER_NONE; enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + security_ftr_enabled(SEC_FTR_STF_BARRIER); if (type == STF_BARRIER_FALLBACK) { pr_info("stf-barrier: fallback barrier available\n"); @@ -439,8 +436,8 @@ static void update_branch_cache_flush(void) site2 = &patch__call_kvm_flush_link_stack_p9; // This controls the branch from guest_exit_cont to kvm_flush_link_stack if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); - patch_instruction_site(site2, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); + patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP())); } else { // Could use HW flush, but that could also flush count cache patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); @@ -450,11 +447,11 @@ static void update_branch_cache_flush(void) // Patch out the bcctr first, then nop the rest site = &patch__call_flush_branch_caches3; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches2; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches1; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); // This controls the branch from _switch to flush_branch_caches if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && @@ -477,12 +474,12 @@ static void update_branch_cache_flush(void) // If we just need to flush the link stack, early return if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(&patch__flush_link_stack_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); // If we have flush instruction, early return } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { patch_instruction_site(&patch__flush_count_cache_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); } } } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d7c1f92152af..7ec5c47fce0e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); */ notrace void __init machine_init(u64 dt_ptr) { - struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache); + u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); struct ppc_inst insn; /* Configure static keys first, now that we're relocated. */ @@ -85,7 +85,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP())); create_cond_branch(&insn, addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b779d25761cf..e42b85e4f1aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); - early_ioremap_setup(); - /* Initialize the hash table or TLB handling */ early_init_mmu(); + early_ioremap_setup(); + /* * After firmware and early platform setup code has set things up, * we note the SPR values for configurable control/performance diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 8f05ed0da292..f55ac65c7492 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -354,14 +354,8 @@ static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) current->thread.ckvrsave = mfspr(SPRN_VRSAVE); -#endif -#ifdef CONFIG_SPE - if (current->thread.used_spe) - flush_spe_to_thread(current); -#endif } static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, @@ -379,7 +373,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user */ unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); -#ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, @@ -412,7 +405,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_put_user(current->thread.ckvrsave, (u32 __user *)&tm_frame->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); if (msr & MSR_FP) @@ -420,7 +412,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); -#ifdef CONFIG_VSX /* * Copy VSR 0-31 upper half from thread_struct to local * buffer, then write that to userspace. Also set MSR_VSX in @@ -436,23 +427,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user msr |= MSR_VSX; } -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in __unsafe_save_user_regs(). - */ - if (current->thread.used_spe) { - unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32), failed); - /* set MSR_SPE in the saved MSR value to indicate that - * frame->mc_vregs contains valid data */ - msr |= MSR_SPE; - } - - /* We always copy to/from spefscr */ - unsafe_put_user(current->thread.spefscr, - (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); @@ -587,9 +561,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { unsigned long msr, msr_hi; -#ifdef CONFIG_VSX int i; -#endif if (tm_suspend_disabled) return 1; @@ -610,7 +582,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Restore the previous little-endian mode */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); -#ifdef CONFIG_ALTIVEC regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ @@ -629,13 +600,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); -#endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); -#ifdef CONFIG_VSX regs->msr &= ~MSR_VSX; if (msr & MSR_VSX) { /* @@ -649,24 +618,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } -#endif /* CONFIG_VSX */ - -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in restore_user_regs(). - */ - regs->msr &= ~MSR_SPE; - if (msr & MSR_SPE) { - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32), failed); - current->thread.used_spe = true; - } else if (current->thread.used_spe) - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); - - /* Always get SPEFSCR back */ - unsafe_get_user(current->thread.spefscr, - (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ user_read_access_end(); @@ -675,7 +626,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_restore_general_regs(regs, tm_sr, failed); -#ifdef CONFIG_ALTIVEC /* restore altivec registers from the stack */ if (msr & MSR_VEC) unsafe_copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, @@ -684,11 +634,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Always get VRSAVE back */ unsafe_get_user(current->thread.vrsave, (u32 __user *)&tm_sr->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed); -#ifdef CONFIG_VSX if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -697,7 +645,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed); current->thread.used_vsr = true; } -#endif /* CONFIG_VSX */ /* Get the top half of the MSR from the user context */ unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed); @@ -742,12 +689,10 @@ static long restore_tm_user_regs(struct pt_regs *regs, load_fp_state(¤t->thread.fp_state); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } -#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { load_vr_state(¤t->thread.vr_state); regs->msr |= MSR_VEC; } -#endif preempt_enable(); @@ -828,10 +773,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], - failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); @@ -926,9 +869,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } user_access_end(); diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index dca66481d0c2..16b41470ec92 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -618,15 +618,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; - /* bctrl # call the handler */ - err |= __put_user(PPC_INST_BCTRL, &tramp[0]); - /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ - err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); - /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); - /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[3]); + /* Call the handler and pop the dummy stackframe*/ + err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]); + err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]); + + err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]); + err |= __put_user(PPC_RAW_SC(), &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) @@ -902,6 +899,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); user_write_access_end(); + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -915,11 +916,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } - - /* Save the siginfo outside of the unsafe block. */ - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - goto badframe; - /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); @@ -948,8 +944,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->gpr[3] = ksig->sig; regs->result = 0; if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo); - err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc); + regs->gpr[4] = (unsigned long)&frame->info; + regs->gpr[5] = (unsigned long)&frame->uc; regs->gpr[6] = (unsigned long) frame; } else { regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2e05c783440a..820ae31e0231 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1541,6 +1541,10 @@ void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); + /* PPC64 calls setup_kup() in early_setup_secondary() */ + if (IS_ENABLED(CONFIG_PPC32)) + setup_kup(); + mmgrab(&init_mm); current->active_mm = &init_mm; diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 1deb1bf331dd..1961e6d5e33b 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -23,8 +23,8 @@ #include <asm/paca.h> -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, - struct task_struct *task, struct pt_regs *regs) +void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { unsigned long sp; @@ -61,8 +61,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, * * If the task is not 'current', the caller *must* ensure the task is inactive. */ -int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, - void *cookie, struct task_struct *task) +int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { unsigned long sp; unsigned long newsp; diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 6c31af7f4fa8..b9a047d92ec0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -201,7 +201,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index ffe9537195aa..d89c5df4f206 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -49,7 +49,7 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) addr = ppc_function_entry((void *)addr); /* if (link) set op to 'bl' else 'b' */ - create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0); + create_branch(&op, (u32 *)ip, addr, link ? 1 : 0); return op; } @@ -79,7 +79,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) } /* replace the text with the new text */ - if (patch_instruction((struct ppc_inst *)ip, new)) + if (patch_instruction((u32 *)ip, new)) return -EPERM; return 0; @@ -94,7 +94,7 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr) addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ - return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0; + return create_branch(&op, (u32 *)ip, addr, 0) == 0; } static int is_bl_op(struct ppc_inst op) @@ -162,7 +162,7 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = ppc_inst(PPC_INST_NOP); + pop = ppc_inst(PPC_RAW_NOP()); if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); @@ -170,7 +170,7 @@ __ftrace_make_nop(struct module *mod, } /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { pr_err("Unexpected instruction %s around bl _mcount\n", ppc_inst_as_str(op)); @@ -203,12 +203,12 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); + pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((struct ppc_inst *)ip, pop)) { + if (patch_instruction((u32 *)ip, pop)) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -278,9 +278,9 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - op = ppc_inst(PPC_INST_NOP); + op = ppc_inst(PPC_RAW_NOP()); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction((u32 *)ip, op)) return -EPERM; return 0; @@ -380,7 +380,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; } - if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { + if (patch_branch((u32 *)tramp, ptr, 0)) { pr_debug("REL24 out of range!\n"); return -1; } @@ -424,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) { + if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -446,7 +446,7 @@ int ftrace_make_nop(struct module *mod, if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_call_replace(ip, addr, 1); - new = ppc_inst(PPC_INST_NOP); + new = ppc_inst(PPC_RAW_NOP()); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) return __ftrace_make_nop_kernel(rec, addr); @@ -510,7 +510,7 @@ static int expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ - if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP))) + if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) return 0; return 1; } @@ -589,14 +589,14 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { int err; struct ppc_inst op; - unsigned long ip = rec->ip; + u32 *ip = (u32 *)rec->ip; /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) + if (copy_inst_from_kernel_nofault(&op, ip)) return -EFAULT; /* It should be pointing to a nop */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -608,8 +608,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* create the branch to the trampoline */ - err = create_branch(&op, (struct ppc_inst *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; @@ -617,7 +616,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) pr_devel("write to %lx\n", rec->ip); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction(ip, op)) return -EPERM; return 0; @@ -653,7 +652,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; } - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); return -EINVAL; } @@ -684,7 +683,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) */ if (test_24bit_addr(ip, addr)) { /* within range */ - old = ppc_inst(PPC_INST_NOP); + old = ppc_inst(PPC_RAW_NOP()); new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) @@ -762,7 +761,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* The new target may be within range */ if (test_24bit_addr(ip, addr)) { /* within range */ - if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -790,12 +789,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* Ensure branch is within 24 bits */ - if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&op, (u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -851,7 +850,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) struct ppc_inst old, new; int ret; - old = ppc_inst_read((struct ppc_inst *)&ftrace_call); + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); @@ -859,7 +858,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); - old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call); + old = ppc_inst_read((u32 *)&ftrace_regs_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b4ab95c9e94a..c929d93c35d0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -67,6 +67,7 @@ #include <asm/kprobes.h> #include <asm/stacktrace.h> #include <asm/nmi.h> +#include <asm/disassemble.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -537,11 +538,11 @@ static inline int check_io_access(struct pt_regs *regs) * For the debug message, we look at the preceding * load or store. */ - if (*nip == PPC_INST_NOP) + if (*nip == PPC_RAW_NOP()) nip -= 2; - else if (*nip == PPC_INST_ISYNC) + else if (*nip == PPC_RAW_ISYNC()) --nip; - if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) { + if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) { unsigned int rb; --nip; diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 9356b60d6030..8513aa49614e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,3 +296,42 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT + +#define UDBG_UART_MW_ADDR ((void __iomem *)0xc0002000) + +static u8 udbg_uart_in_isa300_rm(unsigned int reg) +{ + uint64_t msr = mfmsr(); + uint8_t c; + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); + return c; +} + +static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val) +{ + uint64_t msr = mfmsr(); + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); +} + +void __init udbg_init_debug_microwatt(void) +{ + udbg_uart_in = udbg_uart_in_isa300_rm; + udbg_uart_out = udbg_uart_out_isa300_rm; + udbg_use_uart(); +} + +#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 186f69b11e94..1aefd2ecbb8a 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, return -EINVAL; if (cpu_has_feature(CPU_FTR_ARCH_31) && - ppc_inst_prefixed(auprobe->insn) && + ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) && (addr & 0x3f) == 60) { pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); return -EINVAL; @@ -119,7 +119,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn); + regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn); user_disable_single_step(current); return 0; @@ -182,7 +182,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, ppc_inst_read(&auprobe->insn)); + ret = emulate_step(regs, ppc_inst_read(auprobe->insn)); if (ret > 0) return true; diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index c9a8f4781a10..a165635fd214 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -24,6 +24,7 @@ #include <linux/kdebug.h> #include <linux/sched/debug.h> #include <linux/delay.h> +#include <linux/processor.h> #include <linux/smp.h> #include <asm/interrupt.h> diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index e8e7b2c530d1..4b3a8d80cfa3 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -353,9 +353,6 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) preempt_enable(); } -/* From mm/mmu_context_hash32.c */ -#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) - int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 8b70de4595f0..632b2545072b 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -23,20 +23,9 @@ #include <asm/pte-walk.h> /* Translate address of a vmalloc'd thing to a linear map address */ -static void *real_vmalloc_addr(void *x) +static void *real_vmalloc_addr(void *addr) { - unsigned long addr = (unsigned long) x; - pte_t *p; - /* - * assume we don't have huge pages in vmalloc space... - * So don't worry about THP collapse/split. Called - * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. - */ - p = find_init_mm_pte(addr, NULL); - if (!p || !pte_present(*p)) - return NULL; - addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); - return __va(addr); + return __va(ppc_find_vmap_phys((unsigned long)addr)); } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 870b30d9be2f..f9a3019e37b4 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -18,8 +18,7 @@ #include <asm/setup.h> #include <asm/inst.h> -static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, - struct ppc_inst *patch_addr) +static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr) { if (!ppc_inst_prefixed(instr)) { u32 val = ppc_inst_val(instr); @@ -40,7 +39,7 @@ failed: return -EFAULT; } -int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +int raw_patch_instruction(u32 *addr, struct ppc_inst instr) { return __patch_instruction(addr, instr, addr); } @@ -70,22 +69,16 @@ static int text_area_cpu_down(unsigned int cpu) } /* - * Run as a late init call. This allows all the boot time patching to be done - * simply by patching the code, and then we're called here prior to - * mark_rodata_ro(), which happens after all init calls are run. Although - * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge - * it as being preferable to a kernel that will crash later when someone tries - * to use patch_instruction(). + * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and + * we judge it as being preferable to a kernel that will crash later when + * someone tries to use patch_instruction(). */ -static int __init setup_text_poke_area(void) +void __init poking_init(void) { BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/text_poke:online", text_area_cpu_up, text_area_cpu_down)); - - return 0; } -late_initcall(setup_text_poke_area); /* * This can be called for kernel text or a module. @@ -148,10 +141,10 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, struct ppc_inst instr) { int err; - struct ppc_inst *patch_addr = NULL; + u32 *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -172,7 +165,7 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) goto out; } - patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK)); + patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK)); __patch_instruction(addr, instr, patch_addr); @@ -187,14 +180,14 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, struct ppc_inst instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +int patch_instruction(u32 *addr, struct ppc_inst instr) { /* Make sure we aren't patching a freed init section */ if (init_mem_is_free && init_section_contains(addr, 4)) { @@ -205,7 +198,7 @@ int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) } NOKPROBE_SYMBOL(patch_instruction); -int patch_branch(struct ppc_inst *addr, unsigned long target, int flags) +int patch_branch(u32 *addr, unsigned long target, int flags) { struct ppc_inst instr; @@ -257,8 +250,7 @@ bool is_conditional_branch(struct ppc_inst instr) } NOKPROBE_SYMBOL(is_conditional_branch); -int create_branch(struct ppc_inst *instr, - const struct ppc_inst *addr, +int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -278,7 +270,7 @@ int create_branch(struct ppc_inst *instr, return 0; } -int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_cond_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -325,39 +317,39 @@ int instr_is_relative_link_branch(struct ppc_inst instr) return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } -static unsigned long branch_iform_target(const struct ppc_inst *instr) +static unsigned long branch_iform_target(const u32 *instr) { signed long imm; - imm = ppc_inst_val(*instr) & 0x3FFFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x2000000) imm -= 0x4000000; - if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -static unsigned long branch_bform_target(const struct ppc_inst *instr) +static unsigned long branch_bform_target(const u32 *instr) { signed long imm; - imm = ppc_inst_val(*instr) & 0xFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x8000) imm -= 0x10000; - if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -unsigned long branch_target(const struct ppc_inst *instr) +unsigned long branch_target(const u32 *instr) { if (instr_is_branch_iform(ppc_inst_read(instr))) return branch_iform_target(instr); @@ -367,17 +359,7 @@ unsigned long branch_target(const struct ppc_inst *instr) return 0; } -int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr) -{ - if (instr_is_branch_iform(ppc_inst_read(instr)) || - instr_is_branch_bform(ppc_inst_read(instr))) - return branch_target(instr) == addr; - - return 0; -} - -int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, - const struct ppc_inst *src) +int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) { unsigned long target; target = branch_target(src); @@ -404,12 +386,21 @@ void __patch_exception(int exc, unsigned long addr) * instruction of the exception, not the first one */ - patch_branch((struct ppc_inst *)(ibase + (exc / 4) + 1), addr, 0); + patch_branch(ibase + (exc / 4) + 1, addr, 0); } #endif #ifdef CONFIG_CODE_PATCHING_SELFTEST +static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + static void __init test_trampoline(void) { asm ("nop;\n"); @@ -422,9 +413,9 @@ static void __init test_branch_iform(void) { int err; struct ppc_inst instr; - unsigned long addr; - - addr = (unsigned long)&instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned long addr = (unsigned long)tmp; /* The simplest case, branch to self, no flags */ check(instr_is_branch_iform(ppc_inst(0x48000000))); @@ -445,63 +436,68 @@ static void __init test_branch_iform(void) check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); /* Absolute branch to 0x100 */ - instr = ppc_inst(0x48000103); - check(instr_is_branch_to_addr(&instr, 0x100)); + patch_instruction(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); /* Absolute branch to 0x420fc */ - instr = ppc_inst(0x480420ff); - check(instr_is_branch_to_addr(&instr, 0x420fc)); + patch_instruction(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); /* Maximum positive relative branch, + 20MB - 4B */ - instr = ppc_inst(0x49fffffc); - check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); + patch_instruction(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); /* Smallest negative relative branch, - 4B */ - instr = ppc_inst(0x4bfffffc); - check(instr_is_branch_to_addr(&instr, addr - 4)); + patch_instruction(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); /* Largest negative relative branch, - 32 MB */ - instr = ppc_inst(0x4a000000); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); + patch_instruction(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); /* Branch to self, with link */ - err = create_branch(&instr, &instr, addr, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr)); + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); /* Branch to self - 0x100, with link */ - err = create_branch(&instr, &instr, addr - 0x100, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); /* Branch to self + 0x100, no link */ - err = create_branch(&instr, &instr, addr + 0x100, 0); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); + err = create_branch(&instr, iptr, addr + 0x100, 0); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); /* Maximum relative negative offset, - 32 MB */ - err = create_branch(&instr, &instr, addr - 0x2000000, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); /* Out of range relative negative offset, - 32 MB + 4*/ - err = create_branch(&instr, &instr, addr - 0x2000004, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); check(err); /* Out of range relative positive offset, + 32 MB */ - err = create_branch(&instr, &instr, addr + 0x2000000, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); check(err); /* Unaligned target */ - err = create_branch(&instr, &instr, addr + 3, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); check(err); /* Check flags are masked correctly */ - err = create_branch(&instr, &instr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); check(ppc_inst_equal(instr, ppc_inst(0x48000000))); } static void __init test_create_function_call(void) { - struct ppc_inst *iptr; + u32 *iptr; unsigned long dest; struct ppc_inst instr; /* Check we can create a function call */ - iptr = (struct ppc_inst *)ppc_function_entry(test_trampoline); + iptr = (u32 *)ppc_function_entry(test_trampoline); dest = ppc_function_entry(test_create_function_call); create_branch(&instr, iptr, dest, BRANCH_SET_LINK); patch_instruction(iptr, instr); @@ -512,10 +508,11 @@ static void __init test_branch_bform(void) { int err; unsigned long addr; - struct ppc_inst *iptr, instr; + struct ppc_inst instr; + u32 tmp[2]; + u32 *iptr = tmp; unsigned int flags; - iptr = &instr; addr = (unsigned long)iptr; /* The simplest case, branch to self, no flags */ @@ -528,39 +525,43 @@ static void __init test_branch_bform(void) check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); /* Absolute conditional branch to 0x100 */ - instr = ppc_inst(0x43ff0103); - check(instr_is_branch_to_addr(&instr, 0x100)); + patch_instruction(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); /* Absolute conditional branch to 0x20fc */ - instr = ppc_inst(0x43ff20ff); - check(instr_is_branch_to_addr(&instr, 0x20fc)); + patch_instruction(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = ppc_inst(0x43ff7ffc); - check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); + patch_instruction(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); /* Smallest negative relative conditional branch, - 4B */ - instr = ppc_inst(0x43fffffc); - check(instr_is_branch_to_addr(&instr, addr - 4)); + patch_instruction(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); /* Largest negative relative conditional branch, - 32 KB */ - instr = ppc_inst(0x43ff8000); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); + patch_instruction(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); /* All condition code bits set & link */ flags = 0x3ff000 | BRANCH_SET_LINK; /* Branch to self */ err = create_cond_branch(&instr, iptr, addr, flags); - check(instr_is_branch_to_addr(&instr, addr)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); /* Branch to self - 0x100 */ err = create_cond_branch(&instr, iptr, addr - 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); /* Branch to self + 0x100 */ err = create_cond_branch(&instr, iptr, addr + 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); /* Maximum relative negative offset, - 32 KB */ err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); /* Out of range relative negative offset, - 32 KB + 4*/ err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); @@ -576,7 +577,8 @@ static void __init test_branch_bform(void) /* Check flags are masked correctly */ err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); } @@ -715,9 +717,9 @@ static void __init test_prefixed_patching(void) extern unsigned int code_patching_test1_expected[]; extern unsigned int end_code_patching_test1[]; - __patch_instruction((struct ppc_inst *)code_patching_test1, + __patch_instruction(code_patching_test1, ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), - (struct ppc_inst *)code_patching_test1); + code_patching_test1); check(!memcmp(code_patching_test1, code_patching_test1_expected, diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index fe26f2fa0f3f..4537459d012f 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -33,26 +33,25 @@ struct fixup_entry { long alt_end_off; }; -static struct ppc_inst *calc_addr(struct fixup_entry *fcur, long offset) +static u32 *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. */ - return (struct ppc_inst *)((unsigned long)fcur + offset); + return (u32 *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, - struct ppc_inst *alt_start, struct ppc_inst *alt_end) +static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { int err; struct ppc_inst instr; instr = ppc_inst_read(src); - if (instr_is_relative_branch(*src)) { - struct ppc_inst *target = (struct ppc_inst *)branch_target(src); + if (instr_is_relative_branch(ppc_inst_read(src))) { + u32 *target = (u32 *)branch_target(src); /* Branch within the section doesn't need translating */ if (target < alt_start || target > alt_end) { @@ -69,7 +68,7 @@ static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) { - struct ppc_inst *start, *end, *alt_start, *alt_end, *src, *dest, nop; + u32 *start, *end, *alt_start, *alt_end, *src, *dest; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -91,9 +90,8 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) return 1; } - nop = ppc_inst(PPC_INST_NOP); - for (; dest < end; dest = ppc_inst_next(dest, &nop)) - raw_patch_instruction(dest, nop); + for (; dest < end; dest++) + raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP())); return 0; } @@ -128,21 +126,21 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK) { - instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } else if (types & STF_BARRIER_SYNC_ORI) { - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R10, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } for (i = 0; start < end; start++, i++) { @@ -152,14 +150,14 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) // See comment in do_entry_flush_fixups() RE order of patching if (types & STF_BARRIER_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -180,32 +178,31 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x60000000; /* nop */ - instrs[4] = 0x60000000; /* nop */ - instrs[5] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_NOP(); + instrs[4] = PPC_RAW_NOP(); + instrs[5] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ - instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0); } else { - instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ - instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1); } - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ - } else { - instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ - } + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R13, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1); + else + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } for (i = 0; start < end; start++, i++) { @@ -213,12 +210,12 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); - patch_instruction((struct ppc_inst *)(dest + 4), ppc_inst(instrs[4])); - patch_instruction((struct ppc_inst *)(dest + 5), ppc_inst(instrs[5])); + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest + 3, ppc_inst(instrs[3])); + patch_instruction(dest + 4, ppc_inst(instrs[4])); + patch_instruction(dest + 5, ppc_inst(instrs[5])); } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : @@ -258,35 +255,35 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) start = PTRRELOC(&__start___uaccess_flush_fixup); end = PTRRELOC(&__stop___uaccess_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x4e800020; /* blr */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_BLR(); i = 0; if (types == L1D_FLUSH_FALLBACK) { - instrs[3] = 0x60000000; /* nop */ + instrs[3] = PPC_RAW_NOP(); /* fallthrough to fallback flush */ } if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest + 3, ppc_inst(instrs[3])); } printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, @@ -306,24 +303,24 @@ static int __do_entry_flush_fixups(void *data) long *start, *end; int i; - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types == L1D_FLUSH_FALLBACK) { - instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); /* * If we're patching in or out the fallback flush we need to be careful about the @@ -358,14 +355,14 @@ static int __do_entry_flush_fixups(void *data) pr_devel("patching dest %lx\n", (unsigned long)dest); if (types == L1D_FLUSH_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -377,14 +374,14 @@ static int __do_entry_flush_fixups(void *data) pr_devel("patching dest %lx\n", (unsigned long)dest); if (types == L1D_FLUSH_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -421,31 +418,31 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) start = PTRRELOC(&__start___rfi_flush_fixup); end = PTRRELOC(&__stop___rfi_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); if (types & L1D_FLUSH_FALLBACK) /* b .+16 to fallback flush */ - instrs[0] = 0x48000010; + instrs[0] = PPC_INST_BRANCH | 16; i = 0; if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); } printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, @@ -467,18 +464,18 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ start = fixup_start; end = fixup_end; - instr = 0x60000000; /* nop */ + instr = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using ORI speculation barrier\n"); - instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instr)); + patch_instruction(dest, ppc_inst(instr)); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -508,21 +505,21 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ start = fixup_start; end = fixup_end; - instr[0] = PPC_INST_NOP; - instr[1] = PPC_INST_NOP; + instr[0] = PPC_RAW_NOP(); + instr[1] = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); - instr[0] = PPC_INST_ISYNC; - instr[1] = PPC_INST_SYNC; + instr[0] = PPC_RAW_ISYNC(); + instr[1] = PPC_RAW_SYNC(); } for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instr[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instr[1])); + patch_instruction(dest, ppc_inst(instr[0])); + patch_instruction(dest + 1, ppc_inst(instr[1])); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -536,7 +533,7 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_INST_NOP)); + patch_instruction(start, ppc_inst(PPC_RAW_NOP())); } } @@ -555,7 +552,7 @@ void do_btb_flush_fixups(void) void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - struct ppc_inst *dest; + u32 *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -572,13 +569,14 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) static void do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - struct ppc_inst inst, *src, *dest, *end; + struct ppc_inst inst; + u32 *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START); - dest = (struct ppc_inst *)KERNELBASE; + src = (u32 *)(KERNELBASE + PHYSICAL_START); + dest = (u32 *)KERNELBASE; end = (void *)src + (__end_interrupts - _stext); while (src < end) { diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 45bda2520755..aee42bcc775b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1700,6 +1700,28 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = regs->ccr & imm; goto compute_done; + case 128: /* setb */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + /* + * 'ra' encodes the CR field number (bfa) in the top 3 bits. + * Since each CR field is 4 bits, + * we can simply mask off the bottom two bits (bfa * 4) + * to yield the first bit in the CR field. + */ + ra = ra & ~0x3; + /* 'val' stores bits of the CR field (bfa) */ + val = regs->ccr >> (CR0_SHIFT - ra); + /* checks if the LT bit of CR field (bfa) is set */ + if (val & 8) + op->val = -1; + /* checks if the GT bit of CR field (bfa) is set */ + else if (val & 4) + op->val = 1; + else + op->val = 0; + goto compute_done; + case 144: /* mtcrf */ op->type = COMPUTE + SETCC; imm = 0xf0000000UL; diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 783d1b85ecfe..1cbfa468813b 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -53,6 +53,8 @@ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ PPC_RAW_ADDI(t, a, i)) +#define TEST_SETB(t, bfa) ppc_inst(PPC_INST_SETB | ___PPC_RT(t) | ___PPC_RA((bfa & 0x7) << 2)) + static void __init init_pt_regs(struct pt_regs *regs) { @@ -824,8 +826,7 @@ static void __init test_plxvp_pstxvp(void) * XTp = 32xTX + 2xTp * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 */ - instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32, - PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff); + instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0)); stepped = emulate_step(®s, instr); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { @@ -853,8 +854,7 @@ static void __init test_plxvp_pstxvp(void) * XSp = 32xSX + 2xSp * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 */ - instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32, - PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff); + instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0)); stepped = emulate_step(®s, instr); @@ -922,7 +922,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "R0 = LONG_MAX", - .instr = ppc_inst(PPC_INST_NOP), + .instr = ppc_inst(PPC_RAW_NOP()), .regs = { .gpr[0] = LONG_MAX, } @@ -930,6 +930,33 @@ static struct compute_test compute_tests[] = { } }, { + .mnemonic = "setb", + .cpu_feature = CPU_FTR_ARCH_300, + .subtests = { + { + .descr = "BFA = 1, CR = GT", + .instr = TEST_SETB(20, 1), + .regs = { + .ccr = 0x4000000, + } + }, + { + .descr = "BFA = 4, CR = LT", + .instr = TEST_SETB(20, 4), + .regs = { + .ccr = 0x8000, + } + }, + { + .descr = "BFA = 5, CR = EQ", + .instr = TEST_SETB(20, 5), + .regs = { + .ccr = 0x200, + } + } + } + }, + { .mnemonic = "add", .subtests = { { diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index c3df3a8501d4..9142cf1fb0d5 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,7 +5,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ +obj-y := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o \ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 7f0c8a78ba0c..15f4773643d2 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -10,3 +10,4 @@ obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o obj-$(CONFIG_PPC_KUEP) += kuep.o +obj-$(CONFIG_PPC_KUAP) += kuap.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index fb4233a5bdf7..6925ce998557 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -27,8 +27,10 @@ #include <asm/code-patching-asm.h> #ifdef CONFIG_PTE_64BIT +#define PTE_T_SIZE 8 #define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ #else +#define PTE_T_SIZE 4 #define PTE_FLAGS_OFFSET 0 #endif @@ -488,7 +490,7 @@ _GLOBAL(flush_hash_pages) bne 2f ble cr1,19f addi r4,r4,0x1000 - addi r5,r5,PTE_SIZE + addi r5,r5,PTE_T_SIZE addi r6,r6,-1 b 1b @@ -573,7 +575,7 @@ _GLOBAL(flush_hash_pages) 8: ble cr1,9f /* if all ptes checked */ 81: addi r6,r6,-1 - addi r5,r5,PTE_SIZE + addi r5,r5,PTE_T_SIZE addi r4,r4,0x1000 lwz r0,0(r5) /* check next pte */ cmpwi cr1,r6,1 diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c new file mode 100644 index 000000000000..9df6911b8fde --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <asm/kup.h> +#include <asm/smp.h> + +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + +void kuap_lock_all_ool(void) +{ + kuap_lock_all(); +} +EXPORT_SYMBOL(kuap_lock_all_ool); + +void kuap_unlock_all_ool(void) +{ + kuap_unlock_all(); +} +EXPORT_SYMBOL(kuap_unlock_all_ool); + +void __init setup_kuap(bool disabled) +{ + if (!disabled) + kuap_lock_all_ool(); + + if (smp_processor_id() != boot_cpuid) + return; + + if (disabled) + static_branch_enable(&disable_kuap_key); + else + pr_info("Activating Kernel Userspace Access Protection\n"); +} diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c index 8ed1b8634839..3f6eb6e23fca 100644 --- a/arch/powerpc/mm/book3s32/kuep.c +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -1,40 +1,20 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <asm/kup.h> -#include <asm/reg.h> -#include <asm/task_size_32.h> -#include <asm/mmu.h> +#include <asm/smp.h> -#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ - if (TASK_SIZE > ((n) << 28)) \ - mtsr(val1, (n) << 28); \ - if (TASK_SIZE > (((n) + 1) << 28)) \ - mtsr(val2, ((n) + 1) << 28); \ - val1 = (val1 + 0x222) & 0xf0ffffff; \ - val2 = (val2 + 0x222) & 0xf0ffffff; \ -} while (0) +struct static_key_false disable_kuep_key; -static __always_inline void kuep_update(u32 val) +void __init setup_kuep(bool disabled) { - int val1 = val; - int val2 = (val + 0x111) & 0xf0ffffff; + if (!disabled) + kuep_lock(); - KUEP_UPDATE_TWO_USER_SEGMENTS(0); - KUEP_UPDATE_TWO_USER_SEGMENTS(2); - KUEP_UPDATE_TWO_USER_SEGMENTS(4); - KUEP_UPDATE_TWO_USER_SEGMENTS(6); - KUEP_UPDATE_TWO_USER_SEGMENTS(8); - KUEP_UPDATE_TWO_USER_SEGMENTS(10); - KUEP_UPDATE_TWO_USER_SEGMENTS(12); - KUEP_UPDATE_TWO_USER_SEGMENTS(14); -} + if (smp_processor_id() != boot_cpuid) + return; -void kuep_lock(void) -{ - kuep_update(mfsr(0) | SR_NX); -} - -void kuep_unlock(void) -{ - kuep_update(mfsr(0) & ~SR_NX); + if (disabled) + static_branch_enable(&disable_kuep_key); + else + pr_info("Activating Kernel Userspace Execution Prevention\n"); } diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 159930351d9f..27061583a010 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -445,26 +445,6 @@ void __init print_system_hash_info(void) pr_info("Hash_mask = 0x%lx\n", Hash_mask); } -#ifdef CONFIG_PPC_KUEP -void __init setup_kuep(bool disabled) -{ - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - if (disabled) - pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n"); -} -#endif - -#ifdef CONFIG_PPC_KUAP -void __init setup_kuap(bool disabled) -{ - pr_info("Activating Kernel Userspace Access Protection\n"); - - if (disabled) - pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); -} -#endif - void __init early_init_mmu(void) { } diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c index 218996e40a8e..e2708e387dc3 100644 --- a/arch/powerpc/mm/book3s32/mmu_context.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c @@ -24,6 +24,12 @@ #include <asm/mmu_context.h> /* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +void *abatron_pteptrs[2]; + +/* * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs * (virtual segment identifiers) for each context. Although the * hardware supports 24-bit VSIDs, and thus >1 million contexts, @@ -39,19 +45,6 @@ #define LAST_CONTEXT 32767 #define FIRST_CONTEXT 1 -/* - * This function defines the mapping from contexts to VSIDs (virtual - * segment IDs). We use a skew on both the context and the high 4 bits - * of the 32-bit virtual address (the "effective segment ID") in order - * to spread out the entries in the MMU hash table. Note, if this - * function is changed then arch/ppc/mm/hashtable.S will have to be - * changed to correspond. - * - * - * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \ - * & 0xffffff) - */ - static unsigned long next_mmu_context; static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; @@ -111,3 +104,32 @@ void __init mmu_context_init(void) context_map[0] = (1 << FIRST_CONTEXT) - 1; next_mmu_context = FIRST_CONTEXT; } + +void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +{ + long id = next->context.id; + unsigned long val; + + if (id < 0) + panic("mm_struct %p has no context ID", next); + + isync(); + + val = CTX_TO_VSID(id, 0); + if (!kuep_is_disabled()) + val |= SR_NX; + if (!kuap_is_disabled()) + val |= SR_KS; + + update_user_segments(val); + + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = next->pgd; + + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff); + + mb(); /* sync */ + isync(); +} +EXPORT_SYMBOL(switch_mmu_context); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 318ec4f33661..ae12fb5559cb 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -354,22 +354,30 @@ static inline void fixup_tlbie_lpid(unsigned long lpid) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; asm volatile("ptesync": : :"memory"); - /* - * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, - * also flush the entire Page Walk Cache. - */ - __tlbiel_pid(pid, 0, ric); + switch (ric) { + case RIC_FLUSH_PWC: - /* For PWC, only one flush is needed */ - if (ric == RIC_FLUSH_PWC) { + /* For PWC, only one flush is needed */ + __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); ppc_after_tlbiel_barrier(); return; + case RIC_FLUSH_TLB: + __tlbiel_pid(pid, 0, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_ALL: + default: + /* + * Flush the first set of the TLB, and if + * we're doing a RIC_FLUSH_ALL, also flush + * the entire Page Walk Cache. + */ + __tlbiel_pid(pid, 0, RIC_FLUSH_ALL); } if (!cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -1290,7 +1298,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) } } -static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, +static void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, unsigned long end, int psize, bool also_pwc) { diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index a3c30a884076..aad7c47e0030 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -12,7 +12,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) return is_kernel_addr((unsigned long)unsafe_src); } -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src) { unsigned int val, suffix; int err; @@ -21,7 +21,7 @@ int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) if (err) return err; if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix)); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 043bbeaf407c..c3b4fdda7069 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -20,6 +20,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include <asm/kasan.h> +#include <asm/sparsemem.h> #include <asm/svm.h> #include <mm/mmu_decl.h> @@ -27,6 +28,9 @@ unsigned long long memory_limit; bool init_mem_is_free; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -298,6 +302,10 @@ void __init mem_init(void) ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", VMALLOC_START, VMALLOC_END); +#ifdef MODULES_VADDR + pr_info(" * 0x%08lx..0x%08lx : modules\n", + MODULES_VADDR, MODULES_END); +#endif #endif /* CONFIG_PPC32 */ } diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c index 3d6ae7c72412..7da6d1e9fc9b 100644 --- a/arch/powerpc/mm/nohash/44x.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -239,3 +239,16 @@ void __init mmu_init_secondary(int cpu) } } #endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + if (disabled) + patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP())); + else + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + if (IS_ENABLED(CONFIG_PPC_47x) && disabled) + patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP())); +} +#endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 71bfdbedacee..60780e089118 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -212,37 +212,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } -/* - * Set up to use a given MMU context. - * id is context number, pgd is PGD pointer. - * - * We place the physical address of the new task page directory loaded - * into the MMU base register, and set the ASID compare register with - * the new "context." - */ -void set_context(unsigned long id, pgd_t *pgd) -{ - s16 offset = (s16)(__pa(swapper_pg_dir)); - - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - if (IS_ENABLED(CONFIG_BDI_SWITCH)) - abatron_pteptrs[1] = pgd; - - /* Register M_TWB will contain base address of level 1 table minus the - * lower part of the kernel PGDIR base address, so that all accesses to - * level 1 table are done relative to lower part of kernel PGDIR base - * address. - */ - mtspr(SPRN_M_TWB, __pa(pgd) - offset); - - /* Update context */ - mtspr(SPRN_M_CASID, id - 1); - /* sync */ - mb(); -} - #ifdef CONFIG_PPC_KUEP void __init setup_kuep(bool disabled) { @@ -256,12 +225,17 @@ void __init setup_kuep(bool disabled) #endif #ifdef CONFIG_PPC_KUAP +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + void __init setup_kuap(bool disabled) { - pr_info("Activating Kernel Userspace Access Protection\n"); + if (disabled) { + static_branch_enable(&disable_kuap_key); + return; + } - if (disabled) - pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n"); + pr_info("Activating Kernel Userspace Access Protection\n"); mtspr(SPRN_MD_AP, MD_APG_KUAP); } diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index aac81c9f84a5..44b2b5e7cabe 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -21,21 +21,6 @@ * also clear mm->cpu_vm_mask bits when processes are migrated */ -//#define DEBUG_MAP_CONSISTENCY -//#define DEBUG_CLAMP_LAST_CONTEXT 31 -//#define DEBUG_HARDER - -/* We don't use DEBUG because it tends to be compiled in always nowadays - * and this would generate way too much output - */ -#ifdef DEBUG_HARDER -#define pr_hard(args...) printk(KERN_DEBUG args) -#define pr_hardcont(args...) printk(KERN_CONT args) -#else -#define pr_hard(args...) do { } while(0) -#define pr_hardcont(args...) do { } while(0) -#endif - #include <linux/kernel.h> #include <linux/mm.h> #include <linux/init.h> @@ -47,10 +32,17 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> +#include <asm/smp.h> #include <mm/mmu_decl.h> /* + * Room for two PTE table pointers, usually the kernel and current user + * pointer to their respective root page table (pgdir). + */ +void *abatron_pteptrs[2]; + +/* * The MPC8xx has only 16 contexts. We rotate through them on each task switch. * A better way would be to keep track of tasks that own contexts, and implement * an LRU usage. That way very active tasks don't always have to pay the TLB @@ -68,9 +60,7 @@ * -- BenH */ #define FIRST_CONTEXT 1 -#ifdef DEBUG_CLAMP_LAST_CONTEXT -#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT -#elif defined(CONFIG_PPC_8xx) +#if defined(CONFIG_PPC_8xx) #define LAST_CONTEXT 16 #elif defined(CONFIG_PPC_47x) #define LAST_CONTEXT 65535 @@ -80,9 +70,7 @@ static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; -#ifdef CONFIG_SMP static unsigned long *stale_map[NR_CPUS]; -#endif static struct mm_struct **context_mm; static DEFINE_RAW_SPINLOCK(context_lock); @@ -105,7 +93,6 @@ static DEFINE_RAW_SPINLOCK(context_lock); * the stale map as we can just flush the local CPU * -- benh */ -#ifdef CONFIG_SMP static unsigned int steal_context_smp(unsigned int id) { struct mm_struct *mm; @@ -127,7 +114,6 @@ static unsigned int steal_context_smp(unsigned int id) id = FIRST_CONTEXT; continue; } - pr_hardcont(" | steal %d from 0x%p", id, mm); /* Mark this mm has having no context anymore */ mm->context.id = MMU_NO_CONTEXT; @@ -158,34 +144,25 @@ static unsigned int steal_context_smp(unsigned int id) /* This will cause the caller to try again */ return MMU_NO_CONTEXT; } -#endif /* CONFIG_SMP */ static unsigned int steal_all_contexts(void) { struct mm_struct *mm; -#ifdef CONFIG_SMP int cpu = smp_processor_id(); -#endif unsigned int id; for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { /* Pick up the victim mm */ mm = context_mm[id]; - pr_hardcont(" | steal %d from 0x%p", id, mm); - /* Mark this mm as having no context anymore */ mm->context.id = MMU_NO_CONTEXT; if (id != FIRST_CONTEXT) { context_mm[id] = NULL; __clear_bit(id, context_map); -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif } -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif + if (IS_ENABLED(CONFIG_SMP)) + __clear_bit(id, stale_map[cpu]); } /* Flush the TLB for all contexts (not to be used on SMP) */ @@ -204,15 +181,11 @@ static unsigned int steal_all_contexts(void) static unsigned int steal_context_up(unsigned int id) { struct mm_struct *mm; -#ifdef CONFIG_SMP int cpu = smp_processor_id(); -#endif /* Pick up the victim mm */ mm = context_mm[id]; - pr_hardcont(" | steal %d from 0x%p", id, mm); - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); @@ -220,81 +193,64 @@ static unsigned int steal_context_up(unsigned int id) mm->context.id = MMU_NO_CONTEXT; /* XXX This clear should ultimately be part of local_flush_tlb_mm */ -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif + if (IS_ENABLED(CONFIG_SMP)) + __clear_bit(id, stale_map[cpu]); return id; } -#ifdef DEBUG_MAP_CONSISTENCY -static void context_check_map(void) +static void set_context(unsigned long id, pgd_t *pgd) { - unsigned int id, nrf, nact; + if (IS_ENABLED(CONFIG_PPC_8xx)) { + s16 offset = (s16)(__pa(swapper_pg_dir)); + + /* + * Register M_TWB will contain base address of level 1 table minus the + * lower part of the kernel PGDIR base address, so that all accesses to + * level 1 table are done relative to lower part of kernel PGDIR base + * address. + */ + mtspr(SPRN_M_TWB, __pa(pgd) - offset); - nrf = nact = 0; - for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { - int used = test_bit(id, context_map); - if (!used) - nrf++; - if (used != (context_mm[id] != NULL)) - pr_err("MMU: Context %d is %s and MM is %p !\n", - id, used ? "used" : "free", context_mm[id]); - if (context_mm[id] != NULL) - nact += context_mm[id]->context.active; - } - if (nrf != nr_free_contexts) { - pr_err("MMU: Free context count out of sync ! (%d vs %d)\n", - nr_free_contexts, nrf); - nr_free_contexts = nrf; + /* Update context */ + mtspr(SPRN_M_CASID, id - 1); + + /* sync */ + mb(); + } else { + if (IS_ENABLED(CONFIG_40x)) + mb(); /* sync */ + + mtspr(SPRN_PID, id); + isync(); } - if (nact > num_online_cpus()) - pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", - nact, num_online_cpus()); - if (FIRST_CONTEXT > 0 && !test_bit(0, context_map)) - pr_err("MMU: Context 0 has been freed !!!\n"); } -#else -static void context_check_map(void) { } -#endif void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { unsigned int id; -#ifdef CONFIG_SMP unsigned int i, cpu = smp_processor_id(); -#endif unsigned long *map; /* No lockless fast path .. yet */ raw_spin_lock(&context_lock); - pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", - cpu, next, next->context.active, next->context.id); - -#ifdef CONFIG_SMP - /* Mark us active and the previous one not anymore */ - next->context.active++; - if (prev) { - pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); - WARN_ON(prev->context.active < 1); - prev->context.active--; + if (IS_ENABLED(CONFIG_SMP)) { + /* Mark us active and the previous one not anymore */ + next->context.active++; + if (prev) { + WARN_ON(prev->context.active < 1); + prev->context.active--; + } } again: -#endif /* CONFIG_SMP */ /* If we already have a valid assigned context, skip all that */ id = next->context.id; - if (likely(id != MMU_NO_CONTEXT)) { -#ifdef DEBUG_MAP_CONSISTENCY - if (context_mm[id] != next) - pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n", - next, id, id, context_mm[id]); -#endif + if (likely(id != MMU_NO_CONTEXT)) goto ctxt_ok; - } /* We really don't have a context, let's try to acquire one */ id = next_context; @@ -304,14 +260,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, /* No more free contexts, let's try to steal one */ if (nr_free_contexts == 0) { -#ifdef CONFIG_SMP if (num_online_cpus() > 1) { id = steal_context_smp(id); if (id == MMU_NO_CONTEXT) goto again; goto stolen; } -#endif /* CONFIG_SMP */ if (IS_ENABLED(CONFIG_PPC_8xx)) id = steal_all_contexts(); else @@ -330,20 +284,13 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, next_context = id + 1; context_mm[id] = next; next->context.id = id; - pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts); - context_check_map(); ctxt_ok: /* If that context got marked stale on this CPU, then flush the * local TLB for it and unmark it before we use it */ -#ifdef CONFIG_SMP - if (test_bit(id, stale_map[cpu])) { - pr_hardcont(" | stale flush %d [%d..%d]", - id, cpu_first_thread_sibling(cpu), - cpu_last_thread_sibling(cpu)); - + if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) { local_flush_tlb_mm(next); /* XXX This clear should ultimately be part of local_flush_tlb_mm */ @@ -353,10 +300,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, __clear_bit(id, stale_map[i]); } } -#endif /* Flick the MMU and release lock */ - pr_hardcont(" -> %d\n", id); + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = next->pgd; set_context(id, next->pgd); raw_spin_unlock(&context_lock); } @@ -366,8 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, */ int init_new_context(struct task_struct *t, struct mm_struct *mm) { - pr_hard("initing context for mm @%p\n", mm); - /* * We have MMU_NO_CONTEXT set to be ~0. Hence check * explicitly against context.id == 0. This ensures that we properly @@ -401,16 +346,12 @@ void destroy_context(struct mm_struct *mm) if (id != MMU_NO_CONTEXT) { __clear_bit(id, context_map); mm->context.id = MMU_NO_CONTEXT; -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif context_mm[id] = NULL; nr_free_contexts++; } raw_spin_unlock_irqrestore(&context_lock, flags); } -#ifdef CONFIG_SMP static int mmu_ctx_cpu_prepare(unsigned int cpu) { /* We don't touch CPU 0 map, it's allocated at aboot and kept @@ -419,7 +360,6 @@ static int mmu_ctx_cpu_prepare(unsigned int cpu) if (cpu == boot_cpuid) return 0; - pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); return 0; } @@ -430,7 +370,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu) if (cpu == boot_cpuid) return 0; - pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); kfree(stale_map[cpu]); stale_map[cpu] = NULL; @@ -440,8 +379,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu) return 0; } -#endif /* CONFIG_SMP */ - /* * Initialize the context management stuff. */ @@ -465,16 +402,16 @@ void __init mmu_context_init(void) if (!context_mm) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(void *) * (LAST_CONTEXT + 1)); -#ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); - if (!stale_map[boot_cpuid]) - panic("%s: Failed to allocate %zu bytes\n", __func__, - CTX_MAP_SIZE); - - cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, - "powerpc/mmu/ctx:prepare", - mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); -#endif + if (IS_ENABLED(CONFIG_SMP)) { + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); + if (!stale_map[boot_cpuid]) + panic("%s: Failed to allocate %zu bytes\n", __func__, + CTX_MAP_SIZE); + + cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, + "powerpc/mmu/ctx:prepare", + mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); + } printk(KERN_INFO "MMU: Allocated %zu bytes of context maps for %d contexts\n", diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 68797e072f55..4613bf8e9aae 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -360,19 +360,6 @@ _GLOBAL(_tlbivax_bcast) sync wrtee r10 blr - -_GLOBAL(set_context) -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr #else #error Unsupported processor type ! #endif diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c new file mode 100644 index 000000000000..0876216ceee6 --- /dev/null +++ b/arch/powerpc/mm/pageattr.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * MMU-generic set_memory implementation for powerpc + * + * Copyright 2019-2021, IBM Corporation. + */ + +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/set_memory.h> + +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/pgtable.h> + + +/* + * Updates the attributes of a page in three steps: + * + * 1. invalidate the page table entry + * 2. flush the TLB + * 3. install the new entry with the updated attributes + * + * Invalidating the pte means there are situations where this will not work + * when in theory it should. + * For example: + * - removing write from page whilst it is being executed + * - setting a page read-only whilst it is being read by another CPU + * + */ +static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) +{ + long action = (long)data; + pte_t pte; + + spin_lock(&init_mm.page_table_lock); + + /* invalidate the PTE so it's safe to modify */ + pte = ptep_get_and_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + /* modify the PTE bits as desired, then apply */ + switch (action) { + case SET_MEMORY_RO: + pte = pte_wrprotect(pte); + break; + case SET_MEMORY_RW: + pte = pte_mkwrite(pte_mkdirty(pte)); + break; + case SET_MEMORY_NX: + pte = pte_exprotect(pte); + break; + case SET_MEMORY_X: + pte = pte_mkexec(pte); + break; + default: + WARN_ON_ONCE(1); + break; + } + + set_pte_at(&init_mm, addr, ptep, pte); + + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync": : :"memory"); + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +int change_memory_attr(unsigned long addr, int numpages, long action) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + unsigned long size = numpages * PAGE_SIZE; + + if (!numpages) + return 0; + + if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) && + is_vm_area_hugepages((void *)addr))) + return -EINVAL; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * On hash, the linear mapping is not in the Linux page table so + * apply_to_existing_page_range() will have no effect. If in the future + * the set_memory_* functions are used on the linear map this will need + * to be updated. + */ + if (!radix_enabled()) { + int region = get_region_id(addr); + + if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID)) + return -EINVAL; + } +#endif + + return apply_to_existing_page_range(&init_mm, start, size, + change_page_attr, (void *)action); +} + +/* + * Set the attributes of a page: + * + * This function is used by PPC32 at the end of init to set final kernel memory + * protection. It includes changing the maping of the page it is executing from + * and data pages it is using. + */ +static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) +{ + pgprot_t prot = __pgprot((unsigned long)data); + + spin_lock(&init_mm.page_table_lock); + + set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + unsigned long sz = numpages * PAGE_SIZE; + + if (numpages <= 0) + return 0; + + return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, + (void *)pgprot_val(prot)); +} diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 354611940118..1707ab580ee2 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -28,6 +28,8 @@ #include <asm/hugetlb.h> #include <asm/pte-walk.h> +pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __page_aligned_bss; + static inline int is_exec_fault(void) { return current->thread.regs && TRAP(current->thread.regs) == 0x400; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index e0ec67a16887..dcf5ecca19d9 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -23,6 +23,7 @@ #include <linux/highmem.h> #include <linux/memblock.h> #include <linux/slab.h> +#include <linux/set_memory.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> @@ -132,64 +133,20 @@ void __init mapin_ram(void) } } -static int __change_page_attr_noflush(struct page *page, pgprot_t prot) -{ - pte_t *kpte; - unsigned long address; - - BUG_ON(PageHighMem(page)); - address = (unsigned long)page_address(page); - - if (v_block_mapped(address)) - return 0; - kpte = virt_to_kpte(address); - if (!kpte) - return -EINVAL; - __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); - - return 0; -} - -/* - * Change the page attributes of an page in the linear mapping. - * - * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY - */ -static int change_page_attr(struct page *page, int numpages, pgprot_t prot) -{ - int i, err = 0; - unsigned long flags; - struct page *start = page; - - local_irq_save(flags); - for (i = 0; i < numpages; i++, page++) { - err = __change_page_attr_noflush(page, prot); - if (err) - break; - } - wmb(); - local_irq_restore(flags); - flush_tlb_kernel_range((unsigned long)page_address(start), - (unsigned long)page_address(page)); - return err; -} - void mark_initmem_nx(void) { - struct page *page = virt_to_page(_sinittext); unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else - change_page_attr(page, numpages, PAGE_KERNEL); + set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); } #ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void) { - struct page *page; unsigned long numpages; if (v_block_mapped((unsigned long)_stext + 1)) { @@ -198,20 +155,18 @@ void mark_rodata_ro(void) return; } - page = virt_to_page(_stext); numpages = PFN_UP((unsigned long)_etext) - PFN_DOWN((unsigned long)_stext); - change_page_attr(page, numpages, PAGE_KERNEL_ROX); + set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata * to cover NOTES and EXCEPTION_TABLE. */ - page = virt_to_page(__start_rodata); numpages = PFN_UP((unsigned long)__init_begin) - PFN_DOWN((unsigned long)__start_rodata); - change_page_attr(page, numpages, PAGE_KERNEL_RO); + set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -221,9 +176,14 @@ void mark_rodata_ro(void) #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { + unsigned long addr = (unsigned long)page_address(page); + if (PageHighMem(page)) return; - change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + if (enable) + set_memory_attr(addr, numpages, PAGE_KERNEL); + else + set_memory_attr(addr, numpages, __pgprot(0)); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 798ac4350a82..53aefee3fe70 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -237,6 +237,7 @@ skip_codegen_passes: fp->jited_len = alloclen; bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + bpf_jit_binary_lock_ro(bpf_hdr); if (!fp->is_func || extra_pass) { bpf_prog_fill_jited_linfo(fp, addrs); out_addrs: @@ -257,15 +258,3 @@ out: return fp; } - -/* Overriding bpf_jit_free() as we don't set images read-only. */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index bbb16099e8c7..92e17fbaaabb 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -108,20 +108,20 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); - EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* * Initialize tail_call_cnt in stack frame if we do tail calls. * Otherwise, put in NOPs so that it can be skipped when we are * invoked through a tail call. */ - if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - } else { + if (ctx->seen & SEEN_TAILCALL) + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + else EMIT(PPC_RAW_NOP()); - } #define BPF_TAILCALL_PROLOGUE_SIZE 16 @@ -130,30 +130,30 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * save/restore LR unless we call other functions */ if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MFLR(__REG_R0)); + EMIT(PPC_RAW_MFLR(_R0)); /* * Back up non-volatile regs -- registers r18-r31 */ for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) - EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1, + EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); } static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) @@ -163,24 +163,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) - EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); } void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); /* Tear down our stack frame */ if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx))); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BLR()); } @@ -193,9 +193,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun PPC_BL_ABS(func); } else { /* Load function address into r0 */ - EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); - EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func))); + EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BLRL()); } } @@ -215,47 +215,47 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); - EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0)); - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); + EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); PPC_BCC(COND_GE, out); /* * if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT)); + EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ - EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1)); + EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); PPC_BCC(COND_GT, out); /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29)); - EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array)); - EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); + EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); + EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + EMIT(PPC_RAW_CMPLWI(_R3, 0)); PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(__REG_R3)); + EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -352,8 +352,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, _R0)); } if (imm >= 0) EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); @@ -362,11 +362,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(_R0, dst_reg, src_reg_h)); EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); break; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -376,8 +376,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, _R0)); } break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ @@ -398,17 +398,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); if (imm < 0) EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); - EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULHWU(_R0, dst_reg, tmp_reg)); EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); - EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); break; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ - EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_DIVWU(_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(_R0, src_reg, _R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); break; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ return -EOPNOTSUPP; @@ -420,8 +420,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm == 1) break; - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0)); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ if (!imm) @@ -430,9 +430,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (!is_power_of_2((u32)imm)) { bpf_set_seen_register(ctx, tmp_reg); PPC_LI32(tmp_reg, imm); - EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); - EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_DIVWU(_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); break; } if (imm == 1) @@ -503,8 +503,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - fls(imm), 32 - ffs(imm))); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, _R0)); } break; case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ @@ -555,12 +555,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SRW(_R0, dst_reg, _R0)); EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); - EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0)); EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); break; @@ -591,12 +591,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; @@ -627,15 +627,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); - EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26)); EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); - EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ @@ -702,24 +702,24 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, _R0)); break; case 64: bpf_set_seen_register(ctx, tmp_reg); EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); - EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, dst_reg_h, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, _R0)); EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); break; } @@ -744,32 +744,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STB(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STH(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); break; case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); - PPC_EX32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4)); + PPC_EX32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off)); break; /* @@ -780,11 +780,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * /* Get offset into TMP_REG */ EMIT(PPC_RAW_LI(tmp_reg, off)); /* load value from memory into r0 */ - EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); /* store result back */ - EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); break; @@ -852,14 +852,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * return ret; if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); } bpf_jit_emit_func_call_rel(image, ctx, func_addr); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); break; /* @@ -967,12 +967,12 @@ cond_branch: EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSET | BPF_X: - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h)); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg)); break; case BPF_JMP32 | BPF_JSET | BPF_X: { - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg)); break; case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JEQ | BPF_K: @@ -990,11 +990,11 @@ cond_branch: EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); } else { /* sign-extending load ... but unsigned comparison */ - PPC_EX32(__REG_R0, imm); - EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); - PPC_LI32(__REG_R0, imm); + PPC_EX32(_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0)); + PPC_LI32(_R0, imm); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JNE | BPF_K: @@ -1006,8 +1006,8 @@ cond_branch: if (imm >= 0 && imm < 65536) { EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; } @@ -1022,9 +1022,9 @@ cond_branch: } else { /* sign-extending load */ EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); - PPC_LI32(__REG_R0, imm); + PPC_LI32(_R0, imm); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JSGT | BPF_K: @@ -1039,32 +1039,32 @@ cond_branch: EMIT(PPC_RAW_CMPWI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, _R0)); } break; case BPF_JMP | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) { /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); + PPC_LI32(_R0, imm); if (imm < 0) { EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); } - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ if (imm >= -32768 && imm < 32768) { /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0)); } break; } diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 57a8c1153851..7ec9c852f105 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -94,7 +94,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * save/restore LR unless we call other functions */ if (ctx->seen & SEEN_FUNC) { - EMIT(PPC_INST_MFLR | __PPC_RT(R0)); + EMIT(PPC_RAW_MFLR(_R0)); PPC_BPF_STL(0, 1, PPC_LR_STKOFF); } diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index c02854dea2b2..2f46e31c7612 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o -ifdef CONFIG_COMPAT -obj-$(CONFIG_PERF_EVENTS) += callchain_32.o -endif +obj-y += callchain.o callchain_$(BITS).o perf_regs.o +obj-$(CONFIG_COMPAT) += callchain_32.o obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 6c028ee513c0..082f6d0308a4 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -40,7 +40,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -void +void __no_sanitize_address perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { unsigned long sp, next_sp; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 16d4d1b6a1ff..3e864feaacf7 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -460,7 +460,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) sizeof(instr))) return 0; - return branch_target((struct ppc_inst *)&instr); + return branch_target(&instr); } /* Userspace: need copy instruction here then translate it */ @@ -468,7 +468,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) sizeof(instr))) return 0; - target = branch_target((struct ppc_inst *)&instr); + target = branch_target(&instr); if ((!target) || (instr & BRANCH_ABSOLUTE)) return target; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 8c0d324f657e..3823df235f25 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -582,6 +582,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd, if (ret) break; /* fall through and return the timeout */ + fallthrough; case WDIOC_GETTIMEOUT: /* we need to round here as to avoid e.g. the following diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 87f524e4b09c..8a7e55acf090 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -73,7 +73,7 @@ smp_86xx_kick_cpu(int nr) /* Setup fake reset vector to call __secondary_start_mpc86xx. */ target = (unsigned long) __secondary_start_mpc86xx; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Kick that CPU */ smp_86xx_release_core(nr); @@ -83,7 +83,7 @@ smp_86xx_kick_cpu(int nr) mdelay(1); /* Restore the exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 7a5e8f4541e3..2f071fb9694c 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -20,6 +20,8 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +source "arch/powerpc/platforms/book3s/Kconfig" +source "arch/powerpc/platforms/microwatt/Kconfig" config KVM_GUEST bool "KVM Guest support" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index f998e655b570..50237e2007e7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -61,6 +61,7 @@ config 44x select 4xx_SOC select HAVE_PCI select PHYS_64BIT + select PPC_HAVE_KUEP endchoice @@ -389,7 +390,7 @@ config PPC_HAVE_KUEP config PPC_KUEP bool "Kernel Userspace Execution Prevention" depends on PPC_HAVE_KUEP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Execution Prevention (KUEP) @@ -401,7 +402,7 @@ config PPC_HAVE_KUAP config PPC_KUAP bool "Kernel Userspace Access Protection" depends on PPC_HAVE_KUAP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Access Protection (KUAP) @@ -424,10 +425,6 @@ config PPC_MMU_NOHASH def_bool y depends on !PPC_BOOK3S -config PPC_MMU_NOHASH_32 - def_bool y - depends on PPC_MMU_NOHASH && PPC32 - config PPC_BOOK3E_MMU def_bool y depends on FSL_BOOKE || PPC_BOOK3E @@ -476,9 +473,9 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-8192)" - range 2 8192 - depends on SMP + int "Maximum number of CPUs (2-8192)" if SMP + range 2 8192 if SMP + default "1" if !SMP default "32" if PPC64 default "4" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 143d4417f6cc..94470fb27c99 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,3 +22,5 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ +obj-$(CONFIG_PPC_BOOK3S) += book3s/ +obj-$(CONFIG_PPC_MICROWATT) += microwatt/ diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig new file mode 100644 index 000000000000..34c931592ef0 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_VAS + bool "IBM Virtual Accelerator Switchboard (VAS)" + depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES + default y + help + This enables support for IBM Virtual Accelerator Switchboard (VAS). + + VAS devices are found in POWER9-based and later systems, they + provide access to accelerator coprocessors such as NX-GZIP and + NX-842. This config allows the kernel to use NX-842 accelerators, + and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV + and POWER10 PowerVM platforms. + + If unsure, say "N". diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile new file mode 100644 index 000000000000..e790f1910f61 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PPC_VAS) += vas-api.o diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c new file mode 100644 index 000000000000..30172e52e16b --- /dev/null +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/kthread.h> +#include <linux/sched/signal.h> +#include <linux/mmu_context.h> +#include <linux/io.h> +#include <asm/vas.h> +#include <uapi/asm/vas-api.h> + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documententation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. + */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; + const struct vas_user_win_ops *vops; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +/* + * Take reference to pid and mm + */ +int get_vas_user_win_ref(struct vas_user_win_ref *task_ref) +{ + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is free by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + task_ref->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + task_ref->mm = get_task_mm(current); + if (!task_ref->mm) { + put_pid(task_ref->pid); + pr_err("VAS: pid(%d): mm_struct is not found\n", + current->pid); + return -EPERM; + } + + mmgrab(task_ref->mm); + mmput(task_ref->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it. So takes tgid + * reference until window closed to make sure tgid is not + * reused. + */ + task_ref->tgid = find_get_pid(task_tgid_vnr(current)); + + return 0; +} + +/* + * Successful return must release the task reference with + * put_task_struct + */ +static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref, + struct task_struct **tskp, struct pid **pidp) +{ + struct task_struct *tsk; + struct pid *pid; + + pid = task_ref->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + if (!tsk) { + pid = task_ref->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return false; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return false; + } + + *tskp = tsk; + *pidp = pid; + + return true; +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!task_ref->mm)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_FAULT_ADDRESS; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. + * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + /* + * Process closes send window after all pending NX requests are + * completed. In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + + if (!ref_get_pid_and_task(task_ref, &tsk, &pid)) + return; + + kthread_use_mm(task_ref->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + kthread_unuse_mm(task_ref->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +void vas_dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid window open API version\n"); + return -EINVAL; + } + + if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->open_win) { + pr_err("VAS API is not registered\n"); + return -EACCES; + } + + txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags, + cp_inst->coproc->cop_type); + if (IS_ERR(txwin)) { + pr_err("%s() VAS window open failed, %ld\n", __func__, + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst = fp->private_data; + int rc; + + if (cp_inst->txwin) { + if (cp_inst->coproc->vops && + cp_inst->coproc->vops->close_win) { + rc = cp_inst->coproc->vops->close_win(cp_inst->txwin); + if (rc) + return rc; + } + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). + */ + + return 0; +} + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("%s(): No send window open?\n", __func__); + return -EINVAL; + } + + if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->paste_addr) { + pr_err("%s(): VAS API is not registered\n", __func__); + return -EACCES; + } + + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (!paste_addr) { + pr_err("%s(): Window paste address failed\n", __func__); + return -EINVAL; + } + + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, + paste_addr, vma->vm_start, rc); + + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name, + const struct vas_user_win_ops *vops) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(mod, name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + coproc_device.vops = vops; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), + MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index 93ea41680f54..a1c293f42a1f 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -25,10 +25,9 @@ struct spiderpci_iowa_private { static void spiderpci_io_flush(struct iowa_bus *bus) { struct spiderpci_iowa_private *priv; - u32 val; priv = bus->private; - val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); iosync(); } diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index d56b4e3241cd..b41e81b22fdc 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -1657,14 +1657,13 @@ static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_problem __iomem *prob = spu->problem; - u32 dummy = 0; /* Restore, Step 66: * If CSA.MB_Stat[P]=0 (mailbox empty) then * read from the PPU_MB register. */ if ((csa->prob.mb_stat_R & 0xFF) == 0) { - dummy = in_be32(&prob->pu_mb_R); + in_be32(&prob->pu_mb_R); eieio(); } } @@ -1672,14 +1671,13 @@ static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 dummy = 0UL; /* Restore, Step 66: * If CSA.MB_Stat[I]=0 (mailbox empty) then * read from the PPUINT_MB register. */ if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { - dummy = in_be64(&priv2->puint_mb_R); + in_be64(&priv2->puint_mb_R); eieio(); spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); eieio(); diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig new file mode 100644 index 000000000000..8f6a81978461 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MICROWATT + depends on PPC_BOOK3S_64 && !SMP + bool "Microwatt SoC platform" + select PPC_XICS + select PPC_ICS_NATIVE + select PPC_ICP_NATIVE + select PPC_NATIVE + select PPC_UDBG_16550 + select ARCH_RANDOM + help + This option enables support for FPGA-based Microwatt implementations. + diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile new file mode 100644 index 000000000000..116d6d3ad3f0 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Makefile @@ -0,0 +1 @@ +obj-y += setup.o rng.o diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c new file mode 100644 index 000000000000..3d8ee6eb7dad --- /dev/null +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/powerpc/platforms/powernv/rng.c, which is: + * Copyright 2013, Michael Ellerman, IBM Corporation. + */ + +#define pr_fmt(fmt) "microwatt-rng: " fmt + +#include <linux/kernel.h> +#include <linux/smp.h> +#include <asm/archrandom.h> +#include <asm/cputable.h> +#include <asm/machdep.h> + +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul + +int microwatt_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +static __init int rng_init(void) +{ + unsigned long val; + int i; + + for (i = 0; i < 10; i++) { + if (microwatt_get_random_darn(&val)) { + ppc_md.get_random_seed = microwatt_get_random_darn; + return 0; + } + } + + pr_warn("Unable to use DARN for get_random_seed()\n"); + + return -EIO; +} +machine_subsys_initcall(, rng_init); diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c new file mode 100644 index 000000000000..0b02603bdb74 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -0,0 +1,41 @@ +/* + * Microwatt FPGA-based SoC platform setup code. + * + * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/xics.h> +#include <asm/udbg.h> + +static void __init microwatt_init_IRQ(void) +{ + xics_init(); +} + +static int __init microwatt_probe(void) +{ + return of_machine_is_compatible("microwatt-soc"); +} + +static int __init microwatt_populate(void) +{ + return of_platform_default_populate(NULL, NULL, NULL); +} +machine_arch_initcall(microwatt, microwatt_populate); + +define_machine(microwatt) { + .name = "microwatt", + .probe = microwatt_probe, + .init_IRQ = microwatt_init_IRQ, + .progress = udbg_progress, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index adae2a6712e1..bdfea6d6ab69 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -810,7 +810,7 @@ static int smp_core99_kick_cpu(int nr) * b __secondary_start_pmac_0 + nr*8 */ target = (unsigned long) __secondary_start_pmac_0 + nr * 8; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Put some life in our friend */ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); @@ -823,7 +823,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 619b093a0657..043eefbbdd28 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -33,20 +33,6 @@ config PPC_MEMTRACE Enabling this option allows for runtime allocation of memory (RAM) for hardware tracing. -config PPC_VAS - bool "IBM Virtual Accelerator Switchboard (VAS)" - depends on PPC_POWERNV && PPC_64K_PAGES - default y - help - This enables support for IBM Virtual Accelerator Switchboard (VAS). - - VAS allows accelerators in co-processors like NX-GZIP and NX-842 - to be accessible to kernel subsystems and user processes. - - VAS adapters are found in POWER9 based systems. - - If unsure, say N. - config SCOM_DEBUGFS bool "Expose SCOM controllers via debugfs" depends on DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index be2546b96816..dc7b37c23b60 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b18468dc31ff..6bb3c52633fb 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -711,7 +711,7 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, return PCIBIOS_SUCCESSFUL; } -#if CONFIG_EEH +#ifdef CONFIG_EEH static bool pnv_pci_cfg_check(struct pci_dn *pdn) { struct eeh_dev *edev = NULL; diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 73207b53dc2b..7e98b00ea2e8 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -169,6 +169,16 @@ static void update_hid_in_slw(u64 hid0) } } +static inline void update_power8_hid0(unsigned long hid0) +{ + /* + * The HID0 update on Power8 should at the very least be + * preceded by a SYNC instruction followed by an ISYNC + * instruction + */ + asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); +} + static void unsplit_core(void) { u64 hid0, mask; diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c deleted file mode 100644 index 98ed5d8c5441..000000000000 --- a/arch/powerpc/platforms/powernv/vas-api.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * VAS user space API for its accelerators (Only NX-GZIP is supported now) - * Copyright (C) 2019 Haren Myneni, IBM Corp - */ - -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/cdev.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <asm/vas.h> -#include <uapi/asm/vas-api.h> -#include "vas.h" - -/* - * The driver creates the device node that can be used as follows: - * For NX-GZIP - * - * fd = open("/dev/crypto/nx-gzip", O_RDWR); - * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); - * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). - * vas_copy(&crb, 0, 1); - * vas_paste(paste_addr, 0, 1); - * close(fd) or exit process to close window. - * - * where "vas_copy" and "vas_paste" are defined in copy-paste.h. - * copy/paste returns to the user space directly. So refer NX hardware - * documententation for exact copy/paste usage and completion / error - * conditions. - */ - -/* - * Wrapper object for the nx-gzip device - there is just one instance of - * this node for the whole system. - */ -static struct coproc_dev { - struct cdev cdev; - struct device *device; - char *name; - dev_t devt; - struct class *class; - enum vas_cop_type cop_type; -} coproc_device; - -struct coproc_instance { - struct coproc_dev *coproc; - struct vas_window *txwin; -}; - -static char *coproc_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); -} - -static int coproc_open(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst; - - cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); - if (!cp_inst) - return -ENOMEM; - - cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, - cdev); - fp->private_data = cp_inst; - - return 0; -} - -static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) -{ - void __user *uptr = (void __user *)arg; - struct vas_tx_win_attr txattr = {}; - struct vas_tx_win_open_attr uattr; - struct coproc_instance *cp_inst; - struct vas_window *txwin; - int rc, vasid; - - cp_inst = fp->private_data; - - /* - * One window for file descriptor - */ - if (cp_inst->txwin) - return -EEXIST; - - rc = copy_from_user(&uattr, uptr, sizeof(uattr)); - if (rc) { - pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); - return -EFAULT; - } - - if (uattr.version != 1) { - pr_err("Invalid version\n"); - return -EINVAL; - } - - vasid = uattr.vas_id; - - vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); - - txattr.lpid = mfspr(SPRN_LPID); - txattr.pidr = mfspr(SPRN_PID); - txattr.user_win = true; - txattr.rsvd_txbuf_count = false; - txattr.pswid = false; - - pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, - mfspr(SPRN_PID)); - - txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); - if (IS_ERR(txwin)) { - pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, - PTR_ERR(txwin)); - return PTR_ERR(txwin); - } - - cp_inst->txwin = txwin; - - return 0; -} - -static int coproc_release(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst = fp->private_data; - - if (cp_inst->txwin) { - vas_win_close(cp_inst->txwin); - cp_inst->txwin = NULL; - } - - kfree(cp_inst); - fp->private_data = NULL; - - /* - * We don't know here if user has other receive windows - * open, so we can't really call clear_thread_tidr(). - * So, once the process calls set_thread_tidr(), the - * TIDR value sticks around until process exits, resulting - * in an extra copy in restore_sprs(). - */ - - return 0; -} - -static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) -{ - struct coproc_instance *cp_inst = fp->private_data; - struct vas_window *txwin; - unsigned long pfn; - u64 paste_addr; - pgprot_t prot; - int rc; - - txwin = cp_inst->txwin; - - if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { - pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, - (vma->vm_end - vma->vm_start), PAGE_SIZE); - return -EINVAL; - } - - /* Ensure instance has an open send window */ - if (!txwin) { - pr_err("%s(): No send window open?\n", __func__); - return -EINVAL; - } - - vas_win_paste_addr(txwin, &paste_addr, NULL); - pfn = paste_addr >> PAGE_SHIFT; - - /* flags, page_prot from cxl_mmap(), except we want cachable */ - vma->vm_flags |= VM_IO | VM_PFNMAP; - vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); - - prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); - - rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, prot); - - pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, - paste_addr, vma->vm_start, rc); - - return rc; -} - -static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case VAS_TX_WIN_OPEN: - return coproc_ioc_tx_win_open(fp, arg); - default: - return -EINVAL; - } -} - -static struct file_operations coproc_fops = { - .open = coproc_open, - .release = coproc_release, - .mmap = coproc_mmap, - .unlocked_ioctl = coproc_ioctl, -}; - -/* - * Supporting only nx-gzip coprocessor type now, but this API code - * extended to other coprocessor types later. - */ -int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, - const char *name) -{ - int rc = -EINVAL; - dev_t devno; - - rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); - if (rc) { - pr_err("Unable to allocate coproc major number: %i\n", rc); - return rc; - } - - pr_devel("%s device allocated, dev [%i,%i]\n", name, - MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); - - coproc_device.class = class_create(mod, name); - if (IS_ERR(coproc_device.class)) { - rc = PTR_ERR(coproc_device.class); - pr_err("Unable to create %s class %d\n", name, rc); - goto err_class; - } - coproc_device.class->devnode = coproc_devnode; - coproc_device.cop_type = cop_type; - - coproc_fops.owner = mod; - cdev_init(&coproc_device.cdev, &coproc_fops); - - devno = MKDEV(MAJOR(coproc_device.devt), 0); - rc = cdev_add(&coproc_device.cdev, devno, 1); - if (rc) { - pr_err("cdev_add() failed %d\n", rc); - goto err_cdev; - } - - coproc_device.device = device_create(coproc_device.class, NULL, - devno, NULL, name, MINOR(devno)); - if (IS_ERR(coproc_device.device)) { - rc = PTR_ERR(coproc_device.device); - pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); - goto err; - } - - pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), - MINOR(devno)); - - return 0; - -err: - cdev_del(&coproc_device.cdev); -err_cdev: - class_destroy(coproc_device.class); -err_class: - unregister_chrdev_region(coproc_device.devt, 1); - return rc; -} -EXPORT_SYMBOL_GPL(vas_register_coproc_api); - -void vas_unregister_coproc_api(void) -{ - dev_t devno; - - cdev_del(&coproc_device.cdev); - devno = MKDEV(MAJOR(coproc_device.devt), 0); - device_destroy(coproc_device.class, devno); - - class_destroy(coproc_device.class); - unregister_chrdev_region(coproc_device.devt, 1); -} -EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 41fa90d2f4ab..3ce89a4b54be 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <asm/vas.h> #include "vas.h" static struct dentry *vas_debugfs; @@ -28,7 +29,7 @@ static char *cop_to_str(int cop) static int info_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -36,9 +37,9 @@ static int info_show(struct seq_file *s, void *private) if (!window->hvwc_map) goto unlock; - seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), + seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop), window->tx_win ? "Send" : "Receive"); - seq_printf(s, "Pid : %d\n", vas_window_pid(window)); + seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win)); unlock: mutex_unlock(&vas_mutex); @@ -47,7 +48,7 @@ unlock: DEFINE_SHOW_ATTRIBUTE(info); -static inline void print_reg(struct seq_file *s, struct vas_window *win, +static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win, char *name, u32 reg) { seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name); @@ -55,7 +56,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win, static int hvwc_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -103,8 +104,10 @@ unlock: DEFINE_SHOW_ATTRIBUTE(hvwc); -void vas_window_free_dbgdir(struct vas_window *window) +void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win) { + struct vas_window *window = &pnv_win->vas_win; + if (window->dbgdir) { debugfs_remove_recursive(window->dbgdir); kfree(window->dbgname); @@ -113,21 +116,21 @@ void vas_window_free_dbgdir(struct vas_window *window) } } -void vas_window_init_dbgdir(struct vas_window *window) +void vas_window_init_dbgdir(struct pnv_vas_window *window) { struct dentry *d; if (!window->vinst->dbgdir) return; - window->dbgname = kzalloc(16, GFP_KERNEL); - if (!window->dbgname) + window->vas_win.dbgname = kzalloc(16, GFP_KERNEL); + if (!window->vas_win.dbgname) return; - snprintf(window->dbgname, 16, "w%d", window->winid); + snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid); - d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir); - window->dbgdir = d; + d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir); + window->vas_win.dbgdir = d; debugfs_create_file("info", 0444, d, window, &info_fops); debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 3d21fce254b7..a7aabc18039e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -26,150 +26,6 @@ */ #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) -static void dump_crb(struct coprocessor_request_block *crb) -{ - struct data_descriptor_entry *dde; - struct nx_fault_stamp *nx; - - dde = &crb->source; - pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, dde->flags); - - dde = &crb->target; - pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, dde->flags); - - nx = &crb->stamp.nx; - pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", - be32_to_cpu(nx->pswid), - be64_to_cpu(crb->stamp.nx.fault_storage_addr), - nx->flags, nx->fault_status); -} - -/* - * Update the CSB to indicate a translation error. - * - * User space will be polling on CSB after the request is issued. - * If NX can handle the request without any issues, it updates CSB. - * Whereas if NX encounters page fault, the kernel will handle the - * fault and update CSB with translation error. - * - * If we are unable to update the CSB means copy_to_user failed due to - * invalid csb_addr, send a signal to the process. - */ -static void update_csb(struct vas_window *window, - struct coprocessor_request_block *crb) -{ - struct coprocessor_status_block csb; - struct kernel_siginfo info; - struct task_struct *tsk; - void __user *csb_addr; - struct pid *pid; - int rc; - - /* - * NX user space windows can not be opened for task->mm=NULL - * and faults will not be generated for kernel requests. - */ - if (WARN_ON_ONCE(!window->mm || !window->user_win)) - return; - - csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); - - memset(&csb, 0, sizeof(csb)); - csb.cc = CSB_CC_FAULT_ADDRESS; - csb.ce = CSB_CE_TERMINATION; - csb.cs = 0; - csb.count = 0; - - /* - * NX operates and returns in BE format as defined CRB struct. - * So saves fault_storage_addr in BE as NX pastes in FIFO and - * expects user space to convert to CPU format. - */ - csb.address = crb->stamp.nx.fault_storage_addr; - csb.flags = 0; - - pid = window->pid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Process closes send window after all pending NX requests are - * completed. In multi-thread applications, a child thread can - * open a window and can exit without closing it. May be some - * requests are pending or this window can be used by other - * threads later. We should handle faults if NX encounters - * pages faults on these requests. Update CSB with translation - * error and fault address. If csb_addr passed by user space is - * invalid, send SEGV signal to pid saved in window. If the - * child thread is not running, send the signal to tgid. - * Parent thread (tgid) will close this window upon its exit. - * - * pid and mm references are taken when window is opened by - * process (pid). So tgid is used only when child thread opens - * a window and exits without closing it. - */ - if (!tsk) { - pid = window->tgid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Parent thread (tgid) will be closing window when it - * exits. So should not get here. - */ - if (WARN_ON_ONCE(!tsk)) - return; - } - - /* Return if the task is exiting. */ - if (tsk->flags & PF_EXITING) { - put_task_struct(tsk); - return; - } - - kthread_use_mm(window->mm); - rc = copy_to_user(csb_addr, &csb, sizeof(csb)); - /* - * User space polls on csb.flags (first byte). So add barrier - * then copy first byte with csb flags update. - */ - if (!rc) { - csb.flags = CSB_V; - /* Make sure update to csb.flags is visible now */ - smp_mb(); - rc = copy_to_user(csb_addr, &csb, sizeof(u8)); - } - kthread_unuse_mm(window->mm); - put_task_struct(tsk); - - /* Success */ - if (!rc) - return; - - pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", - csb_addr, pid_vnr(pid)); - - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = EFAULT; - info.si_code = SEGV_MAPERR; - info.si_addr = csb_addr; - - /* - * process will be polling on csb.flags after request is sent to - * NX. So generally CSB update should not fail except when an - * application passes invalid csb_addr. So an error message will - * be displayed and leave it to user space whether to ignore or - * handle this signal. - */ - rcu_read_lock(); - rc = kill_pid_info(SIGSEGV, &info, pid); - rcu_read_unlock(); - - pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, - pid_vnr(pid), rc); -} - static void dump_fifo(struct vas_instance *vinst, void *entry) { unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; @@ -212,7 +68,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) struct vas_instance *vinst = data; struct coprocessor_request_block *crb, *entry; struct coprocessor_request_block buf; - struct vas_window *window; + struct pnv_vas_window *window; unsigned long flags; void *fifo; @@ -272,7 +128,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) vinst->vas_id, vinst->fault_fifo, fifo, vinst->fault_crbs); - dump_crb(crb); + vas_dump_crb(crb); window = vas_pswid_to_window(vinst, be32_to_cpu(crb->stamp.nx.pswid)); @@ -293,7 +149,14 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) WARN_ON_ONCE(1); } else { - update_csb(window, crb); + /* + * NX sees faults only with user space windows. + */ + if (window->user_win) + vas_update_csb(crb, &window->vas_win.task_ref); + else + WARN_ON_ONCE(!window->user_win); + /* * Return credit for send window after processing * fault CRB. @@ -336,6 +199,7 @@ irqreturn_t vas_fault_handler(int irq, void *dev_id) int vas_setup_fault_window(struct vas_instance *vinst) { struct vas_rx_win_attr attr; + struct vas_window *win; vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); @@ -364,18 +228,17 @@ int vas_setup_fault_window(struct vas_instance *vinst) attr.lnotify_pid = mfspr(SPRN_PID); attr.lnotify_tid = mfspr(SPRN_PID); - vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, - &attr); - - if (IS_ERR(vinst->fault_win)) { - pr_err("VAS: Error %ld opening FaultWin\n", - PTR_ERR(vinst->fault_win)); + win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr); + if (IS_ERR(win)) { + pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win)); kfree(vinst->fault_fifo); - return PTR_ERR(vinst->fault_win); + return PTR_ERR(win); } + vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win); + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", - vinst->fault_win->winid, attr.lnotify_lpid, + vinst->fault_win->vas_win.winid, attr.lnotify_lpid, attr.lnotify_pid, attr.lnotify_tid); return 0; diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h index a449b9f0c12e..ca2e08f2ddc0 100644 --- a/arch/powerpc/platforms/powernv/vas-trace.h +++ b/arch/powerpc/platforms/powernv/vas-trace.h @@ -80,7 +80,7 @@ TRACE_EVENT( vas_tx_win_open, TRACE_EVENT( vas_paste_crb, TP_PROTO(struct task_struct *tsk, - struct vas_window *win), + struct pnv_vas_window *win), TP_ARGS(tsk, win), @@ -96,7 +96,7 @@ TRACE_EVENT( vas_paste_crb, TP_fast_assign( __entry->pid = tsk->pid; __entry->vasid = win->vinst->vas_id; - __entry->winid = win->winid; + __entry->winid = win->vas_win.winid; __entry->paste_kaddr = (unsigned long)win->paste_kaddr ), diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 5f5fe63a3d1c..0f8d39fbf2b2 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -16,6 +16,7 @@ #include <linux/mmu_context.h> #include <asm/switch_to.h> #include <asm/ppc-opcode.h> +#include <asm/vas.h> #include "vas.h" #include "copy-paste.h" @@ -26,14 +27,14 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. */ -void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; base = window->vinst->paste_base_addr; shift = window->vinst->paste_win_id_shift; - winid = window->winid; + winid = window->vas_win.winid; *addr = base + (winid << shift); if (len) @@ -42,23 +43,23 @@ void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } -static inline void get_hvwc_mmio_bar(struct vas_window *window, +static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->hvwc_bar_start; - *start = pbaddr + window->winid * VAS_HVWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE; *len = VAS_HVWC_SIZE; } -static inline void get_uwc_mmio_bar(struct vas_window *window, +static inline void get_uwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->uwc_bar_start; - *start = pbaddr + window->winid * VAS_UWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE; *len = VAS_UWC_SIZE; } @@ -67,7 +68,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window, * space. Unlike MMIO regions (map_mmio_region() below), paste region must * be mapped cache-able and is only applicable to send windows. */ -static void *map_paste_region(struct vas_window *txwin) +static void *map_paste_region(struct pnv_vas_window *txwin) { int len; void *map; @@ -75,7 +76,7 @@ static void *map_paste_region(struct vas_window *txwin) u64 start; name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id, - txwin->winid); + txwin->vas_win.winid); if (!name) goto free_name; @@ -132,7 +133,7 @@ static void unmap_region(void *addr, u64 start, int len) /* * Unmap the paste address region for a window. */ -static void unmap_paste_region(struct vas_window *window) +static void unmap_paste_region(struct pnv_vas_window *window) { int len; u64 busaddr_start; @@ -153,7 +154,7 @@ static void unmap_paste_region(struct vas_window *window) * path, just minimize the time we hold the mutex for now. We can add * a per-instance mutex later if necessary. */ -static void unmap_winctx_mmio_bars(struct vas_window *window) +static void unmap_winctx_mmio_bars(struct pnv_vas_window *window) { int len; void *uwc_map; @@ -186,7 +187,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window) * OS/User Window Context (UWC) MMIO Base Address Region for the given window. * Map these bus addresses and save the mapped kernel addresses in @window. */ -static int map_winctx_mmio_bars(struct vas_window *window) +static int map_winctx_mmio_bars(struct pnv_vas_window *window) { int len; u64 start; @@ -214,7 +215,7 @@ static int map_winctx_mmio_bars(struct vas_window *window) * registers are not sequential. And, we can only write to offsets * with valid registers. */ -static void reset_window_regs(struct vas_window *window) +static void reset_window_regs(struct pnv_vas_window *window) { write_hvwc_reg(window, VREG(LPID), 0ULL); write_hvwc_reg(window, VREG(PID), 0ULL); @@ -270,7 +271,7 @@ static void reset_window_regs(struct vas_window *window) * want to add fields to vas_winctx and move the initialization to * init_vas_winctx_regs(). */ -static void init_xlate_regs(struct vas_window *window, bool user_win) +static void init_xlate_regs(struct pnv_vas_window *window, bool user_win) { u64 lpcr, val; @@ -335,7 +336,7 @@ static void init_xlate_regs(struct vas_window *window, bool user_win) * * TODO: Reserved (aka dedicated) send buffers are not supported yet. */ -static void init_rsvd_tx_buf_count(struct vas_window *txwin, +static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin, struct vas_winctx *winctx) { write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL); @@ -357,7 +358,7 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin, * as a one-time task? That could work for NX but what about other * receivers? Let the receivers tell us the rx-fifo buffers for now. */ -static void init_winctx_regs(struct vas_window *window, +static void init_winctx_regs(struct pnv_vas_window *window, struct vas_winctx *winctx) { u64 val; @@ -519,10 +520,10 @@ static int vas_assign_window_id(struct ida *ida) return winid; } -static void vas_window_free(struct vas_window *window) +static void vas_window_free(struct pnv_vas_window *window) { - int winid = window->winid; struct vas_instance *vinst = window->vinst; + int winid = window->vas_win.winid; unmap_winctx_mmio_bars(window); @@ -533,10 +534,10 @@ static void vas_window_free(struct vas_window *window) vas_release_window_id(&vinst->ida, winid); } -static struct vas_window *vas_window_alloc(struct vas_instance *vinst) +static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst) { int winid; - struct vas_window *window; + struct pnv_vas_window *window; winid = vas_assign_window_id(&vinst->ida); if (winid < 0) @@ -547,7 +548,7 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst) goto out_free; window->vinst = vinst; - window->winid = winid; + window->vas_win.winid = winid; if (map_winctx_mmio_bars(window)) goto out_free; @@ -562,7 +563,7 @@ out_free: return ERR_PTR(-ENOMEM); } -static void put_rx_win(struct vas_window *rxwin) +static void put_rx_win(struct pnv_vas_window *rxwin) { /* Better not be a send window! */ WARN_ON_ONCE(rxwin->tx_win); @@ -578,10 +579,11 @@ static void put_rx_win(struct vas_window *rxwin) * * NOTE: We access ->windows[] table and assume that vinst->mutex is held. */ -static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) +static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst, + u32 pswid) { int vasid, winid; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; decode_pswid(pswid, &vasid, &winid); @@ -590,7 +592,7 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) rxwin = vinst->windows[winid]; - if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW) + if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW) return ERR_PTR(-EINVAL); return rxwin; @@ -602,10 +604,10 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) * * See also function header of set_vinst_win(). */ -static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, +static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst, enum vas_cop_type cop, u32 pswid) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; mutex_lock(&vinst->mutex); @@ -638,9 +640,9 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, * window, we also save the window in the ->rxwin[] table. */ static void set_vinst_win(struct vas_instance *vinst, - struct vas_window *window) + struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; mutex_lock(&vinst->mutex); @@ -649,8 +651,8 @@ static void set_vinst_win(struct vas_instance *vinst, * unless its a user (FTW) window. */ if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = window; + WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = window; } WARN_ON_ONCE(vinst->windows[id] != NULL); @@ -663,16 +665,16 @@ static void set_vinst_win(struct vas_instance *vinst, * Clear this window from the table(s) of windows for this VAS instance. * See also function header of set_vinst_win(). */ -static void clear_vinst_win(struct vas_window *window) +static void clear_vinst_win(struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; struct vas_instance *vinst = window->vinst; mutex_lock(&vinst->mutex); if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(!vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = NULL; + WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = NULL; } WARN_ON_ONCE(vinst->windows[id] != window); @@ -681,7 +683,7 @@ static void clear_vinst_win(struct vas_window *window) mutex_unlock(&vinst->mutex); } -static void init_winctx_for_rxwin(struct vas_window *rxwin, +static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin, struct vas_rx_win_attr *rxattr, struct vas_winctx *winctx) { @@ -702,7 +704,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->rx_fifo = rxattr->rx_fifo; winctx->rx_fifo_size = rxattr->rx_fifo_size; - winctx->wcreds_max = rxwin->wcreds_max; + winctx->wcreds_max = rxwin->vas_win.wcreds_max; winctx->pin_win = rxattr->pin_win; winctx->nx_win = rxattr->nx_win; @@ -851,7 +853,7 @@ EXPORT_SYMBOL_GPL(vas_init_rx_win_attr); struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *rxattr) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -870,21 +872,21 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin = vas_window_alloc(vinst); if (IS_ERR(rxwin)) { pr_devel("Unable to allocate memory for Rx window\n"); - return rxwin; + return (struct vas_window *)rxwin; } rxwin->tx_win = false; rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; - rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max; + rxwin->vas_win.cop = cop; + rxwin->vas_win.wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); set_vinst_win(vinst, rxwin); - return rxwin; + return &rxwin->vas_win; } EXPORT_SYMBOL_GPL(vas_rx_win_open); @@ -905,7 +907,7 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) } EXPORT_SYMBOL_GPL(vas_init_tx_win_attr); -static void init_winctx_for_txwin(struct vas_window *txwin, +static void init_winctx_for_txwin(struct pnv_vas_window *txwin, struct vas_tx_win_attr *txattr, struct vas_winctx *winctx) { @@ -926,7 +928,7 @@ static void init_winctx_for_txwin(struct vas_window *txwin, */ memset(winctx, 0, sizeof(struct vas_winctx)); - winctx->wcreds_max = txwin->wcreds_max; + winctx->wcreds_max = txwin->vas_win.wcreds_max; winctx->user_win = txattr->user_win; winctx->nx_win = txwin->rxwin->nx_win; @@ -946,13 +948,13 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; - winctx->rx_win_id = txwin->rxwin->winid; + winctx->rx_win_id = txwin->rxwin->vas_win.winid; /* * IRQ and fault window setup is successful. Set fault window * for the send window so that ready to handle faults. */ if (txwin->vinst->virq) - winctx->fault_win_id = txwin->vinst->fault_win->winid; + winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; @@ -962,7 +964,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->irq_port = txwin->vinst->irq_port; winctx->pswid = txattr->pswid ? txattr->pswid : - encode_pswid(txwin->vinst->vas_id, txwin->winid); + encode_pswid(txwin->vinst->vas_id, + txwin->vas_win.winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -993,8 +996,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, struct vas_tx_win_attr *attr) { int rc; - struct vas_window *txwin; - struct vas_window *rxwin; + struct pnv_vas_window *txwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -1020,7 +1023,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rxwin = get_vinst_rxwin(vinst, cop, attr->pswid); if (IS_ERR(rxwin)) { pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop); - return rxwin; + return (struct vas_window *)rxwin; } txwin = vas_window_alloc(vinst); @@ -1029,12 +1032,12 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, goto put_rxwin; } - txwin->cop = cop; + txwin->vas_win.cop = cop; txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; txwin->user_win = attr->user_win; - txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; + txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; init_winctx_for_txwin(txwin, attr, &winctx); @@ -1064,56 +1067,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rc = -ENODEV; goto free_window; } - - /* - * Window opened by a child thread may not be closed when - * it exits. So take reference to its pid and release it - * when the window is free by parent thread. - * Acquire a reference to the task's pid to make sure - * pid will not be re-used - needed only for multithread - * applications. - */ - txwin->pid = get_task_pid(current, PIDTYPE_PID); - /* - * Acquire a reference to the task's mm. - */ - txwin->mm = get_task_mm(current); - - if (!txwin->mm) { - put_pid(txwin->pid); - pr_err("VAS: pid(%d): mm_struct is not found\n", - current->pid); - rc = -EPERM; + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) goto free_window; - } - mmgrab(txwin->mm); - mmput(txwin->mm); - mm_context_add_vas_window(txwin->mm); - /* - * Process closes window during exit. In the case of - * multithread application, the child thread can open - * window and can exit without closing it. Expects parent - * thread to use and close the window. So do not need - * to take pid reference for parent thread. - */ - txwin->tgid = find_get_pid(task_tgid_vnr(current)); - /* - * Even a process that has no foreign real address mapping can - * use an unpaired COPY instruction (to no real effect). Issue - * CP_ABORT to clear any pending COPY and prevent a covert - * channel. - * - * __switch_to() will issue CP_ABORT on future context switches - * if process / thread has any open VAS window (Use - * current->mm->context.vas_windows). - */ - asm volatile(PPC_CP_ABORT); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); } set_vinst_win(vinst, txwin); - return txwin; + return &txwin->vas_win; free_window: vas_window_free(txwin); @@ -1132,12 +1095,14 @@ int vas_copy_crb(void *crb, int offset) EXPORT_SYMBOL_GPL(vas_copy_crb); #define RMA_LSMP_REPORT_ENABLE PPC_BIT(53) -int vas_paste_crb(struct vas_window *txwin, int offset, bool re) +int vas_paste_crb(struct vas_window *vwin, int offset, bool re) { + struct pnv_vas_window *txwin; int rc; void *addr; uint64_t val; + txwin = container_of(vwin, struct pnv_vas_window, vas_win); trace_vas_paste_crb(current, txwin); /* @@ -1167,7 +1132,7 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re) else rc = -EINVAL; - pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid, + pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid, read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); return rc; @@ -1187,7 +1152,7 @@ EXPORT_SYMBOL_GPL(vas_paste_crb); * user space. (NX-842 driver waits for CSB and Fast thread-wakeup * doesn't use credit checking). */ -static void poll_window_credits(struct vas_window *window) +static void poll_window_credits(struct pnv_vas_window *window) { u64 val; int creds, mode; @@ -1217,7 +1182,7 @@ retry: * and issue CRB Kill to stop all pending requests. Need only * if there is a bug in NX or fault handling in kernel. */ - if (creds < window->wcreds_max) { + if (creds < window->vas_win.wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); @@ -1228,7 +1193,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", - vas_window_pid(window), window->winid, + vas_window_pid(&window->vas_win), + window->vas_win.winid, creds, count); goto retry; @@ -1240,7 +1206,7 @@ retry: * short time to queue a CRB, so window should not be busy for too long. * Trying 5ms intervals. */ -static void poll_window_busy_state(struct vas_window *window) +static void poll_window_busy_state(struct pnv_vas_window *window) { int busy; u64 val; @@ -1260,7 +1226,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n", - vas_window_pid(window), window->winid, count); + vas_window_pid(&window->vas_win), + window->vas_win.winid, count); goto retry; } @@ -1282,7 +1249,7 @@ retry: * casting out becomes necessary we should consider offloading the * job to a worker thread, so the window close can proceed quickly. */ -static void poll_window_castout(struct vas_window *window) +static void poll_window_castout(struct pnv_vas_window *window) { /* stub for now */ } @@ -1291,7 +1258,7 @@ static void poll_window_castout(struct vas_window *window) * Unpin and close a window so no new requests are accepted and the * hardware can evict this window from cache if necessary. */ -static void unpin_close_window(struct vas_window *window) +static void unpin_close_window(struct pnv_vas_window *window) { u64 val; @@ -1313,11 +1280,15 @@ static void unpin_close_window(struct vas_window *window) * * Besides the hardware, kernel has some bookkeeping of course. */ -int vas_win_close(struct vas_window *window) +int vas_win_close(struct vas_window *vwin) { - if (!window) + struct pnv_vas_window *window; + + if (!vwin) return 0; + window = container_of(vwin, struct pnv_vas_window, vas_win); + if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { pr_devel("Attempting to close an active Rx window!\n"); WARN_ON_ONCE(1); @@ -1339,12 +1310,8 @@ int vas_win_close(struct vas_window *window) /* if send window, drop reference to matching receive window */ if (window->tx_win) { if (window->user_win) { - /* Drop references to pid and mm */ - put_pid(window->pid); - if (window->mm) { - mm_context_remove_vas_window(window->mm); - mmdrop(window->mm); - } + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); } put_rx_win(window->rxwin); } @@ -1377,7 +1344,7 @@ EXPORT_SYMBOL_GPL(vas_win_close); * - The kernel with return credit on fault window after reading entry * from fault FIFO. */ -void vas_return_credit(struct vas_window *window, bool tx) +void vas_return_credit(struct pnv_vas_window *window, bool tx) { uint64_t val; @@ -1391,10 +1358,10 @@ void vas_return_credit(struct vas_window *window, bool tx) } } -struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid) { - struct vas_window *window; + struct pnv_vas_window *window; int winid; if (!pswid) { @@ -1431,13 +1398,74 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, * by NX). */ if (!window->tx_win || !window->user_win || !window->nx_win || - window->cop == VAS_COP_TYPE_FAULT || - window->cop == VAS_COP_TYPE_FTW) { + window->vas_win.cop == VAS_COP_TYPE_FAULT || + window->vas_win.cop == VAS_COP_TYPE_FTW) { pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", winid, window->tx_win, window->user_win, - window->nx_win, window->cop); + window->nx_win, window->vas_win.cop); WARN_ON(1); } return window; } + +static struct vas_window *vas_user_win_open(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + struct vas_tx_win_attr txattr = {}; + + vas_init_tx_win_attr(&txattr, cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + return vas_tx_win_open(vas_id, cop_type, &txattr); +} + +static u64 vas_user_win_paste_addr(struct vas_window *txwin) +{ + struct pnv_vas_window *win; + u64 paste_addr; + + win = container_of(txwin, struct pnv_vas_window, vas_win); + vas_win_paste_addr(win, &paste_addr, NULL); + + return paste_addr; +} + +static int vas_user_win_close(struct vas_window *txwin) +{ + vas_win_close(txwin); + + return 0; +} + +static const struct vas_user_win_ops vops = { + .open_win = vas_user_win_open, + .paste_addr = vas_user_win_paste_addr, + .close_win = vas_user_win_close, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + + return vas_register_coproc_api(mod, cop_type, name, &vops); +} +EXPORT_SYMBOL_GPL(vas_register_api_powernv); + +void vas_unregister_api_powernv(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_powernv); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index c7db3190baca..8bb08e395de0 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -334,11 +334,11 @@ struct vas_instance { int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ spinlock_t fault_lock; /* Protects fifo_in_progress update */ void *fault_fifo; - struct vas_window *fault_win; /* Fault window */ + struct pnv_vas_window *fault_win; /* Fault window */ struct mutex mutex; - struct vas_window *rxwin[VAS_COP_TYPE_MAX]; - struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX]; + struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP]; char *name; char *dbgname; @@ -346,32 +346,24 @@ struct vas_instance { }; /* - * In-kernel state a VAS window. One per window. + * In-kernel state a VAS window on PowerNV. One per window. */ -struct vas_window { +struct pnv_vas_window { + struct vas_window vas_win; /* Fields common to send and receive windows */ struct vas_instance *vinst; - int winid; bool tx_win; /* True if send window */ bool nx_win; /* True if NX window */ bool user_win; /* True if user space window */ void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ - struct pid *pid; /* Linux process id of owner */ - struct pid *tgid; /* Thread group ID of owner */ - struct mm_struct *mm; /* Linux process mm_struct */ - int wcreds_max; /* Window credits */ - - char *dbgname; - struct dentry *dbgdir; /* Fields applicable only to send windows */ void *paste_kaddr; char *paste_addr_name; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; - /* Feilds applicable only to receive windows */ - enum vas_cop_type cop; + /* Fields applicable only to receive windows */ atomic_t num_txwins; }; @@ -430,32 +422,32 @@ extern struct mutex vas_mutex; extern struct vas_instance *find_vas_instance(int vasid); extern void vas_init_dbgdir(void); extern void vas_instance_init_dbgdir(struct vas_instance *vinst); -extern void vas_window_init_dbgdir(struct vas_window *win); -extern void vas_window_free_dbgdir(struct vas_window *win); +extern void vas_window_init_dbgdir(struct pnv_vas_window *win); +extern void vas_window_free_dbgdir(struct pnv_vas_window *win); extern int vas_setup_fault_window(struct vas_instance *vinst); extern irqreturn_t vas_fault_thread_fn(int irq, void *data); extern irqreturn_t vas_fault_handler(int irq, void *dev_id); -extern void vas_return_credit(struct vas_window *window, bool tx); -extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +extern void vas_return_credit(struct pnv_vas_window *window, bool tx); +extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid); -extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, - int *len); +extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, + int *len); static inline int vas_window_pid(struct vas_window *window) { - return pid_vnr(window->pid); + return pid_vnr(window->task_ref.pid); } -static inline void vas_log_write(struct vas_window *win, char *name, +static inline void vas_log_write(struct pnv_vas_window *win, char *name, void *regptr, u64 val) { if (val) pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n", - win->tx_win ? "Tx" : "Rx", win->winid, name, - regptr, val); + win->tx_win ? "Tx" : "Rx", win->vas_win.winid, + name, regptr, val); } -static inline void write_uwc_reg(struct vas_window *win, char *name, +static inline void write_uwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -466,7 +458,7 @@ static inline void write_uwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline void write_hvwc_reg(struct vas_window *win, char *name, +static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -477,7 +469,7 @@ static inline void write_hvwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline u64 read_hvwc_reg(struct vas_window *win, +static inline u64 read_hvwc_reg(struct pnv_vas_window *win, char *name __maybe_unused, s32 reg) { return in_be64(win->hvwc_map+reg); diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index e32406e918d0..ebed94942d39 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -85,6 +85,15 @@ config PS3_SYS_MANAGER This support is required for PS3 system control. In general, all users will say Y or M. +config PS3_VERBOSE_RESULT + bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED + depends on PPC_PS3 + help + Enables more verbose log mesages for LV1 hypercall results. + + If in doubt, say N here and reduce the size of the kernel by a + small amount. + config PS3_REPOSITORY_WRITE bool "PS3 Repository write support" if PS3_ADVANCED depends on PPC_PS3 diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index d094321964fb..a81eac35d900 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -6,6 +6,7 @@ * Copyright 2006 Sony Corp. */ +#include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/memblock.h> @@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, enum ps3_dma_region_type region_type, void *addr, unsigned long len) { unsigned long lpar_addr; + int result; lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; @@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset -= map.r1.offset; r->len = len ? len : ALIGN(map.total, 1 << r->page_size); + dev->core.dma_mask = &r->dma_mask; + + result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32)); + + if (result < 0) { + dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n", + __func__, __LINE__, result); + return result; + } + switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: r->region_ops = (USE_DYNAMIC_DMA) diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index e9ae5dd03593..3de9145c20bc 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -36,6 +36,7 @@ DEFINE_MUTEX(ps3_gpu_mutex); EXPORT_SYMBOL_GPL(ps3_gpu_mutex); static union ps3_firmware_version ps3_firmware_version; +static char ps3_firmware_version_str[16]; void ps3_get_firmware_version(union ps3_firmware_version *v) { @@ -182,6 +183,40 @@ static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx) return lv1_set_dabr(dabr, dabrx) ? -1 : 0; } +static ssize_t ps3_fw_version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s", ps3_firmware_version_str); +} + +static int __init ps3_setup_sysfs(void) +{ + static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO, + ps3_fw_version_show, NULL); + static struct kobject *kobj; + int result; + + kobj = kobject_create_and_add("ps3", firmware_kobj); + + if (!kobj) { + pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__, + __LINE__); + return -ENOMEM; + } + + result = sysfs_create_file(kobj, &attr.attr); + + if (result) { + pr_warn("%s:%d: sysfs_create_file failed.\n", __func__, + __LINE__); + kobject_put(kobj); + return -ENOMEM; + } + + return 0; +} +core_initcall(ps3_setup_sysfs); + static void __init ps3_setup_arch(void) { u64 tmp; @@ -190,9 +225,11 @@ static void __init ps3_setup_arch(void) lv1_get_version_info(&ps3_firmware_version.raw, &tmp); - printk(KERN_INFO "PS3 firmware version %u.%u.%u\n", - ps3_firmware_version.major, ps3_firmware_version.minor, - ps3_firmware_version.rev); + snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str), + "%u.%u.%u", ps3_firmware_version.major, + ps3_firmware_version.minor, ps3_firmware_version.rev); + + printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str); ps3_spu_set_platform(); diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index b431f41c6cb5..1a5665875165 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -64,9 +64,10 @@ static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev) result = lv1_open_device(dev->bus_id, dev->dev_id, 0); if (result) { - pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__, - __LINE__, ps3_result(result)); - result = -EPERM; + pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n", + __func__, __LINE__, dev->match_id, dev->match_sub_id, + dev_name(&dev->core), ps3_result(result)); + result = -EPERM; } done: @@ -120,7 +121,7 @@ static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev) result = lv1_gpu_open(0); if (result) { - pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__, + pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__, __LINE__, ps3_result(result)); result = -EPERM; } diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index c8a2b0b05ac0..4cda0ef87be0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PPC_VAS) += vas.o diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8377f1f7c78e..36f66556a7c6 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np) static bool lmb_is_removable(struct drmem_lmb *lmb) { - if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) return false; #ifdef CONFIG_FA_DUMP @@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { struct drmem_lmb *lmb; - int lmbs_removed = 0; + int lmbs_reserved = 0; int lmbs_available = 0; int rc; @@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) */ drmem_mark_lmb_reserved(lmb); - lmbs_removed++; - if (lmbs_removed == lmbs_to_remove) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_remove) break; } - if (lmbs_removed != lmbs_to_remove) { + if (lmbs_reserved != lmbs_to_remove) { pr_err("Memory hot-remove failed, adding LMB's back\n"); for_each_drmem_lmb(lmb) { @@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->drc_index); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = -EINVAL; @@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->base_addr); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = 0; } @@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-remove %u LMB(s) at %x\n", @@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (rc) return -EINVAL; - /* Validate that there are enough LMBs to satisfy the request */ + /* + * Validate that all LMBs in range are not reserved. Note that it + * is ok if they are !ASSIGNED since our goal here is to remove the + * LMB range, regardless of whether some LMBs were already removed + * by any other reason. + * + * This is a contrast to what is done in remove_by_count() where we + * check for both RESERVED and !ASSIGNED (via lmb_is_removable()), + * because we want to remove a fixed amount of LMBs in that function. + */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_remove) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + /* + * dlpar_remove_lmb() will error out if the LMB is already + * !ASSIGNED, but this case is a no-op for us. + */ if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; @@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (!drmem_lmb_reserved(lmb)) continue; + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the LMB removal failed. + */ + dlpar_unisolate_drc(lmb->drc_index); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB, drc index %x\n", @@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np) { return 0; } -static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) -{ - return -EOPNOTSUPP; -} static int dlpar_remove_lmb(struct drmem_lmb *lmb) { return -EOPNOTSUPP; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index ef26fe40efb0..e2b69cc3beaf 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -310,6 +310,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, dev_err(&p->pdev->dev, "Unknown performance stats, Err:0x%016lX\n", ret[0]); return -ENOENT; + } else if (rc == H_AUTHORITY) { + dev_info(&p->pdev->dev, + "Permission denied while accessing performance stats"); + return -EPERM; + } else if (rc == H_UNSUPPORTED) { + dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); + return -EOPNOTSUPP; } else if (rc != H_SUCCESS) { dev_err(&p->pdev->dev, "Failed to query performance stats, Err:%lld\n", rc); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9d4ef65da7f3..2f636308cf60 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.severity = MCE_SEV_SEVERE; else if (severity == RTAS_SEVERITY_ERROR) mce_err.severity = MCE_SEV_SEVERE; - else if (severity == RTAS_SEVERITY_FATAL) - mce_err.severity = MCE_SEV_FATAL; else mce_err.severity = MCE_SEV_FATAL; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 754e493b7c05..631a0d57b6cd 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -549,6 +549,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + + if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER) + security_ftr_clear(SEC_FTR_STF_BARRIER); + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c new file mode 100644 index 000000000000..3385b5400cc6 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.c @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020-21 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <asm/machdep.h> +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#include <asm/vas.h> +#include "vas.h" + +#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul +#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul +/* The hypervisor allows one credit per window right now */ +#define DEF_WIN_CREDS 1 + +static struct vas_all_caps caps_all; +static bool copypaste_feat; + +static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; +static DEFINE_MUTEX(vas_pseries_mutex); + +static long hcall_return_busy_check(long rc) +{ + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + return rc; +} + +/* + * Allocate VAS window hcall + */ +static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, + u8 wintype, u16 credits) +{ + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; + + do { + rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, + credits, domain[0], domain[1], domain[2], + domain[3], domain[4], domain[5]); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) { + if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { + pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + win->vas_win.winid = retbuf[0]; + win->win_addr = retbuf[1]; + win->complete_irq = retbuf[2]; + win->fault_irq = retbuf[3]; + return 0; + } + + pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", + rc, wintype, credits); + + return -EIO; +} + +/* + * Deallocate VAS window hcall. + */ +static int h_deallocate_vas_window(u64 winid) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", + rc, winid); + return -EIO; +} + +/* + * Modify VAS window. + * After the window is opened with allocate window hcall, configure it + * with flags and LPAR PID before using. + */ +static int h_modify_vas_window(struct pseries_vas_window *win) +{ + long rc; + u32 lpid = mfspr(SPRN_PID); + + /* + * AMR value is not supported in Linux VAS implementation. + * The hypervisor ignores it if 0 is passed. + */ + do { + rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, + win->vas_win.winid, lpid, 0, + VAS_MOD_WIN_FLAGS, 0); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", + rc, win->vas_win.winid, lpid); + return -EIO; +} + +/* + * This hcall is used to determine the capabilities from the hypervisor. + * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES + * @query_type: If 0 is passed, the hypervisor returns the overall + * capabilities which provides all feature(s) that are + * available. Then query the hypervisor to get the + * corresponding capabilities for the specific feature. + * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS + * and VAS GZIP Default capabilities. + * H_QUERY_NX_CAPABILITIES provides NX GZIP + * capabilities. + * @result: Return buffer to save capabilities. + */ +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) +{ + long rc; + + rc = plpar_hcall_norets(hcall, query_type, result); + + if (rc == H_SUCCESS) + return 0; + + pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", + hcall, rc, query_type, result); + return -EIO; +} +EXPORT_SYMBOL_GPL(h_query_vas_capabilities); + +/* + * hcall to get fault CRB from the hypervisor. + */ +static int h_get_nx_fault(u32 winid, u64 buffer) +{ + long rc; + + rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", + rc, winid, buffer); + return -EIO; + +} + +/* + * Handle the fault interrupt. + * When the fault interrupt is received for each window, query the + * hypervisor to get the fault CRB on the specific fault. Then + * process the CRB by updating CSB or send signal if the user space + * CSB is invalid. + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; + struct vas_user_win_ref *tsk_ref; + int rc; + + rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); + if (!rc) { + tsk_ref = &txwin->vas_win.task_ref; + vas_dump_crb(&crb); + vas_update_csb(&crb, tsk_ref); + } + + return IRQ_HANDLED; +} + +/* + * Allocate window and setup IRQ mapping. + */ +static int allocate_setup_window(struct pseries_vas_window *txwin, + u64 *domain, u8 wintype) +{ + int rc; + + rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); + if (rc) + return rc; + /* + * On PowerVM, the hypervisor setup and forwards the fault + * interrupt per window. So the IRQ setup and fault handling + * will be done for each open window separately. + */ + txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); + if (!txwin->fault_virq) { + pr_err("Failed irq mapping %d\n", txwin->fault_irq); + rc = -EINVAL; + goto out_win; + } + + txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", + txwin->vas_win.winid); + if (!txwin->name) { + rc = -ENOMEM; + goto out_irq; + } + + rc = request_threaded_irq(txwin->fault_virq, NULL, + pseries_vas_fault_thread_fn, IRQF_ONESHOT, + txwin->name, txwin); + if (rc) { + pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", + txwin->vas_win.winid, txwin->fault_virq, rc); + goto out_free; + } + + txwin->vas_win.wcreds_max = DEF_WIN_CREDS; + + return 0; +out_free: + kfree(txwin->name); +out_irq: + irq_dispose_mapping(txwin->fault_virq); +out_win: + h_deallocate_vas_window(txwin->vas_win.winid); + return rc; +} + +static inline void free_irq_setup(struct pseries_vas_window *txwin) +{ + free_irq(txwin->fault_virq, txwin); + kfree(txwin->name); + irq_dispose_mapping(txwin->fault_virq); +} + +static struct vas_window *vas_allocate_window(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *cop_feat_caps; + struct vas_caps *caps; + struct pseries_vas_window *txwin; + int rc; + + txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); + if (!txwin) + return ERR_PTR(-ENOMEM); + + /* + * A VAS window can have many credits which means that many + * requests can be issued simultaneously. But the hypervisor + * restricts one credit per window. + * The hypervisor introduces 2 different types of credits: + * Default credit type (Uses normal priority FIFO): + * A limited number of credits are assigned to partitions + * based on processor entitlement. But these credits may be + * over-committed on a system depends on whether the CPUs + * are in shared or dedicated modes - that is, more requests + * may be issued across the system than NX can service at + * once which can result in paste command failure (RMA_busy). + * Then the process has to resend requests or fall-back to + * SW compression. + * Quality of Service (QoS) credit type (Uses high priority FIFO): + * To avoid NX HW contention, the system admins can assign + * QoS credits for each LPAR so that this partition is + * guaranteed access to NX resources. These credits are + * assigned to partitions via the HMC. + * Refer PAPR for more information. + * + * Allocate window with QoS credits if user requested. Otherwise + * default credits are used. + */ + if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) + caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; + else + caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; + + cop_feat_caps = &caps->caps; + + if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > + atomic_read(&cop_feat_caps->target_lpar_creds)) { + pr_err("Credits are not available to allocate window\n"); + rc = -EINVAL; + goto out; + } + + if (vas_id == -1) { + /* + * The user space is requesting to allocate a window on + * a VAS instance where the process is executing. + * On PowerVM, domain values are passed to the hypervisor + * to select VAS instance. Useful if the process is + * affinity to NUMA node. + * The hypervisor selects VAS instance if + * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. + * The h_allocate_vas_window hcall is defined to take a + * domain values as specified by h_home_node_associativity, + * So no unpacking needs to be done. + */ + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, + VPHN_FLAG_VCPU, smp_processor_id()); + if (rc != H_SUCCESS) { + pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); + goto out; + } + } + + /* + * Allocate / Deallocate window hcalls and setup / free IRQs + * have to be protected with mutex. + * Open VAS window: Allocate window hcall and setup IRQ + * Close VAS window: Deallocate window hcall and free IRQ + * The hypervisor waits until all NX requests are + * completed before closing the window. So expects OS + * to handle NX faults, means IRQ can be freed only + * after the deallocate window hcall is returned. + * So once the window is closed with deallocate hcall before + * the IRQ is freed, it can be assigned to new allocate + * hcall with the same fault IRQ by the hypervisor. It can + * result in setup IRQ fail for the new window since the + * same fault IRQ is not freed by the OS before. + */ + mutex_lock(&vas_pseries_mutex); + rc = allocate_setup_window(txwin, (u64 *)&domain[0], + cop_feat_caps->win_type); + mutex_unlock(&vas_pseries_mutex); + if (rc) + goto out; + + /* + * Modify window and it is ready to use. + */ + rc = h_modify_vas_window(txwin); + if (!rc) + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) + goto out_free; + + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + txwin->win_type = cop_feat_caps->win_type; + mutex_lock(&vas_pseries_mutex); + list_add(&txwin->win_list, &caps->list); + mutex_unlock(&vas_pseries_mutex); + + return &txwin->vas_win; + +out_free: + /* + * Window is not operational. Free IRQ before closing + * window so that do not have to hold mutex. + */ + free_irq_setup(txwin); + h_deallocate_vas_window(txwin->vas_win.winid); +out: + atomic_dec(&cop_feat_caps->used_lpar_creds); + kfree(txwin); + return ERR_PTR(rc); +} + +static u64 vas_paste_address(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + return win->win_addr; +} + +static int deallocate_free_window(struct pseries_vas_window *win) +{ + int rc = 0; + + /* + * The hypervisor waits for all requests including faults + * are processed before closing the window - Means all + * credits have to be returned. In the case of fault + * request, a credit is returned after OS issues + * H_GET_NX_FAULT hcall. + * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW + * hcall. + */ + rc = h_deallocate_vas_window(win->vas_win.winid); + if (!rc) + free_irq_setup(win); + + return rc; +} + +static int vas_deallocate_window(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + struct vas_cop_feat_caps *caps; + int rc = 0; + + if (!vwin) + return -EINVAL; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + + /* Should not happen */ + if (win->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Window (%u): Invalid window type %u\n", + vwin->winid, win->win_type); + return -EINVAL; + } + + caps = &vascaps[win->win_type].caps; + mutex_lock(&vas_pseries_mutex); + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + + list_del(&win->win_list); + atomic_dec(&caps->used_lpar_creds); + mutex_unlock(&vas_pseries_mutex); + + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); + + kfree(win); + return 0; +} + +static const struct vas_user_win_ops vops_pseries = { + .open_win = vas_allocate_window, /* Open and configure window */ + .paste_addr = vas_paste_address, /* To do copy/paste */ + .close_win = vas_deallocate_window, /* Close window */ +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc; + + if (!copypaste_feat) + return -ENOTSUPP; + + rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); + + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_api_pseries); + +void vas_unregister_api_pseries(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); + +/* + * Get the specific capabilities based on the feature type. + * Right now supports GZIP default and GZIP QoS capabilities. + */ +static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, + struct hv_vas_cop_feat_caps *hv_caps) +{ + struct vas_cop_feat_caps *caps; + struct vas_caps *vcaps; + int rc = 0; + + vcaps = &vascaps[type]; + memset(vcaps, 0, sizeof(*vcaps)); + INIT_LIST_HEAD(&vcaps->list); + + caps = &vcaps->caps; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + return rc; + + caps->user_mode = hv_caps->user_mode; + if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { + pr_err("User space COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + + caps->descriptor = be64_to_cpu(hv_caps->descriptor); + caps->win_type = hv_caps->win_type; + if (caps->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Unsupported window type %u\n", caps->win_type); + return -EINVAL; + } + caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); + caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); + atomic_set(&caps->target_lpar_creds, + be16_to_cpu(hv_caps->target_lpar_creds)); + if (feat == VAS_GZIP_DEF_FEAT) { + caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); + + if (caps->max_win_creds < DEF_WIN_CREDS) { + pr_err("Window creds(%u) > max allowed window creds(%u)\n", + DEF_WIN_CREDS, caps->max_win_creds); + return -EINVAL; + } + } + + copypaste_feat = true; + + return 0; +} + +static int __init pseries_vas_init(void) +{ + struct hv_vas_cop_feat_caps *hv_cop_caps; + struct hv_vas_all_caps *hv_caps; + int rc; + + /* + * Linux supports user space COPY/PASTE only with Radix + */ + if (!radix_enabled()) { + pr_err("API is supported only with radix page tables\n"); + return -ENOTSUPP; + } + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + /* + * Get VAS overall capabilities by passing 0 to feature type. + */ + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); + caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); + + hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); + if (!hv_cop_caps) { + rc = -ENOMEM; + goto out; + } + /* + * QOS capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, + VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + + if (rc) + goto out_cop; + } + /* + * Default capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, + VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); + if (rc) + goto out_cop; + } + + pr_info("GZIP feature is available\n"); + +out_cop: + kfree(hv_cop_caps); +out: + kfree(hv_caps); + return rc; +} +machine_device_initcall(pseries, pseries_vas_init); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h new file mode 100644 index 000000000000..4ecb3fcabd10 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2020-21 IBM Corp. + */ + +#ifndef _VAS_H +#define _VAS_H +#include <asm/vas.h> +#include <linux/mutex.h> +#include <linux/stringify.h> + +/* + * VAS window modify flags + */ +#define VAS_MOD_WIN_CLOSE PPC_BIT(0) +#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1) +#define VAS_MOD_WIN_DR PPC_BIT(3) +#define VAS_MOD_WIN_PR PPC_BIT(4) +#define VAS_MOD_WIN_SF PPC_BIT(5) +#define VAS_MOD_WIN_TA PPC_BIT(6) +#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \ + VAS_MOD_WIN_PR | VAS_MOD_WIN_SF) + +#define VAS_WIN_ACTIVE 0x0 +#define VAS_WIN_CLOSED 0x1 +#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */ +/* Process of being modified, deallocated, or quiesced */ +#define VAS_WIN_MOD_IN_PROCESS 0x3 + +#define VAS_COPY_PASTE_USER_MODE 0x00000001 +#define VAS_COP_OP_USER_MODE 0x00000010 + +/* + * Co-processor feature - GZIP QoS windows or GZIP default windows + */ +enum vas_cop_feat_type { + VAS_GZIP_QOS_FEAT_TYPE, + VAS_GZIP_DEF_FEAT_TYPE, + VAS_MAX_FEAT_TYPE, +}; + +/* + * Use to get feature specific capabilities from the + * hypervisor. + */ +struct hv_vas_cop_feat_caps { + __be64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; + __be16 max_lpar_creds; + __be16 max_win_creds; + union { + __be16 reserved; + __be16 def_lpar_creds; /* Used for default capabilities */ + }; + __be16 target_lpar_creds; +} __packed __aligned(0x1000); + +/* + * Feature specific (QoS or default) capabilities. + */ +struct vas_cop_feat_caps { + u64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; /* User mode copy/paste or COP HCALL */ + u16 max_lpar_creds; /* Max credits available in LPAR */ + /* Max credits can be assigned per window */ + u16 max_win_creds; + union { + u16 reserved; /* Used for QoS credit type */ + u16 def_lpar_creds; /* Used for default credit type */ + }; + /* Total LPAR available credits. Can be different from max LPAR */ + /* credits due to DLPAR operation */ + atomic_t target_lpar_creds; + atomic_t used_lpar_creds; /* Used credits so far */ + u16 avail_lpar_creds; /* Remaining available credits */ +}; + +/* + * Feature (QoS or Default) specific to store capabilities and + * the list of open windows. + */ +struct vas_caps { + struct vas_cop_feat_caps caps; + struct list_head list; /* List of open windows */ +}; + +/* + * To get window information from the hypervisor. + */ +struct hv_vas_win_lpar { + __be16 version; + u8 win_type; + u8 status; + __be16 credits; /* No of credits assigned to this window */ + __be16 reserved; + __be32 pid; /* LPAR Process ID */ + __be32 tid; /* LPAR Thread ID */ + __be64 win_addr; /* Paste address */ + __be32 interrupt; /* Interrupt when NX request completes */ + __be32 fault; /* Interrupt when NX sees fault */ + /* Associativity Domain Identifiers as returned in */ + /* H_HOME_NODE_ASSOCIATIVITY */ + __be64 domain[6]; + __be64 win_util; /* Number of bytes processed */ +} __packed __aligned(0x1000); + +struct pseries_vas_window { + struct vas_window vas_win; + u64 win_addr; /* Physical paste address */ + u8 win_type; /* QoS or Default window */ + u32 complete_irq; /* Completion interrupt */ + u32 fault_irq; /* Fault interrupt */ + u64 domain[6]; /* Associativity domain Ids */ + /* this window is allocated */ + u64 util; + + /* List of windows opened which is used for LPM */ + struct list_head win_list; + u64 flags; + char *name; + int fault_virq; +}; +#endif /* _VAS_H */ diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig index 304614c920aa..063d9195891f 100644 --- a/arch/powerpc/sysdev/xics/Kconfig +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -12,3 +12,6 @@ config PPC_ICP_HV config PPC_ICS_RTAS def_bool n + +config PPC_ICS_NATIVE + def_bool n diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile index ba1e3117b1c0..747063927c6c 100644 --- a/arch/powerpc/sysdev/xics/Makefile +++ b/arch/powerpc/sysdev/xics/Makefile @@ -4,4 +4,5 @@ obj-y += xics-common.o obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o +obj-$(CONFIG_PPC_ICS_NATIVE) += ics-native.o obj-$(CONFIG_PPC_POWERNV) += ics-opal.o icp-opal.o diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c new file mode 100644 index 000000000000..d450502f4053 --- /dev/null +++ b/arch/powerpc/sysdev/xics/ics-native.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ICS backend for OPAL managed interrupts. + * + * Copyright 2011 IBM Corp. + */ + +//#define DEBUG + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/msi.h> +#include <linux/list.h> + +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> +#include <asm/opal.h> +#include <asm/firmware.h> + +struct ics_native { + struct ics ics; + struct device_node *node; + void __iomem *base; + u32 ibase; + u32 icount; +}; +#define to_ics_native(_ics) container_of(_ics, struct ics_native, ics) + +static void __iomem *ics_native_xive(struct ics_native *in, unsigned int vec) +{ + return in->base + 0x800 + ((vec - in->ibase) << 2); +} + +static void ics_native_unmask_irq(struct irq_data *d) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + unsigned int server; + + pr_devel("ics-native: unmask virq %d [hw 0x%x]\n", d->irq, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + + server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); + out_be32(ics_native_xive(in, vec), (server << 8) | DEFAULT_PRIORITY); +} + +static unsigned int ics_native_startup(struct irq_data *d) +{ +#ifdef CONFIG_PCI_MSI + /* + * The generic MSI code returns with the interrupt disabled on the + * card, using the MSI mask bits. Firmware doesn't appear to unmask + * at that level, so we do it here by hand. + */ + if (irq_data_get_msi_desc(d)) + pci_msi_unmask_irq(d); +#endif + + /* unmask it */ + ics_native_unmask_irq(d); + return 0; +} + +static void ics_native_do_mask(struct ics_native *in, unsigned int vec) +{ + out_be32(ics_native_xive(in, vec), 0xff); +} + +static void ics_native_mask_irq(struct irq_data *d) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + + pr_devel("ics-native: mask virq %d [hw 0x%x]\n", d->irq, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + ics_native_do_mask(in, vec); +} + +static int ics_native_set_affinity(struct irq_data *d, + const struct cpumask *cpumask, + bool force) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + int server; + u32 xive; + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + server = xics_get_irq_server(d->irq, cpumask, 1); + if (server == -1) { + pr_warn("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); + return -1; + } + + xive = in_be32(ics_native_xive(in, vec)); + xive = (xive & 0xff) | (server << 8); + out_be32(ics_native_xive(in, vec), xive); + + return IRQ_SET_MASK_OK; +} + +static struct irq_chip ics_native_irq_chip = { + .name = "ICS", + .irq_startup = ics_native_startup, + .irq_mask = ics_native_mask_irq, + .irq_unmask = ics_native_unmask_irq, + .irq_eoi = NULL, /* Patched at init time */ + .irq_set_affinity = ics_native_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, +}; + +static int ics_native_map(struct ics *ics, unsigned int virq) +{ + unsigned int vec = (unsigned int)virq_to_hw(virq); + struct ics_native *in = to_ics_native(ics); + + pr_devel("%s: vec=0x%x\n", __func__, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq); + irq_set_chip_data(virq, ics); + + return 0; +} + +static void ics_native_mask_unknown(struct ics *ics, unsigned long vec) +{ + struct ics_native *in = to_ics_native(ics); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + + ics_native_do_mask(in, vec); +} + +static long ics_native_get_server(struct ics *ics, unsigned long vec) +{ + struct ics_native *in = to_ics_native(ics); + u32 xive; + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + xive = in_be32(ics_native_xive(in, vec)); + return (xive >> 8) & 0xfff; +} + +static int ics_native_host_match(struct ics *ics, struct device_node *node) +{ + struct ics_native *in = to_ics_native(ics); + + return in->node == node; +} + +static struct ics ics_native_template = { + .map = ics_native_map, + .mask_unknown = ics_native_mask_unknown, + .get_server = ics_native_get_server, + .host_match = ics_native_host_match, +}; + +static int __init ics_native_add_one(struct device_node *np) +{ + struct ics_native *ics; + u32 ranges[2]; + int rc, count; + + ics = kzalloc(sizeof(struct ics_native), GFP_KERNEL); + if (!ics) + return -ENOMEM; + ics->node = of_node_get(np); + memcpy(&ics->ics, &ics_native_template, sizeof(struct ics)); + + ics->base = of_iomap(np, 0); + if (!ics->base) { + pr_err("Failed to map %pOFP\n", np); + rc = -ENOMEM; + goto fail; + } + + count = of_property_count_u32_elems(np, "interrupt-ranges"); + if (count < 2 || count & 1) { + pr_err("Failed to read interrupt-ranges of %pOFP\n", np); + rc = -EINVAL; + goto fail; + } + if (count > 2) { + pr_warn("ICS %pOFP has %d ranges, only one supported\n", + np, count >> 1); + } + rc = of_property_read_u32_array(np, "interrupt-ranges", + ranges, 2); + if (rc) { + pr_err("Failed to read interrupt-ranges of %pOFP\n", np); + goto fail; + } + ics->ibase = ranges[0]; + ics->icount = ranges[1]; + + pr_info("ICS native initialized for sources %d..%d\n", + ics->ibase, ics->ibase + ics->icount - 1); + + /* Register ourselves */ + xics_register_ics(&ics->ics); + + return 0; +fail: + of_node_put(ics->node); + kfree(ics); + return rc; +} + +int __init ics_native_init(void) +{ + struct device_node *ics; + bool found_one = false; + + /* We need to patch our irq chip's EOI to point to the + * right ICP + */ + ics_native_irq_chip.irq_eoi = icp_ops->eoi; + + /* Find native ICS in the device-tree */ + for_each_compatible_node(ics, NULL, "openpower,xics-sources") { + if (ics_native_add_one(ics) == 0) + found_one = true; + } + + if (found_one) + pr_info("ICS native backend registered\n"); + + return found_one ? 0 : -ENODEV; +} diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 7e4305c01bac..de41ab91f793 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -477,6 +477,8 @@ void __init xics_init(void) if (rc < 0) rc = ics_opal_init(); if (rc < 0) + rc = ics_native_init(); + if (rc < 0) pr_warn("XICS: Cannot find a Source Controller !\n"); /* Initialize common bits */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index c8173e92f19d..bd7ee794be92 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -100,7 +100,7 @@ static long *xmon_fault_jmp[NR_CPUS]; /* Breakpoint stuff */ struct bpt { unsigned long address; - struct ppc_inst *instr; + u32 *instr; atomic_t ref_count; int enabled; unsigned long pad; @@ -946,11 +946,11 @@ static void insert_bpts(void) } patch_instruction(bp->instr, instr); - patch_instruction(ppc_inst_next(bp->instr, &instr), + patch_instruction(ppc_inst_next(bp->instr, bp->instr), ppc_inst(bpinstr)); if (bp->enabled & BP_CIABR) continue; - if (patch_instruction((struct ppc_inst *)bp->address, + if (patch_instruction((u32 *)bp->address, ppc_inst(bpinstr)) != 0) { printf("Couldn't write instruction at %lx, " "disabling breakpoint there\n", bp->address); @@ -992,7 +992,7 @@ static void remove_bpts(void) if (mread_instr(bp->address, &instr) && ppc_inst_equal(instr, ppc_inst(bpinstr)) && patch_instruction( - (struct ppc_inst *)bp->address, ppc_inst_read(bp->instr)) != 0) + (u32 *)bp->address, ppc_inst_read(bp->instr)) != 0) printf("Couldn't remove breakpoint at %lx\n", bp->address); } @@ -2214,7 +2214,7 @@ mread_instr(unsigned long adrs, struct ppc_inst *instr) if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); - *instr = ppc_inst_read((struct ppc_inst *)adrs); + *instr = ppc_inst_read((u32 *)adrs); sync(); /* wait a little while to see if we get a machine check */ __delay(200); @@ -2975,7 +2975,7 @@ static void dump_log_buf(void) { struct kmsg_dump_iter iter; - unsigned char buf[128]; + static unsigned char buf[1024]; size_t len; if (setjmp(bus_error_jmp) != 0) { diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig index 23e3d0160e67..2a35e0e785bd 100644 --- a/drivers/crypto/nx/Kconfig +++ b/drivers/crypto/nx/Kconfig @@ -29,6 +29,7 @@ if CRYPTO_DEV_NX_COMPRESS config CRYPTO_DEV_NX_COMPRESS_PSERIES tristate "Compression acceleration support on pSeries platform" depends on PPC_PSERIES && IBMVIO + depends on PPC_VAS default y help Support for PowerPC Nest (NX) compression acceleration. This diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile index bc89a20e5d9d..d00181a26dd6 100644 --- a/drivers/crypto/nx/Makefile +++ b/drivers/crypto/nx/Makefile @@ -14,5 +14,5 @@ nx-crypto-objs := nx.o \ obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o nx-compress.o obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o nx-compress.o nx-compress-objs := nx-842.o -nx-compress-pseries-objs := nx-842-pseries.o +nx-compress-pseries-objs := nx-common-pseries.o nx-compress-powernv-objs := nx-common-powernv.o diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 446f611726df..3b159f2fae17 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -1092,8 +1092,8 @@ static __init int nx_compress_powernv_init(void) * normal FIFO priority is assigned for userspace. * 842 compression is supported only in kernel. */ - ret = vas_register_coproc_api(THIS_MODULE, VAS_COP_TYPE_GZIP, - "nx-gzip"); + ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP, + "nx-gzip"); /* * GZIP is not supported in kernel right now. @@ -1129,7 +1129,7 @@ static void __exit nx_compress_powernv_exit(void) * use. So delete this API use for GZIP engine. */ if (!nx842_ct) - vas_unregister_coproc_api(); + vas_unregister_api_powernv(); crypto_unregister_alg(&nx842_powernv_alg); diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-common-pseries.c index cc8dd3072b8b..6671f6634dda 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -9,6 +9,8 @@ */ #include <asm/vio.h> +#include <asm/hvcall.h> +#include <asm/vas.h> #include "nx-842.h" #include "nx_csbcpb.h" /* struct nx_csbcpb */ @@ -19,6 +21,29 @@ MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); MODULE_ALIAS_CRYPTO("842"); MODULE_ALIAS_CRYPTO("842-nx"); +/* + * Coprocessor type specific capabilities from the hypervisor. + */ +struct hv_nx_cop_caps { + __be64 descriptor; + __be64 req_max_processed_len; /* Max bytes in one GZIP request */ + __be64 min_compress_len; /* Min compression size in bytes */ + __be64 min_decompress_len; /* Min decompression size in bytes */ +} __packed __aligned(0x1000); + +/* + * Coprocessor type specific capabilities. + */ +struct nx_cop_caps { + u64 descriptor; + u64 req_max_processed_len; /* Max bytes in one GZIP request */ + u64 min_compress_len; /* Min compression in bytes */ + u64 min_decompress_len; /* Min decompression in bytes */ +}; + +static u64 caps_feat; +static struct nx_cop_caps nx_cop_caps; + static struct nx842_constraints nx842_pseries_constraints = { .alignment = DDE_BUFFER_ALIGN, .multiple = DDE_BUFFER_LAST_MULT, @@ -942,6 +967,36 @@ static struct attribute_group nx842_attribute_group = { .attrs = nx842_sysfs_entries, }; +#define nxcop_caps_read(_name) \ +static ssize_t nxcop_##_name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%lld\n", nx_cop_caps._name); \ +} + +#define NXCT_ATTR_RO(_name) \ + nxcop_caps_read(_name); \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, \ + 0444, \ + nxcop_##_name##_show, \ + NULL); + +NXCT_ATTR_RO(req_max_processed_len); +NXCT_ATTR_RO(min_compress_len); +NXCT_ATTR_RO(min_decompress_len); + +static struct attribute *nxcop_caps_sysfs_entries[] = { + &dev_attr_req_max_processed_len.attr, + &dev_attr_min_compress_len.attr, + &dev_attr_min_decompress_len.attr, + NULL, +}; + +static struct attribute_group nxcop_caps_attr_group = { + .name = "nx_gzip_caps", + .attrs = nxcop_caps_sysfs_entries, +}; + static struct nx842_driver nx842_pseries_driver = { .name = KBUILD_MODNAME, .owner = THIS_MODULE, @@ -1031,6 +1086,16 @@ static int nx842_probe(struct vio_dev *viodev, goto error; } + if (caps_feat) { + if (sysfs_create_group(&viodev->dev.kobj, + &nxcop_caps_attr_group)) { + dev_err(&viodev->dev, + "Could not create sysfs NX capability entries\n"); + ret = -1; + goto error; + } + } + return 0; error_unlock: @@ -1050,6 +1115,9 @@ static void nx842_remove(struct vio_dev *viodev) pr_info("Removing IBM Power 842 compression device\n"); sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); + if (caps_feat) + sysfs_remove_group(&viodev->dev.kobj, &nxcop_caps_attr_group); + crypto_unregister_alg(&nx842_pseries_alg); spin_lock_irqsave(&devdata_mutex, flags); @@ -1065,6 +1133,64 @@ static void nx842_remove(struct vio_dev *viodev) kfree(old_devdata); } +/* + * Get NX capabilities from the hypervisor. + * Only NXGZIP capabilities are provided by the hypersvisor right + * now and these values are available to user space with sysfs. + */ +static void __init nxcop_get_capabilities(void) +{ + struct hv_vas_all_caps *hv_caps; + struct hv_nx_cop_caps *hv_nxc; + int rc; + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return; + /* + * Get NX overall capabilities with feature type=0 + */ + rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_feat = be64_to_cpu(hv_caps->feat_type); + /* + * NX-GZIP feature available + */ + if (caps_feat & VAS_NX_GZIP_FEAT_BIT) { + hv_nxc = kmalloc(sizeof(*hv_nxc), GFP_KERNEL); + if (!hv_nxc) + goto out; + /* + * Get capabilities for NX-GZIP feature + */ + rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, + VAS_NX_GZIP_FEAT, + (u64)virt_to_phys(hv_nxc)); + } else { + pr_err("NX-GZIP feature is not available\n"); + rc = -EINVAL; + } + + if (!rc) { + nx_cop_caps.descriptor = be64_to_cpu(hv_nxc->descriptor); + nx_cop_caps.req_max_processed_len = + be64_to_cpu(hv_nxc->req_max_processed_len); + nx_cop_caps.min_compress_len = + be64_to_cpu(hv_nxc->min_compress_len); + nx_cop_caps.min_decompress_len = + be64_to_cpu(hv_nxc->min_decompress_len); + } else { + caps_feat = 0; + } + + kfree(hv_nxc); +out: + kfree(hv_caps); +} + static const struct vio_device_id nx842_vio_driver_ids[] = { {"ibm,compression-v1", "ibm,compression"}, {"", ""}, @@ -1092,6 +1218,10 @@ static int __init nx842_pseries_init(void) return -ENOMEM; RCU_INIT_POINTER(devdata, new_devdata); + /* + * Get NX capabilities from the hypervisor. + */ + nxcop_get_capabilities(); ret = vio_register_driver(&nx842_vio_driver); if (ret) { @@ -1101,6 +1231,12 @@ static int __init nx842_pseries_init(void) return ret; } + ret = vas_register_api_pseries(THIS_MODULE, VAS_COP_TYPE_GZIP, + "nx-gzip"); + + if (ret) + pr_err("NX-GZIP is not supported. Returned=%d\n", ret); + return 0; } @@ -1111,6 +1247,8 @@ static void __exit nx842_pseries_exit(void) struct nx842_devdata *old_devdata; unsigned long flags; + vas_unregister_api_pseries(); + crypto_unregister_alg(&nx842_pseries_alg); spin_lock_irqsave(&devdata_mutex, flags); diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index e34ae6a442c7..6328abd51ffa 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -358,7 +358,7 @@ static int ps3_vuart_raw_write(struct ps3_system_bus_device *dev, ps3_mm_phys_to_lpar(__pa(buf)), bytes, bytes_written); if (result) { - dev_dbg(&dev->core, "%s:%d: lv1_write_virtual_uart failed: " + dev_warn(&dev->core, "%s:%d: lv1_write_virtual_uart failed: " "%s\n", __func__, __LINE__, ps3_result(result)); return result; } diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c index 9d66257e1da5..516e6d14d32e 100644 --- a/drivers/ps3/ps3av.c +++ b/drivers/ps3/ps3av.c @@ -217,9 +217,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, /* send pkt */ res = ps3av_vuart_write(ps3av->dev, send_buf, write_len); if (res < 0) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_write() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_write() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } @@ -230,9 +230,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, res = ps3av_vuart_read(ps3av->dev, recv_buf, PS3AV_HDR_SIZE, timeout); if (res != PS3AV_HDR_SIZE) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_read() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_read() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } @@ -240,9 +240,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, res = ps3av_vuart_read(ps3av->dev, &recv_buf->cid, recv_buf->size, timeout); if (res < 0) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_read() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_read() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } res += PS3AV_HDR_SIZE; /* total len */ @@ -251,8 +251,8 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, } while (event); if ((cmd | PS3AV_REPLY_BIT) != recv_buf->cid) { - dev_dbg(&ps3av->dev->core, "%s: reply err (result=%x)\n", - __func__, recv_buf->cid); + dev_warn(&ps3av->dev->core, "%s:%d: reply err: %x\n", __func__, + __LINE__, recv_buf->cid); return -EINVAL; } diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index 798f27f40cc2..72b11aa7e0a6 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -249,7 +249,7 @@ static void udbg_hvc_putc(char c) count = hvterm_hvsi_put_chars(0, &c, 1); break; } - } while(count == 0); + } while (count == 0 || count == -EAGAIN); } static int udbg_hvc_getc_poll(void) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1883a4a9f16a..02d4020615a7 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -409,6 +409,9 @@ void dump_kprobe(struct kprobe *kp); void *alloc_insn_page(void); void free_insn_page(void *page); +void *alloc_optinsn_page(void); +void free_optinsn_page(void *page); + int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 745f08fdd7a6..8c0a6fdef771 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -321,11 +321,21 @@ int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, } #ifdef CONFIG_OPTPROBES +void __weak *alloc_optinsn_page(void) +{ + return alloc_insn_page(); +} + +void __weak free_optinsn_page(void *page) +{ + free_insn_page(page); +} + /* For optimized_kprobe buffer */ struct kprobe_insn_cache kprobe_optinsn_slots = { .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex), - .alloc = alloc_insn_page, - .free = free_insn_page, + .alloc = alloc_optinsn_page, + .free = free_optinsn_page, .sym = KPROBE_OPTINSN_PAGE_SYM, .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), /* .insn_size is initialized later */ diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c index 579f0215c6e7..9836838a529f 100644 --- a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c +++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c @@ -14,6 +14,7 @@ #include <time.h> #include <sys/types.h> #include <sys/time.h> +#include <sys/syscall.h> #include <signal.h> static volatile int soak_done; @@ -121,7 +122,7 @@ static void do_null_syscall(unsigned long nr) unsigned long i; for (i = 0; i < nr; i++) - getppid(); + syscall(__NR_gettid); } #define TIME(A, STR) \ diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index c5ecb4634094..010160690227 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ fork_cleanup_test ebb_on_child_test \ ebb_on_willing_child_test back_to_back_ebbs_test \ lost_exception_test no_handler_test \ - cycles_with_mmcr2_test + cycles_with_mmcr2_test regs_access_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h index b5bc2b616075..2c803b5b48d6 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h @@ -55,8 +55,6 @@ void ebb_global_disable(void); bool ebb_is_supported(void); void ebb_freeze_pmcs(void); void ebb_unfreeze_pmcs(void); -void event_ebb_init(struct event *e); -void event_leader_ebb_init(struct event *e); int count_pmc(int pmc, uint32_t sample_period); void dump_ebb_state(void); void dump_summary_ebb_state(void); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c index fc5bf4870d8e..01e827c31169 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c @@ -50,8 +50,6 @@ static int no_handler_test(void) event_close(&event); - dump_ebb_state(); - /* The real test is that we never took an EBB at 0x0 */ return 0; diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c new file mode 100644 index 000000000000..1eda8e9932e8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <setjmp.h> +#include <signal.h> + +#include "ebb.h" + + +/* + * Test that closing the EBB event clears MMCR0_PMCC and + * sets MMCR0_PMCCEXT preventing further read access to the + * group B PMU registers. + */ + +static int regs_access_pmccext(void) +{ + struct event event; + + SKIP_IF(!ebb_is_supported()); + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + while (ebb_state.stats.ebb_count < 1) + FAIL_IF(core_busy_loop()); + + ebb_global_disable(); + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + + /* + * For ISA v3.1, verify the test takes a SIGILL when reading + * PMU regs after the event is closed. With the control bit + * in MMCR0 (PMCCEXT) restricting access to group B PMU regs, + * sigill is expected. + */ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + FAIL_IF(catch_sigill(dump_ebb_state)); + else + dump_ebb_state(); + + return 0; +} + +int main(void) +{ + return test_harness(regs_access_pmccext, "regs_access_pmccext"); +} diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 844d18cd5f93..7488315fd847 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0+ TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2 +TEST_PROGS := mitigation-patching.sh + top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh new file mode 100755 index 000000000000..00197acb7ff1 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +set -euo pipefail + +TIMEOUT=10 + +function do_one +{ + local mitigation="$1" + local orig + local start + local now + + orig=$(cat "$mitigation") + + start=$EPOCHSECONDS + now=$start + + while [[ $((now-start)) -lt "$TIMEOUT" ]] + do + echo 0 > "$mitigation" + echo 1 > "$mitigation" + + now=$EPOCHSECONDS + done + + echo "$orig" > "$mitigation" +} + +rc=0 +cd /sys/kernel/debug/powerpc || rc=1 +if [[ "$rc" -ne 0 ]]; then + echo "Error: couldn't cd to /sys/kernel/debug/powerpc" >&2 + exit 1 +fi + +tainted=$(cat /proc/sys/kernel/tainted) +if [[ "$tainted" -ne 0 ]]; then + echo "Error: kernel already tainted!" >&2 + exit 1 +fi + +mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" + +for m in $mitigations +do + do_one "$m" & +done + +echo "Spawned threads enabling/disabling mitigations ..." + +if stress-ng > /dev/null 2>&1; then + stress="stress-ng" +elif stress > /dev/null 2>&1; then + stress="stress" +else + stress="" +fi + +if [[ -n "$stress" ]]; then + "$stress" -m "$(nproc)" -t "$TIMEOUT" & + echo "Spawned VM stressors ..." +fi + +echo "Waiting for timeout ..." +wait + +tainted=$(cat /proc/sys/kernel/tainted) +if [[ "$tainted" -ne 0 ]]; then + echo "Error: kernel became tainted!" >&2 + exit 1 +fi + +echo "OK" +exit 0 diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c index e2a0c07e8362..9ef37a9836ac 100644 --- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -17,7 +17,6 @@ #include <pthread.h> #include <sys/mman.h> #include <unistd.h> -#include <pthread.h> #include "tm.h" #include "utils.h" diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 98c3b647f54d..e3d5c77a8612 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_RET_SET(_regs, _val) \ do { \ typeof(_val) _result = (_val); \ - /* \ - * A syscall error is signaled by CR0 SO bit \ - * and the code is stored as a positive value. \ - */ \ - if (_result < 0) { \ - SYSCALL_RET(_regs) = -_result; \ - (_regs).ccr |= 0x10000000; \ - } else { \ + if ((_regs.trap & 0xfff0) == 0x3000) { \ + /* \ + * scv 0 system call uses -ve result \ + * for error, so no need to adjust. \ + */ \ SYSCALL_RET(_regs) = _result; \ - (_regs).ccr &= ~0x10000000; \ + } else { \ + /* \ + * A syscall error is signaled by the \ + * CR0 SO bit and the code is stored as \ + * a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -_result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = _result; \ + (_regs).ccr &= ~0x10000000; \ + } \ } \ } while (0) # define SYSCALL_RET_SET_ON_PTRACE_EXIT |