diff options
Diffstat (limited to 'arch')
511 files changed, 10414 insertions, 5160 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index d007b2a15b22..76c0b54443b1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -245,6 +245,17 @@ config ARCH_TASK_STRUCT_ON_STACK config ARCH_TASK_STRUCT_ALLOCATOR bool +config HAVE_ARCH_THREAD_STRUCT_WHITELIST + bool + depends on !ARCH_TASK_STRUCT_ALLOCATOR + help + An architecture should select this to provide hardened usercopy + knowledge about what region of the thread_struct should be + whitelisted for copying to userspace. Normally this is only the + FPU registers. Specifically, arch_thread_struct_whitelist() + should be implemented. Without this, the entire thread_struct + field in task_struct will be left whitelisted. + # Select if arch has its private alloc_thread_stack() function config ARCH_THREAD_STACK_ALLOCATOR bool @@ -527,16 +538,10 @@ config HAVE_CC_STACKPROTECTOR - its compiler supports the -fstack-protector option - it has implemented a stack canary (e.g. __stack_chk_guard) -config CC_STACKPROTECTOR - def_bool n - help - Set when a stack-protector mode is enabled, so that the build - can enable kernel-side support for the GCC feature. - choice prompt "Stack Protector buffer overflow detection" depends on HAVE_CC_STACKPROTECTOR - default CC_STACKPROTECTOR_NONE + default CC_STACKPROTECTOR_AUTO help This option turns on the "stack-protector" GCC feature. This feature puts, at the beginning of functions, a canary value on @@ -553,7 +558,6 @@ config CC_STACKPROTECTOR_NONE config CC_STACKPROTECTOR_REGULAR bool "Regular" - select CC_STACKPROTECTOR help Functions will have the stack-protector canary logic added if they have an 8-byte or larger character array on the stack. @@ -567,7 +571,6 @@ config CC_STACKPROTECTOR_REGULAR config CC_STACKPROTECTOR_STRONG bool "Strong" - select CC_STACKPROTECTOR help Functions will have the stack-protector canary logic added in any of the following conditions: @@ -585,6 +588,12 @@ config CC_STACKPROTECTOR_STRONG about 20% of all kernel functions, which increases the kernel code size by about 2%. +config CC_STACKPROTECTOR_AUTO + bool "Automatic" + help + If the compiler supports it, the best available stack-protector + option will be chosen. + endchoice config THIN_ARCHIVES diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h index d2e4da93e68c..ca3322536f72 100644 --- a/arch/alpha/include/asm/futex.h +++ b/arch/alpha/include/asm/futex.h @@ -20,8 +20,8 @@ "3: .subsection 2\n" \ "4: br 1b\n" \ " .previous\n" \ - EXC(1b,3b,%1,$31) \ - EXC(2b,3b,%1,$31) \ + EXC(1b,3b,$31,%1) \ + EXC(2b,3b,$31,%1) \ : "=&r" (oldval), "=&r"(ret) \ : "r" (uaddr), "r"(oparg) \ : "memory") @@ -82,8 +82,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "3: .subsection 2\n" "4: br 1b\n" " .previous\n" - EXC(1b,3b,%0,$31) - EXC(2b,3b,%0,$31) + EXC(1b,3b,$31,%0) + EXC(2b,3b,$31,%0) : "+r"(ret), "=&r"(prev), "=&r"(cmp) : "r"(uaddr), "r"((long)(int)oldval), "r"(newval) : "memory"); diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index bfe784f2d4af..cb05d045efe3 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -40,15 +40,12 @@ typedef struct { struct thread_struct { }; #define INIT_THREAD { } -/* Return saved PC of a blocked thread. */ -struct task_struct; -extern unsigned long thread_saved_pc(struct task_struct *); - /* Do necessary setup to start up a newly executed thread. */ struct pt_regs; extern void start_thread(struct pt_regs *, unsigned long, unsigned long); /* Free all resources held by a thread. */ +struct task_struct; extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h index 05e0398a83a6..de6c8360fbe3 100644 --- a/arch/alpha/include/uapi/asm/termbits.h +++ b/arch/alpha/include/uapi/asm/termbits.h @@ -110,7 +110,11 @@ struct ktermios { #define VTDLY 00200000 #define VT0 00000000 #define VT1 00200000 -#define XTABS 01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. */ +/* + * Should be equivalent to TAB3, see description of TAB3 in + * POSIX.1-2008, Ch. 11.2.3 "Output Modes" + */ +#define XTABS TAB3 /* c_cflag bit meaning */ #define CBAUD 0000037 diff --git a/arch/alpha/kernel/console.c b/arch/alpha/kernel/console.c index 8e9a41966881..5476279329a6 100644 --- a/arch/alpha/kernel/console.c +++ b/arch/alpha/kernel/console.c @@ -21,6 +21,7 @@ struct pci_controller *pci_vga_hose; static struct resource alpha_vga = { .name = "alpha-vga+", + .flags = IORESOURCE_IO, .start = 0x3C0, .end = 0x3DF }; diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 87da00579946..2e86ebb680ae 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -425,7 +425,7 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) if (bus == 0 && dfn == 0) { hose = pci_isa_hose; } else { - dev = pci_get_bus_and_slot(bus, dfn); + dev = pci_get_domain_bus_and_slot(0, bus, dfn); if (!dev) return -ENODEV; hose = dev->sysdata; diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 2e4cb74fdc41..18043af45e2b 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h @@ -144,7 +144,8 @@ struct pci_iommu_arena }; #if defined(CONFIG_ALPHA_SRM) && \ - (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA)) + (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \ + defined(CONFIG_ALPHA_AVANTI)) # define NEED_SRM_SAVE_RESTORE #else # undef NEED_SRM_SAVE_RESTORE diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 74bfb1f2d68e..48b81d015d8a 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -269,12 +269,13 @@ copy_thread(unsigned long clone_flags, unsigned long usp, application calling fork. */ if (clone_flags & CLONE_SETTLS) childti->pcb.unique = regs->r20; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ childti->pcb.usp = usp ?: rdusp(); *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ - regs->r20 = 0; stack = ((struct switch_stack *) regs) - 1; *childstack = *stack; childstack->r26 = (unsigned long) ret_from_fork; @@ -361,7 +362,7 @@ EXPORT_SYMBOL(dump_elf_task_fp); * all. -- r~ */ -unsigned long +static unsigned long thread_saved_pc(struct task_struct *t) { unsigned long base = (unsigned long)task_stack_page(t); diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 239dc0e601d5..ff4f54b86c7f 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -237,7 +237,7 @@ nautilus_init_pci(void) bus = hose->bus = bridge->bus; pcibios_claim_one_bus(bus); - irongate = pci_get_bus_and_slot(0, 0); + irongate = pci_get_domain_bus_and_slot(pci_domain_nr(bus), 0, 0); bus->self = irongate; bus->resource[0] = &irongate_io; bus->resource[1] = &irongate_mem; diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 4bd99a7b1c41..f43bd05dede2 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -160,11 +160,16 @@ void show_stack(struct task_struct *task, unsigned long *sp) for(i=0; i < kstack_depth_to_print; i++) { if (((long) stack & (THREAD_SIZE-1)) == 0) break; - if (i && ((i % 4) == 0)) - printk("\n "); - printk("%016lx ", *stack++); + if ((i % 4) == 0) { + if (i) + pr_cont("\n"); + printk(" "); + } else { + pr_cont(" "); + } + pr_cont("%016lx", *stack++); } - printk("\n"); + pr_cont("\n"); dik_show_trace(sp); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 11647091fa6d..7e3d53575486 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -3,8 +3,8 @@ config ARM bool default y select ARCH_CLOCKSOURCE_DATA - select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID - select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID && !KEXEC + select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_SET_MEMORY @@ -51,6 +51,7 @@ config ARM select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARM_SMCCC if CPU_V7 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 @@ -100,6 +101,7 @@ config ARM select OLD_SIGACTION select OLD_SIGSUSPEND3 select PERF_USE_VMALLOC + select REFCOUNT_FULL select RTC_LIB select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones @@ -1526,12 +1528,10 @@ config THUMB2_KERNEL bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K default y if CPU_THUMBONLY - select ARM_ASM_UNIFIED select ARM_UNWIND help By enabling this option, the kernel will be compiled in - Thumb-2 mode. A compiler/assembler that understand the unified - ARM-Thumb syntax is needed. + Thumb-2 mode. If unsure, say N. @@ -1566,9 +1566,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11 Unless you are sure your tools don't have this problem, say Y. -config ARM_ASM_UNIFIED - bool - config ARM_PATCH_IDIV bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" depends on CPU_32v7 && !XIP_KERNEL diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 17685e19aed8..78a647080ebc 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -3,10 +3,14 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config ARM_PTDUMP +config ARM_PTDUMP_CORE + def_bool n + +config ARM_PTDUMP_DEBUGFS bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL depends on MMU + select ARM_PTDUMP_CORE select DEBUG_FS ---help--- Say Y here if you want to show the kernel pagetable layout in a @@ -16,6 +20,33 @@ config ARM_PTDUMP kernel. If in doubt, say "N" +config DEBUG_WX + bool "Warn on W+X mappings at boot" + select ARM_PTDUMP_CORE + ---help--- + Generate a warning if any W+X mappings are found at boot. + + This is useful for discovering cases where the kernel is leaving + W+X mappings after applying NX, as such mappings are a security risk. + + Look for a message in dmesg output like this: + + arm/mm: Checked W+X mappings: passed, no W+X pages found. + + or like this, if the check failed: + + arm/mm: Checked W+X mappings: FAILED, <N> W+X pages found. + + Note that even if the check fails, your kernel is possibly + still fine, as W+X mappings are not a security hole in + themselves, what they do is that they make the exploitation + of other unfixed kernel bugs easier. + + There is no runtime or memory usage effect of this option + once the kernel has booted up - it's a one time check. + + If in doubt, say "Y". + # RMK wants arm kernels compiled with frame pointers or stack unwinding. # If you know what you are doing and are willing to live without stack # traces, you can get a slightly smaller kernel by setting this option to diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 80351e505fd5..e83f5161fdd8 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -115,9 +115,11 @@ ifeq ($(CONFIG_ARM_UNWIND),y) CFLAGS_ABI +=-funwind-tables endif +# Accept old syntax despite ".syntax unified" +AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) + ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) -AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb # Work around buggy relocation from gas if requested: @@ -125,7 +127,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y) KBUILD_CFLAGS_MODULE +=-fno-optimize-sibling-calls endif else -CFLAGS_ISA :=$(call cc-option,-marm,) +CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) endif diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c index 309e1bbad75d..13c90abc68d6 100644 --- a/arch/arm/boot/compressed/string.c +++ b/arch/arm/boot/compressed/string.c @@ -130,8 +130,3 @@ void *memset(void *s, int c, size_t count) *xs++ = c; return s; } - -void __memzero(void *s, size_t count) -{ - memset(s, 0, count); -} diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index e6bf6774c4bb..2b963d8e76dd 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -56,6 +56,7 @@ SECTIONS .rodata : { *(.rodata) *(.rodata.*) + *(.data.rel.ro) } .piggydata : { *(.piggydata) @@ -101,6 +102,12 @@ SECTIONS * this symbol allows further debug in the near future. */ .image_end (NOLOAD) : { + /* + * EFI requires that the image is aligned to 512 bytes, and appended + * DTB requires that we know where the end of the image is. Ensure + * that both are satisfied by ensuring that there are no additional + * sections emitted into the decompressor image. + */ _edata_real = .; } @@ -128,3 +135,4 @@ SECTIONS .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } } +ASSERT(_edata_real == _edata, "error: zImage file size is incorrect"); diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 72c765a3b354..ab930581fc7a 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -780,6 +780,8 @@ reset-gpio = <&gpio4 6 GPIO_ACTIVE_HIGH>; /* 102 */ + lens-focus = <&ad5820>; + port { csi_cam1: endpoint { bus-type = <3>; /* CCP2 */ diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c index 4c10c6452678..f4dc1714a79e 100644 --- a/arch/arm/common/bL_switcher_dummy_if.c +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -57,3 +57,7 @@ static struct miscdevice bL_switcher_device = { &bL_switcher_fops }; module_misc_device(bL_switcher_device); + +MODULE_AUTHOR("Nicolas Pitre <nico@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("big.LITTLE switcher dummy user interface"); diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 4ecd5120fce7..a2c878769eaf 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -108,6 +108,7 @@ struct sa1111 { spinlock_t lock; void __iomem *base; struct sa1111_platform_data *pdata; + struct irq_domain *irqdomain; struct gpio_chip gc; #ifdef CONFIG_PM void *saved_state; @@ -125,7 +126,7 @@ struct sa1111_dev_info { unsigned long skpcr_mask; bool dma; unsigned int devid; - unsigned int irq[6]; + unsigned int hwirq[6]; }; static struct sa1111_dev_info sa1111_devices[] = { @@ -134,7 +135,7 @@ static struct sa1111_dev_info sa1111_devices[] = { .skpcr_mask = SKPCR_UCLKEN, .dma = true, .devid = SA1111_DEVID_USB, - .irq = { + .hwirq = { IRQ_USBPWR, IRQ_HCIM, IRQ_HCIBUFFACC, @@ -148,7 +149,7 @@ static struct sa1111_dev_info sa1111_devices[] = { .skpcr_mask = SKPCR_I2SCLKEN | SKPCR_L3CLKEN, .dma = true, .devid = SA1111_DEVID_SAC, - .irq = { + .hwirq = { AUDXMTDMADONEA, AUDXMTDMADONEB, AUDRCVDMADONEA, @@ -164,7 +165,7 @@ static struct sa1111_dev_info sa1111_devices[] = { .offset = SA1111_KBD, .skpcr_mask = SKPCR_PTCLKEN, .devid = SA1111_DEVID_PS2_KBD, - .irq = { + .hwirq = { IRQ_TPRXINT, IRQ_TPTXINT }, @@ -173,7 +174,7 @@ static struct sa1111_dev_info sa1111_devices[] = { .offset = SA1111_MSE, .skpcr_mask = SKPCR_PMCLKEN, .devid = SA1111_DEVID_PS2_MSE, - .irq = { + .hwirq = { IRQ_MSRXINT, IRQ_MSTXINT }, @@ -182,7 +183,7 @@ static struct sa1111_dev_info sa1111_devices[] = { .offset = 0x1800, .skpcr_mask = 0, .devid = SA1111_DEVID_PCMCIA, - .irq = { + .hwirq = { IRQ_S0_READY_NINT, IRQ_S0_CD_VALID, IRQ_S0_BVD1_STSCHG, @@ -193,6 +194,19 @@ static struct sa1111_dev_info sa1111_devices[] = { }, }; +static int sa1111_map_irq(struct sa1111 *sachip, irq_hw_number_t hwirq) +{ + return irq_create_mapping(sachip->irqdomain, hwirq); +} + +static void sa1111_handle_irqdomain(struct irq_domain *irqdomain, int irq) +{ + struct irq_desc *d = irq_to_desc(irq_linear_revmap(irqdomain, irq)); + + if (d) + generic_handle_irq_desc(d); +} + /* * SA1111 interrupt support. Since clearing an IRQ while there are * active IRQs causes the interrupt output to pulse, the upper levels @@ -202,49 +216,45 @@ static void sa1111_irq_handler(struct irq_desc *desc) { unsigned int stat0, stat1, i; struct sa1111 *sachip = irq_desc_get_handler_data(desc); + struct irq_domain *irqdomain; void __iomem *mapbase = sachip->base + SA1111_INTC; - stat0 = sa1111_readl(mapbase + SA1111_INTSTATCLR0); - stat1 = sa1111_readl(mapbase + SA1111_INTSTATCLR1); + stat0 = readl_relaxed(mapbase + SA1111_INTSTATCLR0); + stat1 = readl_relaxed(mapbase + SA1111_INTSTATCLR1); - sa1111_writel(stat0, mapbase + SA1111_INTSTATCLR0); + writel_relaxed(stat0, mapbase + SA1111_INTSTATCLR0); desc->irq_data.chip->irq_ack(&desc->irq_data); - sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1); + writel_relaxed(stat1, mapbase + SA1111_INTSTATCLR1); if (stat0 == 0 && stat1 == 0) { do_bad_IRQ(desc); return; } + irqdomain = sachip->irqdomain; + for (i = 0; stat0; i++, stat0 >>= 1) if (stat0 & 1) - generic_handle_irq(i + sachip->irq_base); + sa1111_handle_irqdomain(irqdomain, i); for (i = 32; stat1; i++, stat1 >>= 1) if (stat1 & 1) - generic_handle_irq(i + sachip->irq_base); + sa1111_handle_irqdomain(irqdomain, i); /* For level-based interrupts */ desc->irq_data.chip->irq_unmask(&desc->irq_data); } -#define SA1111_IRQMASK_LO(x) (1 << (x - sachip->irq_base)) -#define SA1111_IRQMASK_HI(x) (1 << (x - sachip->irq_base - 32)) - static u32 sa1111_irqmask(struct irq_data *d) { - struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - - return BIT((d->irq - sachip->irq_base) & 31); + return BIT(irqd_to_hwirq(d) & 31); } static int sa1111_irqbank(struct irq_data *d) { - struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - - return ((d->irq - sachip->irq_base) / 32) * 4; + return (irqd_to_hwirq(d) / 32) * 4; } static void sa1111_ack_irq(struct irq_data *d) @@ -257,9 +267,9 @@ static void sa1111_mask_irq(struct irq_data *d) void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); u32 ie; - ie = sa1111_readl(mapbase + SA1111_INTEN0); + ie = readl_relaxed(mapbase + SA1111_INTEN0); ie &= ~sa1111_irqmask(d); - sa1111_writel(ie, mapbase + SA1111_INTEN0); + writel(ie, mapbase + SA1111_INTEN0); } static void sa1111_unmask_irq(struct irq_data *d) @@ -268,9 +278,9 @@ static void sa1111_unmask_irq(struct irq_data *d) void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); u32 ie; - ie = sa1111_readl(mapbase + SA1111_INTEN0); + ie = readl_relaxed(mapbase + SA1111_INTEN0); ie |= sa1111_irqmask(d); - sa1111_writel(ie, mapbase + SA1111_INTEN0); + writel_relaxed(ie, mapbase + SA1111_INTEN0); } /* @@ -287,11 +297,11 @@ static int sa1111_retrigger_irq(struct irq_data *d) u32 ip, mask = sa1111_irqmask(d); int i; - ip = sa1111_readl(mapbase + SA1111_INTPOL0); + ip = readl_relaxed(mapbase + SA1111_INTPOL0); for (i = 0; i < 8; i++) { - sa1111_writel(ip ^ mask, mapbase + SA1111_INTPOL0); - sa1111_writel(ip, mapbase + SA1111_INTPOL0); - if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask) + writel_relaxed(ip ^ mask, mapbase + SA1111_INTPOL0); + writel_relaxed(ip, mapbase + SA1111_INTPOL0); + if (readl_relaxed(mapbase + SA1111_INTSTATCLR0) & mask) break; } @@ -313,13 +323,13 @@ static int sa1111_type_irq(struct irq_data *d, unsigned int flags) if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0) return -EINVAL; - ip = sa1111_readl(mapbase + SA1111_INTPOL0); + ip = readl_relaxed(mapbase + SA1111_INTPOL0); if (flags & IRQ_TYPE_EDGE_RISING) ip &= ~mask; else ip |= mask; - sa1111_writel(ip, mapbase + SA1111_INTPOL0); - sa1111_writel(ip, mapbase + SA1111_WAKEPOL0); + writel_relaxed(ip, mapbase + SA1111_INTPOL0); + writel_relaxed(ip, mapbase + SA1111_WAKEPOL0); return 0; } @@ -330,12 +340,12 @@ static int sa1111_wake_irq(struct irq_data *d, unsigned int on) void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); u32 we, mask = sa1111_irqmask(d); - we = sa1111_readl(mapbase + SA1111_WAKEEN0); + we = readl_relaxed(mapbase + SA1111_WAKEEN0); if (on) we |= mask; else we &= ~mask; - sa1111_writel(we, mapbase + SA1111_WAKEEN0); + writel_relaxed(we, mapbase + SA1111_WAKEEN0); return 0; } @@ -350,10 +360,30 @@ static struct irq_chip sa1111_irq_chip = { .irq_set_wake = sa1111_wake_irq, }; +static int sa1111_irqdomain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct sa1111 *sachip = d->host_data; + + /* Disallow unavailable interrupts */ + if (hwirq > SSPROR && hwirq < AUDXMTDMADONEA) + return -EINVAL; + + irq_set_chip_data(irq, sachip); + irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq); + irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); + + return 0; +} + +static const struct irq_domain_ops sa1111_irqdomain_ops = { + .map = sa1111_irqdomain_map, + .xlate = irq_domain_xlate_twocell, +}; + static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) { void __iomem *irqbase = sachip->base + SA1111_INTC; - unsigned i, irq; int ret; /* @@ -373,38 +403,40 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) sachip->irq_base = ret; /* disable all IRQs */ - sa1111_writel(0, irqbase + SA1111_INTEN0); - sa1111_writel(0, irqbase + SA1111_INTEN1); - sa1111_writel(0, irqbase + SA1111_WAKEEN0); - sa1111_writel(0, irqbase + SA1111_WAKEEN1); + writel_relaxed(0, irqbase + SA1111_INTEN0); + writel_relaxed(0, irqbase + SA1111_INTEN1); + writel_relaxed(0, irqbase + SA1111_WAKEEN0); + writel_relaxed(0, irqbase + SA1111_WAKEEN1); /* * detect on rising edge. Note: Feb 2001 Errata for SA1111 * specifies that S0ReadyInt and S1ReadyInt should be '1'. */ - sa1111_writel(0, irqbase + SA1111_INTPOL0); - sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) | - BIT(IRQ_S1_READY_NINT & 31), - irqbase + SA1111_INTPOL1); + writel_relaxed(0, irqbase + SA1111_INTPOL0); + writel_relaxed(BIT(IRQ_S0_READY_NINT & 31) | + BIT(IRQ_S1_READY_NINT & 31), + irqbase + SA1111_INTPOL1); /* clear all IRQs */ - sa1111_writel(~0, irqbase + SA1111_INTSTATCLR0); - sa1111_writel(~0, irqbase + SA1111_INTSTATCLR1); - - for (i = IRQ_GPAIN0; i <= SSPROR; i++) { - irq = sachip->irq_base + i; - irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq); - irq_set_chip_data(irq, sachip); - irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); - } + writel_relaxed(~0, irqbase + SA1111_INTSTATCLR0); + writel_relaxed(~0, irqbase + SA1111_INTSTATCLR1); - for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) { - irq = sachip->irq_base + i; - irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq); - irq_set_chip_data(irq, sachip); - irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); + sachip->irqdomain = irq_domain_add_linear(NULL, SA1111_IRQ_NR, + &sa1111_irqdomain_ops, + sachip); + if (!sachip->irqdomain) { + irq_free_descs(sachip->irq_base, SA1111_IRQ_NR); + return -ENOMEM; } + irq_domain_associate_many(sachip->irqdomain, + sachip->irq_base + IRQ_GPAIN0, + IRQ_GPAIN0, SSPROR + 1 - IRQ_GPAIN0); + irq_domain_associate_many(sachip->irqdomain, + sachip->irq_base + AUDXMTDMADONEA, + AUDXMTDMADONEA, + IRQ_S1_BVD1_STSCHG + 1 - AUDXMTDMADONEA); + /* * Register SA1111 interrupt */ @@ -420,20 +452,22 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) static void sa1111_remove_irq(struct sa1111 *sachip) { + struct irq_domain *domain = sachip->irqdomain; void __iomem *irqbase = sachip->base + SA1111_INTC; + int i; /* disable all IRQs */ - sa1111_writel(0, irqbase + SA1111_INTEN0); - sa1111_writel(0, irqbase + SA1111_INTEN1); - sa1111_writel(0, irqbase + SA1111_WAKEEN0); - sa1111_writel(0, irqbase + SA1111_WAKEEN1); + writel_relaxed(0, irqbase + SA1111_INTEN0); + writel_relaxed(0, irqbase + SA1111_INTEN1); + writel_relaxed(0, irqbase + SA1111_WAKEEN0); + writel_relaxed(0, irqbase + SA1111_WAKEEN1); - if (sachip->irq != NO_IRQ) { - irq_set_chained_handler_and_data(sachip->irq, NULL, NULL); - irq_free_descs(sachip->irq_base, SA1111_IRQ_NR); + irq_set_chained_handler_and_data(sachip->irq, NULL, NULL); + for (i = 0; i < SA1111_IRQ_NR; i++) + irq_dispose_mapping(irq_find_mapping(domain, i)); + irq_domain_remove(domain); - release_mem_region(sachip->phys + SA1111_INTC, 512); - } + release_mem_region(sachip->phys + SA1111_INTC, 512); } enum { @@ -572,7 +606,7 @@ static int sa1111_gpio_to_irq(struct gpio_chip *gc, unsigned offset) { struct sa1111 *sachip = gc_to_sa1111(gc); - return sachip->irq_base + offset; + return sa1111_map_irq(sachip, offset); } static int sa1111_setup_gpios(struct sa1111 *sachip) @@ -618,11 +652,11 @@ static void sa1111_wake(struct sa1111 *sachip) /* * Turn VCO on, and disable PLL Bypass. */ - r = sa1111_readl(sachip->base + SA1111_SKCR); + r = readl_relaxed(sachip->base + SA1111_SKCR); r &= ~SKCR_VCO_OFF; - sa1111_writel(r, sachip->base + SA1111_SKCR); + writel_relaxed(r, sachip->base + SA1111_SKCR); r |= SKCR_PLL_BYPASS | SKCR_OE_EN; - sa1111_writel(r, sachip->base + SA1111_SKCR); + writel_relaxed(r, sachip->base + SA1111_SKCR); /* * Wait lock time. SA1111 manual _doesn't_ @@ -634,7 +668,7 @@ static void sa1111_wake(struct sa1111 *sachip) * Enable RCLK. We also ensure that RDYEN is set. */ r |= SKCR_RCLKEN | SKCR_RDYEN; - sa1111_writel(r, sachip->base + SA1111_SKCR); + writel_relaxed(r, sachip->base + SA1111_SKCR); /* * Wait 14 RCLK cycles for the chip to finish coming out @@ -645,7 +679,7 @@ static void sa1111_wake(struct sa1111 *sachip) /* * Ensure all clocks are initially off. */ - sa1111_writel(0, sachip->base + SA1111_SKPCR); + writel_relaxed(0, sachip->base + SA1111_SKPCR); spin_unlock_irqrestore(&sachip->lock, flags); } @@ -675,7 +709,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac, if (cas_latency == 3) smcr |= SMCR_CLAT; - sa1111_writel(smcr, sachip->base + SA1111_SMCR); + writel_relaxed(smcr, sachip->base + SA1111_SMCR); /* * Now clear the bits in the DMA mask to work around the SA1111 @@ -723,8 +757,8 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent, dev->mapbase = sachip->base + info->offset; dev->skpcr_mask = info->skpcr_mask; - for (i = 0; i < ARRAY_SIZE(info->irq); i++) - dev->irq[i] = sachip->irq_base + info->irq[i]; + for (i = 0; i < ARRAY_SIZE(info->hwirq); i++) + dev->hwirq[i] = info->hwirq[i]; /* * If the parent device has a DMA mask associated with it, and @@ -814,7 +848,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) /* * Probe for the chip. Only touch the SBI registers. */ - id = sa1111_readl(sachip->base + SA1111_SKID); + id = readl_relaxed(sachip->base + SA1111_SKID); if ((id & SKID_ID_MASK) != SKID_SA1111_ID) { printk(KERN_DEBUG "SA1111 not detected: ID = %08lx\n", id); ret = -ENODEV; @@ -833,11 +867,9 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) * The interrupt controller must be initialised before any * other device to ensure that the interrupts are available. */ - if (sachip->irq != NO_IRQ) { - ret = sa1111_setup_irq(sachip, pd->irq_base); - if (ret) - goto err_clk; - } + ret = sa1111_setup_irq(sachip, pd->irq_base); + if (ret) + goto err_clk; /* Setup the GPIOs - should really be done after the IRQ setup */ ret = sa1111_setup_gpios(sachip); @@ -864,8 +896,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) * DMA. It can otherwise be held firmly in the off position. * (currently, we always enable it.) */ - val = sa1111_readl(sachip->base + SA1111_SKPCR); - sa1111_writel(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR); + val = readl_relaxed(sachip->base + SA1111_SKPCR); + writel_relaxed(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR); /* * Enable the SA1110 memory bus request and grant signals. @@ -962,31 +994,31 @@ static int sa1111_suspend_noirq(struct device *dev) * Save state. */ base = sachip->base; - save->skcr = sa1111_readl(base + SA1111_SKCR); - save->skpcr = sa1111_readl(base + SA1111_SKPCR); - save->skcdr = sa1111_readl(base + SA1111_SKCDR); - save->skaud = sa1111_readl(base + SA1111_SKAUD); - save->skpwm0 = sa1111_readl(base + SA1111_SKPWM0); - save->skpwm1 = sa1111_readl(base + SA1111_SKPWM1); + save->skcr = readl_relaxed(base + SA1111_SKCR); + save->skpcr = readl_relaxed(base + SA1111_SKPCR); + save->skcdr = readl_relaxed(base + SA1111_SKCDR); + save->skaud = readl_relaxed(base + SA1111_SKAUD); + save->skpwm0 = readl_relaxed(base + SA1111_SKPWM0); + save->skpwm1 = readl_relaxed(base + SA1111_SKPWM1); - sa1111_writel(0, sachip->base + SA1111_SKPWM0); - sa1111_writel(0, sachip->base + SA1111_SKPWM1); + writel_relaxed(0, sachip->base + SA1111_SKPWM0); + writel_relaxed(0, sachip->base + SA1111_SKPWM1); base = sachip->base + SA1111_INTC; - save->intpol0 = sa1111_readl(base + SA1111_INTPOL0); - save->intpol1 = sa1111_readl(base + SA1111_INTPOL1); - save->inten0 = sa1111_readl(base + SA1111_INTEN0); - save->inten1 = sa1111_readl(base + SA1111_INTEN1); - save->wakepol0 = sa1111_readl(base + SA1111_WAKEPOL0); - save->wakepol1 = sa1111_readl(base + SA1111_WAKEPOL1); - save->wakeen0 = sa1111_readl(base + SA1111_WAKEEN0); - save->wakeen1 = sa1111_readl(base + SA1111_WAKEEN1); + save->intpol0 = readl_relaxed(base + SA1111_INTPOL0); + save->intpol1 = readl_relaxed(base + SA1111_INTPOL1); + save->inten0 = readl_relaxed(base + SA1111_INTEN0); + save->inten1 = readl_relaxed(base + SA1111_INTEN1); + save->wakepol0 = readl_relaxed(base + SA1111_WAKEPOL0); + save->wakepol1 = readl_relaxed(base + SA1111_WAKEPOL1); + save->wakeen0 = readl_relaxed(base + SA1111_WAKEEN0); + save->wakeen1 = readl_relaxed(base + SA1111_WAKEEN1); /* * Disable. */ - val = sa1111_readl(sachip->base + SA1111_SKCR); - sa1111_writel(val | SKCR_SLEEP, sachip->base + SA1111_SKCR); + val = readl_relaxed(sachip->base + SA1111_SKCR); + writel_relaxed(val | SKCR_SLEEP, sachip->base + SA1111_SKCR); clk_disable(sachip->clk); @@ -1023,7 +1055,7 @@ static int sa1111_resume_noirq(struct device *dev) * Ensure that the SA1111 is still here. * FIXME: shouldn't do this here. */ - id = sa1111_readl(sachip->base + SA1111_SKID); + id = readl_relaxed(sachip->base + SA1111_SKID); if ((id & SKID_ID_MASK) != SKID_SA1111_ID) { __sa1111_remove(sachip); dev_set_drvdata(dev, NULL); @@ -1047,26 +1079,26 @@ static int sa1111_resume_noirq(struct device *dev) */ spin_lock_irqsave(&sachip->lock, flags); - sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN0); - sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN1); + writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN0); + writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN1); base = sachip->base; - sa1111_writel(save->skcr, base + SA1111_SKCR); - sa1111_writel(save->skpcr, base + SA1111_SKPCR); - sa1111_writel(save->skcdr, base + SA1111_SKCDR); - sa1111_writel(save->skaud, base + SA1111_SKAUD); - sa1111_writel(save->skpwm0, base + SA1111_SKPWM0); - sa1111_writel(save->skpwm1, base + SA1111_SKPWM1); + writel_relaxed(save->skcr, base + SA1111_SKCR); + writel_relaxed(save->skpcr, base + SA1111_SKPCR); + writel_relaxed(save->skcdr, base + SA1111_SKCDR); + writel_relaxed(save->skaud, base + SA1111_SKAUD); + writel_relaxed(save->skpwm0, base + SA1111_SKPWM0); + writel_relaxed(save->skpwm1, base + SA1111_SKPWM1); base = sachip->base + SA1111_INTC; - sa1111_writel(save->intpol0, base + SA1111_INTPOL0); - sa1111_writel(save->intpol1, base + SA1111_INTPOL1); - sa1111_writel(save->inten0, base + SA1111_INTEN0); - sa1111_writel(save->inten1, base + SA1111_INTEN1); - sa1111_writel(save->wakepol0, base + SA1111_WAKEPOL0); - sa1111_writel(save->wakepol1, base + SA1111_WAKEPOL1); - sa1111_writel(save->wakeen0, base + SA1111_WAKEEN0); - sa1111_writel(save->wakeen1, base + SA1111_WAKEEN1); + writel_relaxed(save->intpol0, base + SA1111_INTPOL0); + writel_relaxed(save->intpol1, base + SA1111_INTPOL1); + writel_relaxed(save->inten0, base + SA1111_INTEN0); + writel_relaxed(save->inten1, base + SA1111_INTEN1); + writel_relaxed(save->wakepol0, base + SA1111_WAKEPOL0); + writel_relaxed(save->wakepol1, base + SA1111_WAKEPOL1); + writel_relaxed(save->wakeen0, base + SA1111_WAKEEN0); + writel_relaxed(save->wakeen1, base + SA1111_WAKEEN1); spin_unlock_irqrestore(&sachip->lock, flags); @@ -1153,7 +1185,7 @@ static unsigned int __sa1111_pll_clock(struct sa1111 *sachip) { unsigned int skcdr, fbdiv, ipdiv, opdiv; - skcdr = sa1111_readl(sachip->base + SA1111_SKCDR); + skcdr = readl_relaxed(sachip->base + SA1111_SKCDR); fbdiv = (skcdr & 0x007f) + 2; ipdiv = ((skcdr & 0x0f80) >> 7) + 2; @@ -1195,13 +1227,13 @@ void sa1111_select_audio_mode(struct sa1111_dev *sadev, int mode) spin_lock_irqsave(&sachip->lock, flags); - val = sa1111_readl(sachip->base + SA1111_SKCR); + val = readl_relaxed(sachip->base + SA1111_SKCR); if (mode == SA1111_AUDIO_I2S) { val &= ~SKCR_SELAC; } else { val |= SKCR_SELAC; } - sa1111_writel(val, sachip->base + SA1111_SKCR); + writel_relaxed(val, sachip->base + SA1111_SKCR); spin_unlock_irqrestore(&sachip->lock, flags); } @@ -1226,7 +1258,7 @@ int sa1111_set_audio_rate(struct sa1111_dev *sadev, int rate) if (div > 128) div = 128; - sa1111_writel(div - 1, sachip->base + SA1111_SKAUD); + writel_relaxed(div - 1, sachip->base + SA1111_SKAUD); return 0; } @@ -1244,7 +1276,7 @@ int sa1111_get_audio_rate(struct sa1111_dev *sadev) if (sadev->devid != SA1111_DEVID_SAC) return -EINVAL; - div = sa1111_readl(sachip->base + SA1111_SKAUD) + 1; + div = readl_relaxed(sachip->base + SA1111_SKAUD) + 1; return __sa1111_pll_clock(sachip) / (256 * div); } @@ -1261,10 +1293,10 @@ void sa1111_set_io_dir(struct sa1111_dev *sadev, #define MODIFY_BITS(port, mask, dir) \ if (mask) { \ - val = sa1111_readl(port); \ + val = readl_relaxed(port); \ val &= ~(mask); \ val |= (dir) & (mask); \ - sa1111_writel(val, port); \ + writel_relaxed(val, port); \ } spin_lock_irqsave(&sachip->lock, flags); @@ -1329,8 +1361,8 @@ int sa1111_enable_device(struct sa1111_dev *sadev) if (ret == 0) { spin_lock_irqsave(&sachip->lock, flags); - val = sa1111_readl(sachip->base + SA1111_SKPCR); - sa1111_writel(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR); + val = readl_relaxed(sachip->base + SA1111_SKPCR); + writel_relaxed(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR); spin_unlock_irqrestore(&sachip->lock, flags); } return ret; @@ -1348,8 +1380,8 @@ void sa1111_disable_device(struct sa1111_dev *sadev) unsigned int val; spin_lock_irqsave(&sachip->lock, flags); - val = sa1111_readl(sachip->base + SA1111_SKPCR); - sa1111_writel(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR); + val = readl_relaxed(sachip->base + SA1111_SKPCR); + writel_relaxed(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR); spin_unlock_irqrestore(&sachip->lock, flags); if (sachip->pdata && sachip->pdata->disable) @@ -1359,9 +1391,10 @@ EXPORT_SYMBOL(sa1111_disable_device); int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num) { - if (num >= ARRAY_SIZE(sadev->irq)) + struct sa1111 *sachip = sa1111_chip_driver(sadev); + if (num >= ARRAY_SIZE(sadev->hwirq)) return -EINVAL; - return sadev->irq[num]; + return sa1111_map_irq(sachip, sadev->hwirq[num]); } EXPORT_SYMBOL_GPL(sa1111_get_irq); @@ -1379,36 +1412,6 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv) return !!(dev->devid & drv->devid); } -static int sa1111_bus_suspend(struct device *dev, pm_message_t state) -{ - struct sa1111_dev *sadev = to_sa1111_device(dev); - struct sa1111_driver *drv = SA1111_DRV(dev->driver); - int ret = 0; - - if (drv && drv->suspend) - ret = drv->suspend(sadev, state); - return ret; -} - -static int sa1111_bus_resume(struct device *dev) -{ - struct sa1111_dev *sadev = to_sa1111_device(dev); - struct sa1111_driver *drv = SA1111_DRV(dev->driver); - int ret = 0; - - if (drv && drv->resume) - ret = drv->resume(sadev); - return ret; -} - -static void sa1111_bus_shutdown(struct device *dev) -{ - struct sa1111_driver *drv = SA1111_DRV(dev->driver); - - if (drv && drv->shutdown) - drv->shutdown(to_sa1111_device(dev)); -} - static int sa1111_bus_probe(struct device *dev) { struct sa1111_dev *sadev = to_sa1111_device(dev); @@ -1436,9 +1439,6 @@ struct bus_type sa1111_bus_type = { .match = sa1111_match, .probe = sa1111_bus_probe, .remove = sa1111_bus_remove, - .suspend = sa1111_bus_suspend, - .resume = sa1111_bus_resume, - .shutdown = sa1111_bus_shutdown, }; EXPORT_SYMBOL(sa1111_bus_type); diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index ce5ee762ed66..4cab9bb823fb 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -338,6 +338,7 @@ static inline int find_next_bit_le(const void *p, int size, int offset) #endif +#include <asm-generic/bitops/find.h> #include <asm-generic/bitops/le.h> /* diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h index a7273ad9587a..58e039a851af 100644 --- a/arch/arm/include/asm/exception.h +++ b/arch/arm/include/asm/exception.h @@ -10,11 +10,10 @@ #include <linux/interrupt.h> -#define __exception __attribute__((section(".exception.text"))) #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry #else -#define __exception_irq_entry __exception +#define __exception_irq_entry #endif #endif /* __ASM_ARM_EXCEPTION_H */ diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index 01c3d92624e5..8d1f498e5dd8 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -117,6 +117,10 @@ # endif #endif +#if defined(CONFIG_CACHE_B15_RAC) +# define MULTI_CACHE 1 +#endif + #if defined(CONFIG_CPU_V7M) # define MULTI_CACHE 1 #endif diff --git a/arch/arm/include/asm/hardware/cache-b15-rac.h b/arch/arm/include/asm/hardware/cache-b15-rac.h new file mode 100644 index 000000000000..3d43ec06fd35 --- /dev/null +++ b/arch/arm/include/asm/hardware/cache-b15-rac.h @@ -0,0 +1,10 @@ +#ifndef __ASM_ARM_HARDWARE_CACHE_B15_RAC_H +#define __ASM_ARM_HARDWARE_CACHE_B15_RAC_H + +#ifndef __ASSEMBLY__ + +void b15_flush_kern_cache_all(void); + +#endif + +#endif diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h index 0bbf163d1ed3..798e520e8a49 100644 --- a/arch/arm/include/asm/hardware/sa1111.h +++ b/arch/arm/include/asm/hardware/sa1111.h @@ -16,33 +16,6 @@ #include <mach/bitfield.h> /* - * The SA1111 is always located at virtual 0xf4000000, and is always - * "native" endian. - */ - -#define SA1111_VBASE 0xf4000000 - -/* Don't use these! */ -#define SA1111_p2v( x ) ((x) - SA1111_BASE + SA1111_VBASE) -#define SA1111_v2p( x ) ((x) - SA1111_VBASE + SA1111_BASE) - -#ifndef __ASSEMBLY__ -#define _SA1111(x) ((x) + sa1111->resource.start) -#endif - -#define sa1111_writel(val,addr) __raw_writel(val, addr) -#define sa1111_readl(addr) __raw_readl(addr) - -/* - * 26 bits of the SA-1110 address bus are available to the SA-1111. - * Use these when feeding target addresses to the DMA engines. - */ - -#define SA1111_ADDR_WIDTH (26) -#define SA1111_ADDR_MASK ((1<<SA1111_ADDR_WIDTH)-1) -#define SA1111_DMA_ADDR(x) ((x)&SA1111_ADDR_MASK) - -/* * Don't ask the (SAC) DMA engines to move less than this amount. */ @@ -417,7 +390,7 @@ struct sa1111_dev { struct resource res; void __iomem *mapbase; unsigned int skpcr_mask; - unsigned int irq[6]; + unsigned int hwirq[6]; u64 dma_mask; }; @@ -431,9 +404,6 @@ struct sa1111_driver { unsigned int devid; int (*probe)(struct sa1111_dev *); int (*remove)(struct sa1111_dev *); - int (*suspend)(struct sa1111_dev *, pm_message_t); - int (*resume)(struct sa1111_dev *); - void (*shutdown)(struct sa1111_dev *); }; #define SA1111_DRV(_d) container_of((_d), struct sa1111_driver, drv) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index acbf9ec7b396..ef54013b5b9f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -306,4 +306,11 @@ static inline void kvm_fpsimd_flush_cpu_state(void) {} static inline void kvm_arm_vhe_guest_enter(void) {} static inline void kvm_arm_vhe_guest_exit(void) {} + +static inline bool kvm_arm_harden_branch_predictor(void) +{ + /* No way to detect it yet, pretend it is not there. */ + return false; +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h deleted file mode 100644 index 6bda945d31fa..000000000000 --- a/arch/arm/include/asm/kvm_psci.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2012 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __ARM_KVM_PSCI_H__ -#define __ARM_KVM_PSCI_H__ - -#define KVM_ARM_PSCI_0_1 1 -#define KVM_ARM_PSCI_0_2 2 - -int kvm_psci_version(struct kvm_vcpu *vcpu); -int kvm_psci_call(struct kvm_vcpu *vcpu); - -#endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 1f54e4e98c1e..496667703693 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -88,6 +88,7 @@ #else /* CONFIG_MMU */ #ifndef __ASSEMBLY__ +extern unsigned long setup_vectors_base(void); extern unsigned long vectors_base; #define VECTORS_BASE vectors_base #endif diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index 960d9dc4f380..1f0de808d111 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -10,10 +10,7 @@ extern unsigned long pcibios_min_io; extern unsigned long pcibios_min_mem; #define PCIBIOS_MIN_MEM pcibios_min_mem -static inline int pcibios_assign_all_busses(void) -{ - return pci_has_flag(PCI_REASSIGN_ALL_RSRC); -} +#define pcibios_assign_all_busses() pci_has_flag(PCI_REASSIGN_ALL_BUS) #ifdef CONFIG_PCI_DOMAINS static inline int pci_proc_domain(struct pci_bus *bus) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 338cbe0a18ef..1bf65b47808a 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -45,6 +45,16 @@ struct thread_struct { struct debug_info debug; }; +/* + * Everything usercopied to/from thread_struct is statically-sized, so + * no hardened usercopy whitelist is needed. + */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = *size = 0; +} + #define INIT_THREAD { } #define start_thread(regs,pc,sp) \ diff --git a/arch/arm/include/asm/ptdump.h b/arch/arm/include/asm/ptdump.h new file mode 100644 index 000000000000..3ebf9718288d --- /dev/null +++ b/arch/arm/include/asm/ptdump.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2014 ARM Ltd. */ +#ifndef __ASM_PTDUMP_H +#define __ASM_PTDUMP_H + +#ifdef CONFIG_ARM_PTDUMP_CORE + +#include <linux/mm_types.h> +#include <linux/seq_file.h> + +struct addr_marker { + unsigned long start_address; + char *name; +}; + +struct ptdump_info { + struct mm_struct *mm; + const struct addr_marker *markers; + unsigned long base_addr; +}; + +void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); +#ifdef CONFIG_ARM_PTDUMP_DEBUGFS +int ptdump_debugfs_register(struct ptdump_info *info, const char *name); +#else +static inline int ptdump_debugfs_register(struct ptdump_info *info, + const char *name) +{ + return 0; +} +#endif /* CONFIG_ARM_PTDUMP_DEBUGFS */ + +void ptdump_check_wx(void); + +#endif /* CONFIG_ARM_PTDUMP_CORE */ + +#ifdef CONFIG_DEBUG_WX +#define debug_checkwx() ptdump_check_wx() +#else +#define debug_checkwx() do { } while (0) +#endif + +#endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm/include/asm/sections.h b/arch/arm/include/asm/sections.h index 63dfe1f10335..4ceb4f757d4d 100644 --- a/arch/arm/include/asm/sections.h +++ b/arch/arm/include/asm/sections.h @@ -6,4 +6,25 @@ extern char _exiprom[]; +extern char __idmap_text_start[]; +extern char __idmap_text_end[]; +extern char __entry_text_start[]; +extern char __entry_text_end[]; +extern char __hyp_idmap_text_start[]; +extern char __hyp_idmap_text_end[]; + +static inline bool in_entry_text(unsigned long addr) +{ + return memory_contains(__entry_text_start, __entry_text_end, + (void *)addr, 1); +} + +static inline bool in_idmap_text(unsigned long addr) +{ + void *a = (void *)addr; + return memory_contains(__idmap_text_start, __idmap_text_end, a, 1) || + memory_contains(__hyp_idmap_text_start, __hyp_idmap_text_end, + a, 1); +} + #endif /* _ASM_ARM_SECTIONS_H */ diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h index f54a3136aac6..111a1d8a41dd 100644 --- a/arch/arm/include/asm/string.h +++ b/arch/arm/include/asm/string.h @@ -39,18 +39,4 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) return __memset64(p, v, n * 8, v >> 32); } -extern void __memzero(void *ptr, __kernel_size_t n); - -#define memset(p,v,n) \ - ({ \ - void *__p = (p); size_t __n = n; \ - if ((__n) != 0) { \ - if (__builtin_constant_p((v)) && (v) == 0) \ - __memzero((__p),(__n)); \ - else \ - memset((__p),(v),(__n)); \ - } \ - (__p); \ - }) - #endif diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index f9a6c5fc3fd1..a00288d75ee6 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -28,18 +28,6 @@ static inline int __in_irqentry_text(unsigned long ptr) ptr < (unsigned long)&__irqentry_text_end; } -static inline int in_exception_text(unsigned long ptr) -{ - extern char __exception_text_start[]; - extern char __exception_text_end[]; - int in; - - in = ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; - - return in ? : __in_irqentry_text(ptr); -} - extern void __init early_trap_init(void *); extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame); extern void ptrace_break(struct task_struct *tsk, struct pt_regs *regs); diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index a91ae499614c..2c3b952be63e 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -20,8 +20,10 @@ #ifndef __ASM_UNIFIED_H #define __ASM_UNIFIED_H -#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED) +#if defined(__ASSEMBLY__) .syntax unified +#else +__asm__(".syntax unified"); #endif #ifdef CONFIG_CPU_V7M @@ -64,77 +66,4 @@ #endif /* CONFIG_THUMB2_KERNEL */ -#ifndef CONFIG_ARM_ASM_UNIFIED - -/* - * If the unified assembly syntax isn't used (in ARM mode), these - * macros expand to an empty string - */ -#ifdef __ASSEMBLY__ - .macro it, cond - .endm - .macro itt, cond - .endm - .macro ite, cond - .endm - .macro ittt, cond - .endm - .macro itte, cond - .endm - .macro itet, cond - .endm - .macro itee, cond - .endm - .macro itttt, cond - .endm - .macro ittte, cond - .endm - .macro ittet, cond - .endm - .macro ittee, cond - .endm - .macro itett, cond - .endm - .macro itete, cond - .endm - .macro iteet, cond - .endm - .macro iteee, cond - .endm -#else /* !__ASSEMBLY__ */ -__asm__( -" .macro it, cond\n" -" .endm\n" -" .macro itt, cond\n" -" .endm\n" -" .macro ite, cond\n" -" .endm\n" -" .macro ittt, cond\n" -" .endm\n" -" .macro itte, cond\n" -" .endm\n" -" .macro itet, cond\n" -" .endm\n" -" .macro itee, cond\n" -" .endm\n" -" .macro itttt, cond\n" -" .endm\n" -" .macro ittte, cond\n" -" .endm\n" -" .macro ittet, cond\n" -" .endm\n" -" .macro ittee, cond\n" -" .endm\n" -" .macro itett, cond\n" -" .endm\n" -" .macro itete, cond\n" -" .endm\n" -" .macro iteet, cond\n" -" .endm\n" -" .macro iteee, cond\n" -" .endm\n"); -#endif /* __ASSEMBLY__ */ - -#endif /* CONFIG_ARM_ASM_UNIFIED */ - #endif /* !__ASM_UNIFIED_H */ diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 5266fd9ad6b4..783fbb4de5f9 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -92,7 +92,6 @@ EXPORT_SYMBOL(__memset64); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); -EXPORT_SYMBOL(__memzero); EXPORT_SYMBOL(mmioset); EXPORT_SYMBOL(mmiocpy); diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index 0cd0aefb3a8f..ed46ca69813d 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -527,7 +527,7 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw) struct pci_sys_data *sys; LIST_HEAD(head); - pci_add_flags(PCI_REASSIGN_ALL_RSRC); + pci_add_flags(PCI_REASSIGN_ALL_BUS); if (hw->preinit) hw->preinit(); pcibios_init_hw(parent, hw, &head); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index fbc707626b3e..1752033b0070 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -82,11 +82,7 @@ #endif .endm -#ifdef CONFIG_KPROBES - .section .kprobes.text,"ax",%progbits -#else - .text -#endif + .section .entry.text,"ax",%progbits /* * Invalid mode handlers diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index e655dcd0a933..3c4f88701f22 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -37,6 +37,7 @@ saved_pc .req lr #define TRACE(x...) #endif + .section .entry.text,"ax",%progbits .align 5 #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING)) /* diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 21dde771a7dd..6e0375e7db05 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -105,8 +105,9 @@ __mmap_switched: ARM( ldmia r4!, {r0, r1, sp} ) THUMB( ldmia r4!, {r0, r1, r3} ) THUMB( mov sp, r3 ) - sub r1, r1, r0 - bl __memzero @ clear .bss + sub r2, r1, r0 + mov r1, #0 + bl memset @ clear .bss ldmia r4, {r0, r1, r2, r3} str r9, [r0] @ Save processor ID diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index af2a7f1e3103..629e25152c0d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -44,17 +44,17 @@ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); /* Number of BRP/WRP registers on this CPU. */ -static int core_num_brps; -static int core_num_wrps; +static int core_num_brps __ro_after_init; +static int core_num_wrps __ro_after_init; /* Debug architecture version. */ -static u8 debug_arch; +static u8 debug_arch __ro_after_init; /* Does debug architecture support OS Save and Restore? */ -static bool has_ossr; +static bool has_ossr __ro_after_init; /* Maximum supported watchpoint length. */ -static u8 max_watchpoint_len; +static u8 max_watchpoint_len __ro_after_init; #define READ_WB_REG_CASE(OP2, M, VAL) \ case ((OP2 << 4) + M): \ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index b4fbf00ee4ad..2da087926ebe 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -379,6 +379,9 @@ asmlinkage void secondary_start_kernel(void) cpu_init(); +#ifndef CONFIG_MMU + setup_vectors_base(); +#endif pr_debug("CPU%u: Booted secondary processor\n", cpu); preempt_disable(); diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 65228bf4c6df..a56e7c856ab5 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -3,6 +3,7 @@ #include <linux/sched/debug.h> #include <linux/stacktrace.h> +#include <asm/sections.h> #include <asm/stacktrace.h> #include <asm/traps.h> @@ -63,7 +64,6 @@ EXPORT_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE struct stack_trace_data { struct stack_trace *trace; - unsigned long last_pc; unsigned int no_sched_functions; unsigned int skip; }; @@ -87,16 +87,7 @@ static int save_trace(struct stackframe *frame, void *d) if (trace->nr_entries >= trace->max_entries) return 1; - /* - * in_exception_text() is designed to test if the PC is one of - * the functions which has an exception stack above it, but - * unfortunately what is in frame->pc is the return LR value, - * not the saved PC value. So, we need to track the previous - * frame PC value when doing this. - */ - addr = data->last_pc; - data->last_pc = frame->pc; - if (!in_exception_text(addr)) + if (!in_entry_text(frame->pc)) return 0; regs = (struct pt_regs *)frame->sp; @@ -114,7 +105,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk, struct stackframe frame; data.trace = trace; - data.last_pc = ULONG_MAX; data.skip = trace->skip; data.no_sched_functions = nosched; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3e26c6f7a191..5e3633c24e63 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -72,7 +72,7 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from); #endif - if (in_exception_text(where)) + if (in_entry_text(from)) dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs)); } @@ -433,7 +433,7 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) return fn ? fn(regs, instr) : 1; } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +asmlinkage void do_undefinstr(struct pt_regs *regs) { unsigned int instr; siginfo_t info; diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index ec4b3f94ad80..12b87591eb7c 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -96,9 +96,9 @@ SECTIONS .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ IDMAP_TEXT - __exception_text_start = .; - *(.exception.text) - __exception_text_end = .; + __entry_text_start = .; + *(.entry.text) + __entry_text_end = .; IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index ee53f6518872..84a1ae3ce46e 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -105,9 +105,9 @@ SECTIONS .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ IDMAP_TEXT - __exception_text_start = .; - *(.exception.text) - __exception_text_end = .; + __entry_text_start = .; + *(.entry.text) + __entry_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT TEXT_TEXT diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index cf8bf6bf87c4..910bd8dabb3c 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -21,7 +21,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_mmu.h> -#include <asm/kvm_psci.h> +#include <kvm/arm_psci.h> #include <trace/events/kvm.h> #include "trace.h" @@ -36,9 +36,9 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; - ret = kvm_psci_call(vcpu); + ret = kvm_hvc_call_handler(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -47,7 +47,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + /* + * "If an SMC instruction executed at Non-secure EL1 is + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a + * Trap exception, not a Secure Monitor Call exception [...]" + * + * We need to advance the PC after the trap, as it would + * otherwise return to the same address... + */ + vcpu_set_reg(vcpu, 0, ~0UL); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 4cb0b9624d8f..ad25fd1872c7 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -8,7 +8,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ delay.o delay-loop.o findbit.o memchr.o memcpy.o \ - memmove.o memset.o memzero.o setbit.o \ + memmove.o memset.o setbit.o \ strchr.o strrchr.o \ testchangebit.o testclearbit.o testsetbit.o \ ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S deleted file mode 100644 index 0eded952e089..000000000000 --- a/arch/arm/lib/memzero.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * linux/arch/arm/lib/memzero.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/unwind.h> - - .text - .align 5 - .word 0 -/* - * Align the pointer in r0. r3 contains the number of bytes that we are - * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we - * don't bother; we use byte stores instead. - */ -UNWIND( .fnstart ) -1: subs r1, r1, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r2, [r0], #1 @ 1 - strleb r2, [r0], #1 @ 1 - strb r2, [r0], #1 @ 1 - add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memzero again. - */ - -ENTRY(__memzero) - mov r2, #0 @ 1 - ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 -/* - * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. - */ - cmp r1, #16 @ 1 we can skip this chunk if we - blt 4f @ 1 have < 16 bytes - -#if ! CALGN(1)+0 - -/* - * We need an extra register for this loop - save the return address and - * use the LR - */ - str lr, [sp, #-4]! @ 1 -UNWIND( .fnend ) -UNWIND( .fnstart ) -UNWIND( .save {lr} ) - mov ip, r2 @ 1 - mov lr, r2 @ 1 - -3: subs r1, r1, #64 @ 1 write 32 bytes out per loop - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - bgt 3b @ 1 - ldmeqfd sp!, {pc} @ 1/2 quick exit -/* - * No need to correct the count; we're only testing bits from now on - */ - tst r1, #32 @ 1 - stmneia r0!, {r2, r3, ip, lr} @ 4 - stmneia r0!, {r2, r3, ip, lr} @ 4 - tst r1, #16 @ 1 16 bytes or more? - stmneia r0!, {r2, r3, ip, lr} @ 4 - ldr lr, [sp], #4 @ 1 -UNWIND( .fnend ) - -#else - -/* - * This version aligns the destination pointer in order to write - * whole cache lines at once. - */ - - stmfd sp!, {r4-r7, lr} -UNWIND( .fnend ) -UNWIND( .fnstart ) -UNWIND( .save {r4-r7, lr} ) - mov r4, r2 - mov r5, r2 - mov r6, r2 - mov r7, r2 - mov ip, r2 - mov lr, r2 - - cmp r1, #96 - andgts ip, r0, #31 - ble 3f - - rsb ip, ip, #32 - sub r1, r1, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - movs ip, ip, lsl #2 - strcs r2, [r0], #4 - -3: subs r1, r1, #64 - stmgeia r0!, {r2-r7, ip, lr} - stmgeia r0!, {r2-r7, ip, lr} - bgt 3b - ldmeqfd sp!, {r4-r7, pc} - - tst r1, #32 - stmneia r0!, {r2-r7, ip, lr} - tst r1, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} -UNWIND( .fnend ) - -#endif - -UNWIND( .fnstart ) -4: tst r1, #8 @ 1 8 bytes or more? - stmneia r0!, {r2, r3} @ 2 - tst r1, #4 @ 1 4 bytes or more? - strne r2, [r0], #4 @ 1 -/* - * When we get here, we've got less than 4 bytes to zero. We - * may have an unaligned pointer as well. - */ -5: tst r1, #2 @ 1 2 bytes or more? - strneb r2, [r0], #1 @ 1 - strneb r2, [r0], #1 @ 1 - tst r1, #1 @ 1 a byte left over - strneb r2, [r0], #1 @ 1 - ret lr @ 1 -UNWIND( .fnend ) -ENDPROC(__memzero) diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index 62e7bc3018f0..e457f299cd44 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -17,6 +17,7 @@ #include <linux/mtd/rawnand.h> #include <linux/i2c.h> #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/clk.h> #include <linux/videodev2.h> #include <media/i2c/tvp514x.h> @@ -108,11 +109,20 @@ static struct platform_device davinci_nand_device = { }, }; +static struct gpiod_lookup_table i2c_recovery_gpiod_table = { + .dev_id = "i2c_davinci", + .table = { + GPIO_LOOKUP("davinci_gpio", 15, "sda", + GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + GPIO_LOOKUP("davinci_gpio", 14, "scl", + GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + }, +}; + static struct davinci_i2c_platform_data i2c_pdata = { .bus_freq = 400 /* kHz */, .bus_delay = 0 /* usec */, - .sda_pin = 15, - .scl_pin = 14, + .gpio_recovery = true, }; static int dm355evm_mmc_gpios = -EINVAL; @@ -141,6 +151,7 @@ static struct i2c_board_info dm355evm_i2c_info[] = { static void __init evm_init_i2c(void) { + gpiod_add_lookup_table(&i2c_recovery_gpiod_table); davinci_init_i2c(&i2c_pdata); gpio_request(5, "dm355evm_msp"); diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index b07c9b18d427..85e6fb33b1ee 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -13,6 +13,7 @@ #include <linux/dma-mapping.h> #include <linux/platform_device.h> #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/i2c.h> #include <linux/platform_data/pcf857x.h> #include <linux/platform_data/at24.h> @@ -595,18 +596,28 @@ static struct i2c_board_info __initdata i2c_info[] = { }, }; +static struct gpiod_lookup_table i2c_recovery_gpiod_table = { + .dev_id = "i2c_davinci", + .table = { + GPIO_LOOKUP("davinci_gpio", 44, "sda", + GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + GPIO_LOOKUP("davinci_gpio", 43, "scl", + GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + }, +}; + /* The msp430 uses a slow bitbanged I2C implementation (ergo 20 KHz), * which requires 100 usec of idle bus after i2c writes sent to it. */ static struct davinci_i2c_platform_data i2c_pdata = { .bus_freq = 20 /* kHz */, .bus_delay = 100 /* usec */, - .sda_pin = 44, - .scl_pin = 43, + .gpio_recovery = true, }; static void __init evm_init_i2c(void) { + gpiod_add_lookup_table(&i2c_recovery_gpiod_table); davinci_init_i2c(&i2c_pdata); i2c_add_driver(&dm6446evm_msp_driver); i2c_register_board_info(1, i2c_info, ARRAY_SIZE(i2c_info)); diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 9b49867154bf..6b32dc527edc 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -10,7 +10,6 @@ menuconfig ARCH_MVEBU select ZONE_DMA if ARM_LPAE select GPIOLIB select PCI_QUIRKS if PCI - select OF_ADDRESS_PCI if ARCH_MVEBU diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 36e3c79f4973..07df3a59b13f 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -5,6 +5,7 @@ menu "SA11x0 Implementations" config SA1100_ASSABET bool "Assabet" select ARM_SA1110_CPUFREQ + select GPIO_REG help Say Y here if you are using the Intel(R) StrongARM(R) SA-1110 Microprocessor Development Board (also known as the Assabet). diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index d28ecb9ef172..f68241d995f2 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> +#include <linux/gpio/gpio-reg.h> #include <linux/ioport.h> #include <linux/platform_data/sa11x0-serial.h> #include <linux/serial_core.h> @@ -61,20 +62,45 @@ unsigned long SCR_value = ASSABET_SCR_INIT; EXPORT_SYMBOL(SCR_value); -static unsigned long BCR_value = ASSABET_BCR_DB1110; +static struct gpio_chip *assabet_bcr_gc; + +static const char *assabet_names[] = { + "cf_pwr", "cf_gfx_reset", "nsoft_reset", "irda_fsel", + "irda_md0", "irda_md1", "stereo_loopback", "ncf_bus_on", + "audio_pwr_on", "light_pwr_on", "lcd16data", "lcd_pwr_on", + "rs232_on", "nred_led", "ngreen_led", "vib_on", + "com_dtr", "com_rts", "radio_wake_mod", "i2c_enab", + "tvir_enab", "qmute", "radio_pwr_on", "spkr_off", + "rs232_valid", "com_dcd", "com_cts", "com_dsr", + "radio_cts", "radio_dsr", "radio_dcd", "radio_ri", +}; +/* The old deprecated interface */ void ASSABET_BCR_frob(unsigned int mask, unsigned int val) { - unsigned long flags; + unsigned long m = mask, v = val; - local_irq_save(flags); - BCR_value = (BCR_value & ~mask) | val; - ASSABET_BCR = BCR_value; - local_irq_restore(flags); + assabet_bcr_gc->set_multiple(assabet_bcr_gc, &m, &v); } - EXPORT_SYMBOL(ASSABET_BCR_frob); +static int __init assabet_init_gpio(void __iomem *reg, u32 def_val) +{ + struct gpio_chip *gc; + + writel_relaxed(def_val, reg); + + gc = gpio_reg_init(NULL, reg, -1, 32, "assabet", 0xff000000, def_val, + assabet_names, NULL, NULL); + + if (IS_ERR(gc)) + return PTR_ERR(gc); + + assabet_bcr_gc = gc; + + return gc->base; +} + /* * The codec reset goes to three devices, so we need to release * the rest when any one of these requests it. However, that @@ -146,7 +172,7 @@ static void adv7171_write(unsigned reg, unsigned val) unsigned gpdr = GPDR; unsigned gplr = GPLR; - ASSABET_BCR = BCR_value | ASSABET_BCR_AUDIO_ON; + ASSABET_BCR_frob(ASSABET_BCR_AUDIO_ON, ASSABET_BCR_AUDIO_ON); udelay(100); GPCR = SDA | SCK | MOD; /* clear L3 mode to ensure UDA1341 doesn't respond */ @@ -457,14 +483,6 @@ static void __init assabet_init(void) sa11x0_ppc_configure_mcp(); if (machine_has_neponset()) { - /* - * Angel sets this, but other bootloaders may not. - * - * This must precede any driver calls to BCR_set() - * or BCR_clear(). - */ - ASSABET_BCR = BCR_value = ASSABET_BCR_DB1111; - #ifndef CONFIG_ASSABET_NEPONSET printk( "Warning: Neponset detected but full support " "hasn't been configured in the kernel\n" ); @@ -748,12 +766,31 @@ static int __init assabet_leds_init(void) fs_initcall(assabet_leds_init); #endif +void __init assabet_init_irq(void) +{ + u32 def_val; + + sa1100_init_irq(); + + if (machine_has_neponset()) + def_val = ASSABET_BCR_DB1111; + else + def_val = ASSABET_BCR_DB1110; + + /* + * Angel sets this, but other bootloaders may not. + * + * This must precede any driver calls to BCR_set() or BCR_clear(). + */ + assabet_init_gpio((void *)&ASSABET_BCR, def_val); +} + MACHINE_START(ASSABET, "Intel-Assabet") .atag_offset = 0x100, .fixup = fixup_assabet, .map_io = assabet_map_io, .nr_irqs = SA1100_NR_IRQS, - .init_irq = sa1100_init_irq, + .init_irq = assabet_init_irq, .init_time = sa1100_timer_init, .init_machine = assabet_init, .init_late = sa11x0_init_late, diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index a61a2432766b..b1823f445358 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -3,6 +3,8 @@ * linux/arch/arm/mach-sa1100/neponset.c */ #include <linux/err.h> +#include <linux/gpio/driver.h> +#include <linux/gpio/gpio-reg.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/irq.h> @@ -45,10 +47,13 @@ #define IRR_USAR (1 << 1) #define IRR_SA1111 (1 << 2) +#define NCR_NGPIO 7 + #define MDM_CTL0_RTS1 (1 << 0) #define MDM_CTL0_DTR1 (1 << 1) #define MDM_CTL0_RTS2 (1 << 2) #define MDM_CTL0_DTR2 (1 << 3) +#define MDM_CTL0_NGPIO 4 #define MDM_CTL1_CTS1 (1 << 0) #define MDM_CTL1_DSR1 (1 << 1) @@ -56,80 +61,87 @@ #define MDM_CTL1_CTS2 (1 << 3) #define MDM_CTL1_DSR2 (1 << 4) #define MDM_CTL1_DCD2 (1 << 5) +#define MDM_CTL1_NGPIO 6 #define AUD_SEL_1341 (1 << 0) #define AUD_MUTE_1341 (1 << 1) +#define AUD_NGPIO 2 extern void sa1110_mb_disable(void); +#define to_neponset_gpio_chip(x) container_of(x, struct neponset_gpio_chip, gc) + +static const char *neponset_ncr_names[] = { + "gp01_off", "tp_power", "ms_power", "enet_osc", + "spi_kb_wk_up", "a0vpp", "a1vpp" +}; + +static const char *neponset_mdmctl0_names[] = { + "rts3", "dtr3", "rts1", "dtr1", +}; + +static const char *neponset_mdmctl1_names[] = { + "cts3", "dsr3", "dcd3", "cts1", "dsr1", "dcd1" +}; + +static const char *neponset_aud_names[] = { + "sel_1341", "mute_1341", +}; + struct neponset_drvdata { void __iomem *base; struct platform_device *sa1111; struct platform_device *smc91x; unsigned irq_base; -#ifdef CONFIG_PM_SLEEP - u32 ncr0; - u32 mdm_ctl_0; -#endif + struct gpio_chip *gpio[4]; }; -static void __iomem *nep_base; +static struct neponset_drvdata *nep; void neponset_ncr_frob(unsigned int mask, unsigned int val) { - void __iomem *base = nep_base; - - if (base) { - unsigned long flags; - unsigned v; - - local_irq_save(flags); - v = readb_relaxed(base + NCR_0); - writeb_relaxed((v & ~mask) | val, base + NCR_0); - local_irq_restore(flags); - } else { - WARN(1, "nep_base unset\n"); - } + struct neponset_drvdata *n = nep; + unsigned long m = mask, v = val; + + if (nep) + n->gpio[0]->set_multiple(n->gpio[0], &m, &v); + else + WARN(1, "nep unset\n"); } EXPORT_SYMBOL(neponset_ncr_frob); static void neponset_set_mctrl(struct uart_port *port, u_int mctrl) { - void __iomem *base = nep_base; - u_int mdm_ctl0; + struct neponset_drvdata *n = nep; + unsigned long mask, val = 0; - if (!base) + if (!n) return; - mdm_ctl0 = readb_relaxed(base + MDM_CTL_0); if (port->mapbase == _Ser1UTCR0) { - if (mctrl & TIOCM_RTS) - mdm_ctl0 &= ~MDM_CTL0_RTS2; - else - mdm_ctl0 |= MDM_CTL0_RTS2; - - if (mctrl & TIOCM_DTR) - mdm_ctl0 &= ~MDM_CTL0_DTR2; - else - mdm_ctl0 |= MDM_CTL0_DTR2; + mask = MDM_CTL0_RTS2 | MDM_CTL0_DTR2; + + if (!(mctrl & TIOCM_RTS)) + val |= MDM_CTL0_RTS2; + + if (!(mctrl & TIOCM_DTR)) + val |= MDM_CTL0_DTR2; } else if (port->mapbase == _Ser3UTCR0) { - if (mctrl & TIOCM_RTS) - mdm_ctl0 &= ~MDM_CTL0_RTS1; - else - mdm_ctl0 |= MDM_CTL0_RTS1; - - if (mctrl & TIOCM_DTR) - mdm_ctl0 &= ~MDM_CTL0_DTR1; - else - mdm_ctl0 |= MDM_CTL0_DTR1; + mask = MDM_CTL0_RTS1 | MDM_CTL0_DTR1; + + if (!(mctrl & TIOCM_RTS)) + val |= MDM_CTL0_RTS1; + + if (!(mctrl & TIOCM_DTR)) + val |= MDM_CTL0_DTR1; } - writeb_relaxed(mdm_ctl0, base + MDM_CTL_0); + n->gpio[1]->set_multiple(n->gpio[1], &mask, &val); } static u_int neponset_get_mctrl(struct uart_port *port) { - void __iomem *base = nep_base; + void __iomem *base = nep->base; u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; u_int mdm_ctl1; @@ -231,6 +243,22 @@ static struct irq_chip nochip = { .irq_unmask = nochip_noop, }; +static int neponset_init_gpio(struct gpio_chip **gcp, + struct device *dev, const char *label, void __iomem *reg, + unsigned num, bool in, const char *const * names) +{ + struct gpio_chip *gc; + + gc = gpio_reg_init(dev, reg, -1, num, label, in ? 0xffffffff : 0, + readl_relaxed(reg), names, NULL, NULL); + if (IS_ERR(gc)) + return PTR_ERR(gc); + + *gcp = gc; + + return 0; +} + static struct sa1111_platform_data sa1111_info = { .disable_devs = SA1111_DEVID_PS2_MSE, }; @@ -274,7 +302,7 @@ static int neponset_probe(struct platform_device *dev) }; int ret, irq; - if (nep_base) + if (nep) return -EBUSY; irq = ret = platform_get_irq(dev, 0); @@ -330,6 +358,22 @@ static int neponset_probe(struct platform_device *dev) irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING); irq_set_chained_handler_and_data(irq, neponset_irq_handler, d); + /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */ + writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0); + + neponset_init_gpio(&d->gpio[0], &dev->dev, "neponset-ncr", + d->base + NCR_0, NCR_NGPIO, false, + neponset_ncr_names); + neponset_init_gpio(&d->gpio[1], &dev->dev, "neponset-mdm-ctl0", + d->base + MDM_CTL_0, MDM_CTL0_NGPIO, false, + neponset_mdmctl0_names); + neponset_init_gpio(&d->gpio[2], &dev->dev, "neponset-mdm-ctl1", + d->base + MDM_CTL_1, MDM_CTL1_NGPIO, true, + neponset_mdmctl1_names); + neponset_init_gpio(&d->gpio[3], &dev->dev, "neponset-aud-ctl", + d->base + AUD_CTL, AUD_NGPIO, false, + neponset_aud_names); + /* * We would set IRQ_GPIO25 to be a wake-up IRQ, but unfortunately * something on the Neponset activates this IRQ on sleep (eth?) @@ -340,16 +384,13 @@ static int neponset_probe(struct platform_device *dev) dev_info(&dev->dev, "Neponset daughter board, providing IRQ%u-%u\n", d->irq_base, d->irq_base + NEP_IRQ_NR - 1); - nep_base = d->base; + nep = d; sa1100_register_uart_fns(&neponset_port_fns); /* Ensure that the memory bus request/grant signals are setup */ sa1110_mb_disable(); - /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */ - writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0); - sa1111_resources[0].parent = sa1111_res; sa1111_resources[1].start = d->irq_base + NEP_IRQ_SA1111; sa1111_resources[1].end = d->irq_base + NEP_IRQ_SA1111; @@ -385,7 +426,7 @@ static int neponset_remove(struct platform_device *dev) platform_device_unregister(d->smc91x); irq_set_chained_handler(irq, NULL); irq_free_descs(d->irq_base, NEP_IRQ_NR); - nep_base = NULL; + nep = NULL; iounmap(d->base); kfree(d); @@ -393,30 +434,22 @@ static int neponset_remove(struct platform_device *dev) } #ifdef CONFIG_PM_SLEEP -static int neponset_suspend(struct device *dev) -{ - struct neponset_drvdata *d = dev_get_drvdata(dev); - - d->ncr0 = readb_relaxed(d->base + NCR_0); - d->mdm_ctl_0 = readb_relaxed(d->base + MDM_CTL_0); - - return 0; -} - static int neponset_resume(struct device *dev) { struct neponset_drvdata *d = dev_get_drvdata(dev); + int i, ret = 0; - writeb_relaxed(d->ncr0, d->base + NCR_0); - writeb_relaxed(d->mdm_ctl_0, d->base + MDM_CTL_0); + for (i = 0; i < ARRAY_SIZE(d->gpio); i++) { + ret = gpio_reg_resume(d->gpio[i]); + if (ret) + break; + } - return 0; + return ret; } static const struct dev_pm_ops neponset_pm_ops = { - .suspend_noirq = neponset_suspend, .resume_noirq = neponset_resume, - .freeze_noirq = neponset_suspend, .restore_noirq = neponset_resume, }; #define PM_OPS &neponset_pm_ops diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index fd9077a74fce..7f14acf67caf 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -909,6 +909,14 @@ config OUTER_CACHE_SYNC The outer cache has a outer_cache_fns.sync function pointer that can be used to drain the write buffer of the outer cache. +config CACHE_B15_RAC + bool "Enable the Broadcom Brahma-B15 read-ahead cache controller" + depends on ARCH_BRCMSTB + default y + help + This option enables the Broadcom Brahma-B15 read-ahead cache + controller. If disabled, the read-ahead cache remains off. + config CACHE_FEROCEON_L2 bool "Enable the Feroceon L2 cache controller" depends on ARCH_MV78XX0 || ARCH_MVEBU diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 01bcc33f59e3..9dbb84923e12 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -13,7 +13,8 @@ obj-y += nommu.o obj-$(CONFIG_ARM_MPU) += pmsa-v7.o endif -obj-$(CONFIG_ARM_PTDUMP) += dump.o +obj-$(CONFIG_ARM_PTDUMP_CORE) += dump.o +obj-$(CONFIG_ARM_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o @@ -103,6 +104,7 @@ AFLAGS_proc-v6.o :=-Wa,-march=armv6 AFLAGS_proc-v7.o :=-Wa,-march=armv7-a obj-$(CONFIG_OUTER_CACHE) += l2c-common.o +obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c new file mode 100644 index 000000000000..d9586ba2e63c --- /dev/null +++ b/arch/arm/mm/cache-b15-rac.c @@ -0,0 +1,356 @@ +/* + * Broadcom Brahma-B15 CPU read-ahead cache management functions + * + * Copyright (C) 2015-2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/bitops.h> +#include <linux/of_address.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/syscore_ops.h> +#include <linux/reboot.h> + +#include <asm/cacheflush.h> +#include <asm/hardware/cache-b15-rac.h> + +extern void v7_flush_kern_cache_all(void); + +/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */ +#define RAC_CONFIG0_REG (0x78) +#define RACENPREF_MASK (0x3) +#define RACPREFINST_SHIFT (0) +#define RACENINST_SHIFT (2) +#define RACPREFDATA_SHIFT (4) +#define RACENDATA_SHIFT (6) +#define RAC_CPU_SHIFT (8) +#define RACCFG_MASK (0xff) +#define RAC_CONFIG1_REG (0x7c) +#define RAC_FLUSH_REG (0x80) +#define FLUSH_RAC (1 << 0) + +/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */ +#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \ + RACENPREF_MASK << RACENINST_SHIFT | \ + 1 << RACPREFDATA_SHIFT | \ + RACENPREF_MASK << RACENDATA_SHIFT) + +#define RAC_ENABLED 0 +/* Special state where we want to bypass the spinlock and call directly + * into the v7 cache maintenance operations during suspend/resume + */ +#define RAC_SUSPENDED 1 + +static void __iomem *b15_rac_base; +static DEFINE_SPINLOCK(rac_lock); + +static u32 rac_config0_reg; + +/* Initialization flag to avoid checking for b15_rac_base, and to prevent + * multi-platform kernels from crashing here as well. + */ +static unsigned long b15_rac_flags; + +static inline u32 __b15_rac_disable(void) +{ + u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); + __raw_writel(0, b15_rac_base + RAC_CONFIG0_REG); + dmb(); + return val; +} + +static inline void __b15_rac_flush(void) +{ + u32 reg; + + __raw_writel(FLUSH_RAC, b15_rac_base + RAC_FLUSH_REG); + do { + /* This dmb() is required to force the Bus Interface Unit + * to clean oustanding writes, and forces an idle cycle + * to be inserted. + */ + dmb(); + reg = __raw_readl(b15_rac_base + RAC_FLUSH_REG); + } while (reg & FLUSH_RAC); +} + +static inline u32 b15_rac_disable_and_flush(void) +{ + u32 reg; + + reg = __b15_rac_disable(); + __b15_rac_flush(); + return reg; +} + +static inline void __b15_rac_enable(u32 val) +{ + __raw_writel(val, b15_rac_base + RAC_CONFIG0_REG); + /* dsb() is required here to be consistent with __flush_icache_all() */ + dsb(); +} + +#define BUILD_RAC_CACHE_OP(name, bar) \ +void b15_flush_##name(void) \ +{ \ + unsigned int do_flush; \ + u32 val = 0; \ + \ + if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) { \ + v7_flush_##name(); \ + bar; \ + return; \ + } \ + \ + spin_lock(&rac_lock); \ + do_flush = test_bit(RAC_ENABLED, &b15_rac_flags); \ + if (do_flush) \ + val = b15_rac_disable_and_flush(); \ + v7_flush_##name(); \ + if (!do_flush) \ + bar; \ + else \ + __b15_rac_enable(val); \ + spin_unlock(&rac_lock); \ +} + +#define nobarrier + +/* The readahead cache present in the Brahma-B15 CPU is a special piece of + * hardware after the integrated L2 cache of the B15 CPU complex whose purpose + * is to prefetch instruction and/or data with a line size of either 64 bytes + * or 256 bytes. The rationale is that the data-bus of the CPU interface is + * optimized for 256-bytes transactions, and enabling the readahead cache + * provides a significant performance boost we want it enabled (typically + * twice the performance for a memcpy benchmark application). + * + * The readahead cache is transparent for Modified Virtual Addresses + * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and + * DCCIMVAC. + * + * It is however not transparent for the following cache maintenance + * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely + * what we are patching here with our BUILD_RAC_CACHE_OP here. + */ +BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier); + +static void b15_rac_enable(void) +{ + unsigned int cpu; + u32 enable = 0; + + for_each_possible_cpu(cpu) + enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT)); + + b15_rac_disable_and_flush(); + __b15_rac_enable(enable); +} + +static int b15_rac_reboot_notifier(struct notifier_block *nb, + unsigned long action, + void *data) +{ + /* During kexec, we are not yet migrated on the boot CPU, so we need to + * make sure we are SMP safe here. Once the RAC is disabled, flag it as + * suspended such that the hotplug notifier returns early. + */ + if (action == SYS_RESTART) { + spin_lock(&rac_lock); + b15_rac_disable_and_flush(); + clear_bit(RAC_ENABLED, &b15_rac_flags); + set_bit(RAC_SUSPENDED, &b15_rac_flags); + spin_unlock(&rac_lock); + } + + return NOTIFY_DONE; +} + +static struct notifier_block b15_rac_reboot_nb = { + .notifier_call = b15_rac_reboot_notifier, +}; + +/* The CPU hotplug case is the most interesting one, we basically need to make + * sure that the RAC is disabled for the entire system prior to having a CPU + * die, in particular prior to this dying CPU having exited the coherency + * domain. + * + * Once this CPU is marked dead, we can safely re-enable the RAC for the + * remaining CPUs in the system which are still online. + * + * Offlining a CPU is the problematic case, onlining a CPU is not much of an + * issue since the CPU and its cache-level hierarchy will start filling with + * the RAC disabled, so L1 and L2 only. + * + * In this function, we should NOT have to verify any unsafe setting/condition + * b15_rac_base: + * + * It is protected by the RAC_ENABLED flag which is cleared by default, and + * being cleared when initial procedure is done. b15_rac_base had been set at + * that time. + * + * RAC_ENABLED: + * There is a small timing windows, in b15_rac_init(), between + * cpuhp_setup_state_*() + * ... + * set RAC_ENABLED + * However, there is no hotplug activity based on the Linux booting procedure. + * + * Since we have to disable RAC for all cores, we keep RAC on as long as as + * possible (disable it as late as possible) to gain the cache benefit. + * + * Thus, dying/dead states are chosen here + * + * We are choosing not do disable the RAC on a per-CPU basis, here, if we did + * we would want to consider disabling it as early as possible to benefit the + * other active CPUs. + */ + +/* Running on the dying CPU */ +static int b15_rac_dying_cpu(unsigned int cpu) +{ + /* During kexec/reboot, the RAC is disabled via the reboot notifier + * return early here. + */ + if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) + return 0; + + spin_lock(&rac_lock); + + /* Indicate that we are starting a hotplug procedure */ + __clear_bit(RAC_ENABLED, &b15_rac_flags); + + /* Disable the readahead cache and save its value to a global */ + rac_config0_reg = b15_rac_disable_and_flush(); + + spin_unlock(&rac_lock); + + return 0; +} + +/* Running on a non-dying CPU */ +static int b15_rac_dead_cpu(unsigned int cpu) +{ + /* During kexec/reboot, the RAC is disabled via the reboot notifier + * return early here. + */ + if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) + return 0; + + spin_lock(&rac_lock); + + /* And enable it */ + __b15_rac_enable(rac_config0_reg); + __set_bit(RAC_ENABLED, &b15_rac_flags); + + spin_unlock(&rac_lock); + + return 0; +} + +static int b15_rac_suspend(void) +{ + /* Suspend the read-ahead cache oeprations, forcing our cache + * implementation to fallback to the regular ARMv7 calls. + * + * We are guaranteed to be running on the boot CPU at this point and + * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy + * here. + */ + rac_config0_reg = b15_rac_disable_and_flush(); + set_bit(RAC_SUSPENDED, &b15_rac_flags); + + return 0; +} + +static void b15_rac_resume(void) +{ + /* Coming out of a S3 suspend/resume cycle, the read-ahead cache + * register RAC_CONFIG0_REG will be restored to its default value, make + * sure we re-enable it and set the enable flag, we are also guaranteed + * to run on the boot CPU, so not racy again. + */ + __b15_rac_enable(rac_config0_reg); + clear_bit(RAC_SUSPENDED, &b15_rac_flags); +} + +static struct syscore_ops b15_rac_syscore_ops = { + .suspend = b15_rac_suspend, + .resume = b15_rac_resume, +}; + +static int __init b15_rac_init(void) +{ + struct device_node *dn; + int ret = 0, cpu; + u32 reg, en_mask = 0; + + dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl"); + if (!dn) + return -ENODEV; + + if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n")) + goto out; + + b15_rac_base = of_iomap(dn, 0); + if (!b15_rac_base) { + pr_err("failed to remap BIU control base\n"); + ret = -ENOMEM; + goto out; + } + + ret = register_reboot_notifier(&b15_rac_reboot_nb); + if (ret) { + pr_err("failed to register reboot notifier\n"); + iounmap(b15_rac_base); + goto out; + } + + if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, + "arm/cache-b15-rac:dead", + NULL, b15_rac_dead_cpu); + if (ret) + goto out_unmap; + + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING, + "arm/cache-b15-rac:dying", + NULL, b15_rac_dying_cpu); + if (ret) + goto out_cpu_dead; + } + + if (IS_ENABLED(CONFIG_PM_SLEEP)) + register_syscore_ops(&b15_rac_syscore_ops); + + spin_lock(&rac_lock); + reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); + for_each_possible_cpu(cpu) + en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT)); + WARN(reg & en_mask, "Read-ahead cache not previously disabled\n"); + + b15_rac_enable(); + set_bit(RAC_ENABLED, &b15_rac_flags); + spin_unlock(&rac_lock); + + pr_info("Broadcom Brahma-B15 readahead cache at: 0x%p\n", + b15_rac_base + RAC_CONFIG0_REG); + + goto out; + +out_cpu_dead: + cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING); +out_unmap: + unregister_reboot_notifier(&b15_rac_reboot_nb); + iounmap(b15_rac_base); +out: + of_node_put(dn); + return ret; +} +arch_initcall(b15_rac_init); diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index de78109d002d..215df435bfb9 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -15,6 +15,7 @@ #include <asm/assembler.h> #include <asm/errno.h> #include <asm/unwind.h> +#include <asm/hardware/cache-b15-rac.h> #include "proc-macros.S" @@ -446,3 +447,23 @@ ENDPROC(v7_dma_unmap_area) @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) define_cache_functions v7 + + /* The Broadcom Brahma-B15 read-ahead cache requires some modifications + * to the v7_cache_fns, we only override the ones we need + */ +#ifndef CONFIG_CACHE_B15_RAC + globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all +#endif + globl_equ b15_flush_icache_all, v7_flush_icache_all + globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis + globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all + globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range + globl_equ b15_coherent_kern_range, v7_coherent_kern_range + globl_equ b15_coherent_user_range, v7_coherent_user_range + globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area + + globl_equ b15_dma_map_area, v7_dma_map_area + globl_equ b15_dma_unmap_area, v7_dma_unmap_area + globl_equ b15_dma_flush_range, v7_dma_flush_range + + define_cache_functions b15 diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index fc3b44028cfb..084779c5c893 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -21,11 +21,7 @@ #include <asm/fixmap.h> #include <asm/memory.h> #include <asm/pgtable.h> - -struct addr_marker { - unsigned long start_address; - const char *name; -}; +#include <asm/ptdump.h> static struct addr_marker address_markers[] = { { MODULES_VADDR, "Modules" }, @@ -38,12 +34,26 @@ static struct addr_marker address_markers[] = { { -1, NULL }, }; +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + if (m) \ + seq_printf(m, fmt); \ +}) + struct pg_state { struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; unsigned level; u64 current_prot; + bool check_wx; + unsigned long wx_pages; const char *current_domain; }; @@ -52,6 +62,8 @@ struct prot_bits { u64 val; const char *set; const char *clear; + bool ro_bit; + bool nx_bit; }; static const struct prot_bits pte_bits[] = { @@ -65,11 +77,13 @@ static const struct prot_bits pte_bits[] = { .val = L_PTE_RDONLY, .set = "ro", .clear = "RW", + .ro_bit = true, }, { .mask = L_PTE_XN, .val = L_PTE_XN, .set = "NX", .clear = "x ", + .nx_bit = true, }, { .mask = L_PTE_SHARED, .val = L_PTE_SHARED, @@ -133,11 +147,13 @@ static const struct prot_bits section_bits[] = { .val = L_PMD_SECT_RDONLY | PMD_SECT_AP2, .set = "ro", .clear = "RW", + .ro_bit = true, #elif __LINUX_ARM_ARCH__ >= 6 { .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, .val = PMD_SECT_APX | PMD_SECT_AP_WRITE, .set = " ro", + .ro_bit = true, }, { .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, .val = PMD_SECT_AP_WRITE, @@ -156,6 +172,7 @@ static const struct prot_bits section_bits[] = { .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, .val = 0, .set = " ro", + .ro_bit = true, }, { .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, .val = PMD_SECT_AP_WRITE, @@ -174,6 +191,7 @@ static const struct prot_bits section_bits[] = { .val = PMD_SECT_XN, .set = "NX", .clear = "x ", + .nx_bit = true, }, { .mask = PMD_SECT_S, .val = PMD_SECT_S, @@ -186,6 +204,8 @@ struct pg_level { const struct prot_bits *bits; size_t num; u64 mask; + const struct prot_bits *ro_bit; + const struct prot_bits *nx_bit; }; static struct pg_level pg_level[] = { @@ -214,10 +234,27 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t s = bits->clear; if (s) - seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } } +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + if ((st->current_prot & pg_level[st->level].ro_bit->mask) == + pg_level[st->level].ro_bit->val) + return; + if ((st->current_prot & pg_level[st->level].nx_bit->mask) == + pg_level[st->level].nx_bit->val) + return; + + WARN_ONCE(1, "arm/mm: Found insecure W+X mapping at address %pS\n", + (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val, const char *domain) { @@ -228,7 +265,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->level = level; st->current_prot = prot; st->current_domain = domain; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); } else if (prot != st->current_prot || level != st->level || domain != st->current_domain || addr >= st->marker[1].start_address) { @@ -236,7 +273,8 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned long delta; if (st->current_prot) { - seq_printf(st->seq, "0x%08lx-0x%08lx ", + note_prot_wx(st, addr); + pt_dump_seq_printf(st->seq, "0x%08lx-0x%08lx ", st->start_address, addr); delta = (addr - st->start_address) >> 10; @@ -244,17 +282,19 @@ static void note_page(struct pg_state *st, unsigned long addr, delta >>= 10; unit++; } - seq_printf(st->seq, "%9lu%c", delta, *unit); + pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit); if (st->current_domain) - seq_printf(st->seq, " %s", st->current_domain); + pt_dump_seq_printf(st->seq, " %s", + st->current_domain); if (pg_level[st->level].bits) dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num); - seq_printf(st->seq, "\n"); + pt_dump_seq_printf(st->seq, "\n"); } if (addr >= st->marker[1].start_address) { st->marker++; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", + st->marker->name); } st->start_address = addr; st->current_prot = prot; @@ -335,61 +375,82 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } -static void walk_pgd(struct seq_file *m) +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, + unsigned long start) { - pgd_t *pgd = swapper_pg_dir; - struct pg_state st; - unsigned long addr; + pgd_t *pgd = pgd_offset(mm, 0UL); unsigned i; - - memset(&st, 0, sizeof(st)); - st.seq = m; - st.marker = address_markers; + unsigned long addr; for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { - addr = i * PGDIR_SIZE; + addr = start + i * PGDIR_SIZE; if (!pgd_none(*pgd)) { - walk_pud(&st, pgd, addr); + walk_pud(st, pgd, addr); } else { - note_page(&st, addr, 1, pgd_val(*pgd), NULL); + note_page(st, addr, 1, pgd_val(*pgd), NULL); } } - - note_page(&st, 0, 0, 0, NULL); } -static int ptdump_show(struct seq_file *m, void *v) +void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) { - walk_pgd(m); - return 0; -} + struct pg_state st = { + .seq = m, + .marker = info->markers, + .check_wx = false, + }; -static int ptdump_open(struct inode *inode, struct file *file) -{ - return single_open(file, ptdump_show, NULL); + walk_pgd(&st, info->mm, info->base_addr); + note_page(&st, 0, 0, 0, NULL); } -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int ptdump_init(void) +static void ptdump_initialize(void) { - struct dentry *pe; unsigned i, j; for (i = 0; i < ARRAY_SIZE(pg_level); i++) if (pg_level[i].bits) - for (j = 0; j < pg_level[i].num; j++) + for (j = 0; j < pg_level[i].num; j++) { pg_level[i].mask |= pg_level[i].bits[j].mask; + if (pg_level[i].bits[j].ro_bit) + pg_level[i].ro_bit = &pg_level[i].bits[j]; + if (pg_level[i].bits[j].nx_bit) + pg_level[i].nx_bit = &pg_level[i].bits[j]; + } address_markers[2].start_address = VMALLOC_START; +} + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = 0, +}; - pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, - &ptdump_fops); - return pe ? 0 : -ENOMEM; +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = (struct addr_marker[]) { + { 0, NULL}, + { -1, NULL}, + }, + .check_wx = true, + }; + + walk_pgd(&st, &init_mm, 0); + note_page(&st, 0, 0, 0, NULL); + if (st.wx_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", + st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} + +static int ptdump_init(void) +{ + ptdump_initialize(); + return ptdump_debugfs_register(&kernel_ptdump_info, + "kernel_page_tables"); } __initcall(ptdump_init); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 42f585379e19..b75eada23d0a 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -21,7 +21,6 @@ #include <linux/highmem.h> #include <linux/perf_event.h> -#include <asm/exception.h> #include <asm/pgtable.h> #include <asm/system_misc.h> #include <asm/system_info.h> @@ -545,7 +544,7 @@ hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *) /* * Dispatch a data abort to the relevant handler. */ -asmlinkage void __exception +asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + fsr_fs(fsr); @@ -578,7 +577,7 @@ hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs * ifsr_info[nr].name = name; } -asmlinkage void __exception +asmlinkage void do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) { const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 10bfba85eb96..1d1edd064199 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -16,8 +16,8 @@ * are not supported on any CPU using the idmap tables as its current * page tables. */ -pgd_t *idmap_pgd; -long long arch_phys_to_idmap_offset; +pgd_t *idmap_pgd __ro_after_init; +long long arch_phys_to_idmap_offset __ro_after_init; #ifdef CONFIG_ARM_LPAE static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index a1f11a7ee81b..bd6f4513539a 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -36,6 +36,7 @@ #include <asm/system_info.h> #include <asm/tlb.h> #include <asm/fixmap.h> +#include <asm/ptdump.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -738,6 +739,7 @@ static int __mark_rodata_ro(void *unused) void mark_rodata_ro(void) { stop_machine(__mark_rodata_ro, NULL, NULL); + debug_checkwx(); } void set_kernel_text_rw(void) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index e4370810f4f1..7c087961b7ce 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -31,7 +31,7 @@ struct mpu_rgn_info mpu_rgn_info; #ifdef CONFIG_CPU_CP15 #ifdef CONFIG_CPU_HIGH_VECTOR -static unsigned long __init setup_vectors_base(void) +unsigned long setup_vectors_base(void) { unsigned long reg = get_cr(); @@ -57,7 +57,7 @@ static inline bool security_extensions_enabled(void) return 0; } -static unsigned long __init setup_vectors_base(void) +unsigned long setup_vectors_base(void) { unsigned long base = 0, reg = get_cr(); diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index 976df60ac426..e2853bfff74e 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -6,6 +6,7 @@ #include <linux/bitops.h> #include <linux/memblock.h> +#include <linux/string.h> #include <asm/cacheflush.h> #include <asm/cp15.h> @@ -296,6 +297,7 @@ void __init adjust_lowmem_bounds_mpu(void) } } + memset(mem, 0, sizeof(mem)); num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem); for (i = 0; i < num; i++) { @@ -433,7 +435,7 @@ void __init mpu_setup(void) /* Background */ err |= mpu_setup_region(region++, 0, 32, - MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA, + MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0RW, 0, false); #ifdef CONFIG_XIP_KERNEL diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 01d64c0b2563..d55d493f9a1e 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -567,7 +567,7 @@ __v7_setup_stack: /* * Standard v7 proc info content */ -.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions +.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions, cache_fns = v7_cache_fns ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ @@ -583,7 +583,7 @@ __v7_setup_stack: .long \proc_fns .long v7wbi_tlb_fns .long v6_user_fns - .long v7_cache_fns + .long \cache_fns .endm #ifndef CONFIG_ARM_LPAE @@ -678,7 +678,7 @@ __v7_ca15mp_proc_info: __v7_b15mp_proc_info: .long 0x420f00f0 .long 0xff0ffff0 - __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup + __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, cache_fns = b15_cache_fns .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info /* diff --git a/arch/arm/mm/ptdump_debugfs.c b/arch/arm/mm/ptdump_debugfs.c new file mode 100644 index 000000000000..be8d87be4b93 --- /dev/null +++ b/arch/arm/mm/ptdump_debugfs.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include <asm/ptdump.h> + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct ptdump_info *info = m->private; + + ptdump_walk_pgd(m, info); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, inode->i_private); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int ptdump_debugfs_register(struct ptdump_info *info, const char *name) +{ + struct dentry *pe; + + pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); + return pe ? 0 : -ENOMEM; + +} diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 52d1cd14fda4..e90cc8a08186 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -32,6 +32,7 @@ #include <linux/percpu.h> #include <linux/bug.h> #include <asm/patch.h> +#include <asm/sections.h> #include "../decode-arm.h" #include "../decode-thumb.h" @@ -64,9 +65,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) int is; const struct decode_checker **checkers; - if (in_exception_text(addr)) - return -EINVAL; - #ifdef CONFIG_THUMB2_KERNEL thumb = true; addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */ @@ -680,3 +678,13 @@ int __init arch_init_kprobes() #endif return 0; } + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + void *a = (void *)addr; + + return __in_irqentry_text(addr) || + in_entry_text(addr) || + in_idmap_text(addr) || + memory_contains(__kprobes_text_start, __kprobes_text_end, a, 1); +} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b2b95f79c746..7381eeb7ef8e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -16,6 +16,7 @@ config ARM64 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX @@ -91,6 +92,7 @@ config ARM64 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 3873dd7b5a32..1241fb211293 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -116,6 +116,24 @@ .endm /* + * Value prediction barrier + */ + .macro csdb + hint #20 + .endm + +/* + * Sanitise a 64-bit bounded index wrt speculation, returning zero if out + * of bounds. + */ + .macro mask_nospec64, idx, limit, tmp + sub \tmp, \idx, \limit + bic \tmp, \tmp, \idx + and \idx, \idx, \tmp, asr #63 + csdb + .endm + +/* * NOP sequence */ .macro nops, num @@ -514,7 +532,7 @@ alternative_endif * phys: physical address, preserved * ttbr: returns the TTBR value */ - .macro phys_to_ttbr, phys, ttbr + .macro phys_to_ttbr, ttbr, phys #ifdef CONFIG_ARM64_PA_BITS_52 orr \ttbr, \phys, \phys, lsr #46 and \ttbr, \ttbr, #TTBR_BADDR_MASK_52 @@ -523,6 +541,29 @@ alternative_endif #endif .endm + .macro phys_to_pte, pte, phys +#ifdef CONFIG_ARM64_PA_BITS_52 + /* + * We assume \phys is 64K aligned and this is guaranteed by only + * supporting this configuration with 64K pages. + */ + orr \pte, \phys, \phys, lsr #36 + and \pte, \pte, #PTE_ADDR_MASK +#else + mov \pte, \phys +#endif + .endm + + .macro pte_to_phys, phys, pte +#ifdef CONFIG_ARM64_PA_BITS_52 + ubfiz \phys, \pte, #(48 - 16 - 12), #16 + bfxil \phys, \pte, #16, #32 + lsl \phys, \phys, #16 +#else + and \phys, \pte, #PTE_ADDR_MASK +#endif + .endm + /** * Errata workaround prior to disable MMU. Insert an ISB immediately prior * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0. diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 77651c49ef44..f11518af96a9 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -32,6 +32,7 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define psb_csync() asm volatile("hint #17" : : : "memory") +#define csdb() asm volatile("hint #20" : : : "memory") #define mb() dsb(sy) #define rmb() dsb(ld) @@ -40,6 +41,27 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) +/* + * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz + * and 0 otherwise. + */ +#define array_index_mask_nospec array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long idx, + unsigned long sz) +{ + unsigned long mask; + + asm volatile( + " cmp %1, %2\n" + " sbc %0, xzr, xzr\n" + : "=r" (mask) + : "r" (idx), "Ir" (sz) + : "cc"); + + csdb(); + return mask; +} + #define __smp_mb() dmb(ish) #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 5bb2fd4674e7..07fe2479d310 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,9 +48,10 @@ do { \ } while (0) static inline int -arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) +arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { int oldval = 0, ret, tmp; + u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); @@ -88,15 +89,17 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, u32 oldval, u32 newval) { int ret = 0; u32 val, tmp; + u32 __user *uaddr; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + if (!access_ok(VERIFY_WRITE, _uaddr, sizeof(u32))) return -EFAULT; + uaddr = __uaccess_mask_ptr(_uaddr); uaccess_enable(); asm volatile("// futex_atomic_cmpxchg_inatomic\n" " prfm pstl1strm, %2\n" diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index e266f80e45b7..8758bb008436 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -12,7 +12,8 @@ /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. - * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. + * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses, + * where N = (1 << KASAN_SHADOW_SCALE_SHIFT). */ #define KASAN_SHADOW_START (VA_START) #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) @@ -20,14 +21,16 @@ /* * This value is used to map an address to the corresponding shadow * address by the following formula: - * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET * - * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END] - * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET - * should satisfy the following equation: - * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61) + * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range + * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual + * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation: + * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - + * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT)) */ -#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ + (64 - KASAN_SHADOW_SCALE_SHIFT))) void kasan_init(void); void kasan_copy_shadow(pgd_t *pgdir); diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 82386e860dd2..a780f6714b44 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -123,16 +123,8 @@ /* * Initial memory map attributes. */ -#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG) -#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG) -#else -#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS -#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS -#endif +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4485ae8e98de..a73f63aca68e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -415,4 +415,10 @@ static inline void kvm_arm_vhe_guest_exit(void) { local_daif_restore(DAIF_PROCCTX_NOIRQ); } + +static inline bool kvm_arm_harden_branch_predictor(void) +{ + return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h deleted file mode 100644 index bc39e557c56c..000000000000 --- a/arch/arm64/include/asm/kvm_psci.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __ARM64_KVM_PSCI_H__ -#define __ARM64_KVM_PSCI_H__ - -#define KVM_ARM_PSCI_0_1 1 -#define KVM_ARM_PSCI_0_2 2 - -int kvm_psci_version(struct kvm_vcpu *vcpu); -int kvm_psci_call(struct kvm_vcpu *vcpu); - -#endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index d4bae7d6e0d8..50fa96a49792 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -85,7 +85,8 @@ * stack size when KASAN is in use. */ #ifdef CONFIG_KASAN -#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_THREAD_SHIFT 1 #else #define KASAN_SHADOW_SIZE (0) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 22a926825e3f..2db84df5eb42 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -37,13 +37,11 @@ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG) -#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG) -#else -#define PROT_DEFAULT _PROT_DEFAULT -#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0) +#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0) + +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) #define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) @@ -55,22 +53,22 @@ #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) -#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) -#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG) +#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) +#define _HYP_PAGE_DEFAULT _PAGE_DEFAULT -#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) +#define PAGE_KERNEL __pgprot(PROT_NORMAL) +#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) +#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) +#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) #define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) #define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) -#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) -#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) +#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) +#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index cee4ae25a5d1..fce604e3e599 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -21,6 +21,9 @@ #define TASK_SIZE_64 (UL(1) << VA_BITS) +#define KERNEL_DS UL(-1) +#define USER_DS (TASK_SIZE_64 - 1) + #ifndef __ASSEMBLY__ /* @@ -113,6 +116,16 @@ struct thread_struct { struct debug_info debug; /* debugging */ }; +/* + * Everything usercopied to/from thread_struct is statically-sized, so + * no hardened usercopy whitelist is needed. + */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = *size = 0; +} + #ifdef CONFIG_COMPAT #define task_user_tls(t) \ ({ \ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index fdb827c7832f..ebdae15d665d 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -87,8 +87,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) " cbnz %w1, 1f\n" " add %w1, %w0, %3\n" " casa %w0, %w1, %2\n" - " and %w1, %w1, #0xffff\n" - " eor %w1, %w1, %w0, lsr #16\n" + " sub %w1, %w1, %3\n" + " eor %w1, %w1, %w0\n" "1:") : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) : "I" (1 << TICKET_SHIFT) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 59fda5292936..543e11f0f657 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -35,16 +35,20 @@ #include <asm/compiler.h> #include <asm/extable.h> -#define KERNEL_DS (-1UL) #define get_ds() (KERNEL_DS) - -#define USER_DS TASK_SIZE_64 #define get_fs() (current_thread_info()->addr_limit) static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + /* + * Prevent a mispredicted conditional call to set_fs from forwarding + * the wrong address limit to access_ok under speculation. + */ + dsb(nsh); + isb(); + /* On user-mode return, check fs is correct */ set_thread_flag(TIF_FSCHECK); @@ -66,22 +70,32 @@ static inline void set_fs(mm_segment_t fs) * Returns 1 if the range is valid, 0 otherwise. * * This is equivalent to the following test: - * (u65)addr + (u65)size <= current->addr_limit - * - * This needs 65-bit arithmetic. + * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 */ -#define __range_ok(addr, size) \ -({ \ - unsigned long __addr = (unsigned long)(addr); \ - unsigned long flag, roksum; \ - __chk_user_ptr(addr); \ - asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ - : "=&r" (flag), "=&r" (roksum) \ - : "1" (__addr), "Ir" (size), \ - "r" (current_thread_info()->addr_limit) \ - : "cc"); \ - flag; \ -}) +static inline unsigned long __range_ok(unsigned long addr, unsigned long size) +{ + unsigned long limit = current_thread_info()->addr_limit; + + __chk_user_ptr(addr); + asm volatile( + // A + B <= C + 1 for all A,B,C, in four easy steps: + // 1: X = A + B; X' = X % 2^64 + " adds %0, %0, %2\n" + // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 + " csel %1, xzr, %1, hi\n" + // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' + // to compensate for the carry flag being set in step 4. For + // X > 2^64, X' merely has to remain nonzero, which it does. + " csinv %0, %0, xzr, cc\n" + // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 + // comes from the carry in being clear. Otherwise, we are + // testing X' - C == 0, subject to the previous adjustments. + " sbcs xzr, %0, %1\n" + " cset %0, ls\n" + : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); + + return addr; +} /* * When dealing with data aborts, watchpoints, or instruction traps we may end @@ -90,7 +104,7 @@ static inline void set_fs(mm_segment_t fs) */ #define untagged_addr(addr) sign_extend64(addr, 55) -#define access_ok(type, addr, size) __range_ok(addr, size) +#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size) #define user_addr_max get_fs #define _ASM_EXTABLE(from, to) \ @@ -221,6 +235,26 @@ static inline void uaccess_enable_not_uao(void) } /* + * Sanitise a uaccess pointer such that it becomes NULL if above the + * current addr_limit. + */ +#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) +static inline void __user *__uaccess_mask_ptr(const void __user *ptr) +{ + void __user *safe_ptr; + + asm volatile( + " bics xzr, %1, %2\n" + " csel %0, %1, xzr, eq\n" + : "=&r" (safe_ptr) + : "r" (ptr), "r" (current_thread_info()->addr_limit) + : "cc"); + + csdb(); + return safe_ptr; +} + +/* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" * call. @@ -272,28 +306,33 @@ do { \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user(x, ptr) \ +#define __get_user_check(x, ptr, err) \ ({ \ - int __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err); \ - __gu_err; \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ + __p = uaccess_mask_ptr(__p); \ + __get_user_err((x), __p, (err)); \ + } else { \ + (x) = 0; (err) = -EFAULT; \ + } \ }) #define __get_user_error(x, ptr, err) \ ({ \ - __get_user_err((x), (ptr), (err)); \ + __get_user_check((x), (ptr), (err)); \ (void)0; \ }) -#define get_user(x, ptr) \ +#define __get_user(x, ptr) \ ({ \ - __typeof__(*(ptr)) __user *__p = (ptr); \ - might_fault(); \ - access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ - __get_user((x), __p) : \ - ((x) = 0, -EFAULT); \ + int __gu_err = 0; \ + __get_user_check((x), (ptr), __gu_err); \ + __gu_err; \ }) +#define get_user __get_user + #define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ @@ -336,43 +375,63 @@ do { \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user(x, ptr) \ +#define __put_user_check(x, ptr, err) \ ({ \ - int __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err); \ - __pu_err; \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ + __p = uaccess_mask_ptr(__p); \ + __put_user_err((x), __p, (err)); \ + } else { \ + (err) = -EFAULT; \ + } \ }) #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_err((x), (ptr), (err)); \ + __put_user_check((x), (ptr), (err)); \ (void)0; \ }) -#define put_user(x, ptr) \ +#define __put_user(x, ptr) \ ({ \ - __typeof__(*(ptr)) __user *__p = (ptr); \ - might_fault(); \ - access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \ - __put_user((x), __p) : \ - -EFAULT; \ + int __pu_err = 0; \ + __put_user_check((x), (ptr), __pu_err); \ + __pu_err; \ }) +#define put_user __put_user + extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); -#define raw_copy_from_user __arch_copy_from_user +#define raw_copy_from_user(to, from, n) \ +({ \ + __arch_copy_from_user((to), __uaccess_mask_ptr(from), (n)); \ +}) + extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); -#define raw_copy_to_user __arch_copy_to_user -extern unsigned long __must_check raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +#define raw_copy_to_user(to, from, n) \ +({ \ + __arch_copy_to_user(__uaccess_mask_ptr(to), (from), (n)); \ +}) + +extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); +#define raw_copy_in_user(to, from, n) \ +({ \ + __arch_copy_in_user(__uaccess_mask_ptr(to), \ + __uaccess_mask_ptr(from), (n)); \ +}) + #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) +extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n); +static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) - n = __clear_user(to, n); + n = __arch_clear_user(__uaccess_mask_ptr(to), n); return n; } +#define clear_user __clear_user extern long strncpy_from_user(char *dest, const char __user *src, long count); @@ -386,7 +445,7 @@ extern unsigned long __must_check __copy_user_flushcache(void *to, const void __ static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) { kasan_check_write(dst, size); - return __copy_user_flushcache(dst, src, size); + return __copy_user_flushcache(dst, __uaccess_mask_ptr(src), size); } #endif diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 252396a96c78..7b09487ff8fb 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -230,10 +230,10 @@ void __init acpi_boot_table_init(void) done: if (acpi_disabled) { - if (earlycon_init_is_deferred) + if (earlycon_acpi_spcr_enable) early_init_dt_scan_chosen_stdout(); } else { - parse_spcr(earlycon_init_is_deferred); + acpi_parse_spcr(earlycon_acpi_spcr_enable, true); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); } diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 67368c7329c0..66be504edb6c 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -37,8 +37,8 @@ EXPORT_SYMBOL(clear_page); /* user mem (segment) */ EXPORT_SYMBOL(__arch_copy_from_user); EXPORT_SYMBOL(__arch_copy_to_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(raw_copy_in_user); +EXPORT_SYMBOL(__arch_clear_user); +EXPORT_SYMBOL(__arch_copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index 76225c2611ea..e5de33513b5d 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -17,6 +17,7 @@ */ #include <linux/linkage.h> +#include <linux/arm-smccc.h> .macro ventry target .rept 31 @@ -53,30 +54,6 @@ ENTRY(__bp_harden_hyp_vecs_start) vectors __kvm_hyp_vector .endr ENTRY(__bp_harden_hyp_vecs_end) -ENTRY(__psci_hyp_bp_inval_start) - sub sp, sp, #(8 * 18) - stp x16, x17, [sp, #(16 * 0)] - stp x14, x15, [sp, #(16 * 1)] - stp x12, x13, [sp, #(16 * 2)] - stp x10, x11, [sp, #(16 * 3)] - stp x8, x9, [sp, #(16 * 4)] - stp x6, x7, [sp, #(16 * 5)] - stp x4, x5, [sp, #(16 * 6)] - stp x2, x3, [sp, #(16 * 7)] - stp x0, x1, [sp, #(16 * 8)] - mov x0, #0x84000000 - smc #0 - ldp x16, x17, [sp, #(16 * 0)] - ldp x14, x15, [sp, #(16 * 1)] - ldp x12, x13, [sp, #(16 * 2)] - ldp x10, x11, [sp, #(16 * 3)] - ldp x8, x9, [sp, #(16 * 4)] - ldp x6, x7, [sp, #(16 * 5)] - ldp x4, x5, [sp, #(16 * 6)] - ldp x2, x3, [sp, #(16 * 7)] - ldp x0, x1, [sp, #(16 * 8)] - add sp, sp, #(8 * 18) -ENTRY(__psci_hyp_bp_inval_end) ENTRY(__qcom_hyp_sanitize_link_stack_start) stp x29, x30, [sp, #-16]! @@ -85,3 +62,22 @@ ENTRY(__qcom_hyp_sanitize_link_stack_start) .endr ldp x29, x30, [sp], #16 ENTRY(__qcom_hyp_sanitize_link_stack_end) + +.macro smccc_workaround_1 inst + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + \inst #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +.endm + +ENTRY(__smccc_workaround_1_smc_start) + smccc_workaround_1 smc +ENTRY(__smccc_workaround_1_smc_end) + +ENTRY(__smccc_workaround_1_hvc_start) + smccc_workaround_1 hvc +ENTRY(__smccc_workaround_1_hvc_end) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 2a752cb2a0f3..8021b46c9743 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -16,7 +16,7 @@ #include <asm/virt.h> .text -.pushsection .idmap.text, "ax" +.pushsection .idmap.text, "awx" /* * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ed6881882231..07823595b7f0 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -67,9 +67,12 @@ static int cpu_enable_trap_ctr_access(void *__unused) DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM -extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; extern char __qcom_hyp_sanitize_link_stack_start[]; extern char __qcom_hyp_sanitize_link_stack_end[]; +extern char __smccc_workaround_1_smc_start[]; +extern char __smccc_workaround_1_smc_end[]; +extern char __smccc_workaround_1_hvc_start[]; +extern char __smccc_workaround_1_hvc_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -112,10 +115,12 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __psci_hyp_bp_inval_start NULL -#define __psci_hyp_bp_inval_end NULL #define __qcom_hyp_sanitize_link_stack_start NULL #define __qcom_hyp_sanitize_link_stack_end NULL +#define __smccc_workaround_1_smc_start NULL +#define __smccc_workaround_1_smc_end NULL +#define __smccc_workaround_1_hvc_start NULL +#define __smccc_workaround_1_hvc_end NULL static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, @@ -142,17 +147,59 @@ static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); } +#include <uapi/linux/psci.h> +#include <linux/arm-smccc.h> #include <linux/psci.h> -static int enable_psci_bp_hardening(void *data) +static void call_smc_arch_workaround_1(void) +{ + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void call_hvc_arch_workaround_1(void) +{ + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static int enable_smccc_arch_workaround_1(void *data) { const struct arm64_cpu_capabilities *entry = data; + bp_hardening_cb_t cb; + void *smccc_start, *smccc_end; + struct arm_smccc_res res; + + if (!entry->matches(entry, SCOPE_LOCAL_CPU)) + return 0; + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return 0; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if (res.a0) + return 0; + cb = call_hvc_arch_workaround_1; + smccc_start = __smccc_workaround_1_hvc_start; + smccc_end = __smccc_workaround_1_hvc_end; + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if (res.a0) + return 0; + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; + break; + + default: + return 0; + } - if (psci_ops.get_version) - install_bp_hardening_cb(entry, - (bp_hardening_cb_t)psci_ops.get_version, - __psci_hyp_bp_inval_start, - __psci_hyp_bp_inval_end); + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); return 0; } @@ -333,22 +380,22 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, @@ -362,12 +409,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .enable = enable_psci_bp_hardening, + .enable = enable_smccc_arch_workaround_1, }, #endif { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0fb6a3151443..29b1f873e337 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -856,12 +856,23 @@ static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, int __unused) { + char const *str = "command line option"; u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - /* Forced on command line? */ + /* + * For reasons that aren't entirely clear, enabling KPTI on Cavium + * ThunderX leads to apparent I-cache corruption of kernel text, which + * ends as well as you might imagine. Don't even try. + */ + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) { + str = "ARM64_WORKAROUND_CAVIUM_27456"; + __kpti_forced = -1; + } + + /* Forced? */ if (__kpti_forced) { - pr_info_once("kernel page table isolation forced %s by command line option\n", - __kpti_forced > 0 ? "ON" : "OFF"); + pr_info_once("kernel page table isolation forced %s by %s\n", + __kpti_forced > 0 ? "ON" : "OFF", str); return __kpti_forced > 0; } @@ -881,6 +892,30 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, ID_AA64PFR0_CSV3_SHIFT); } +static int kpti_install_ng_mappings(void *__unused) +{ + typedef void (kpti_remap_fn)(int, int, phys_addr_t); + extern kpti_remap_fn idmap_kpti_install_ng_mappings; + kpti_remap_fn *remap_fn; + + static bool kpti_applied = false; + int cpu = smp_processor_id(); + + if (kpti_applied) + return 0; + + remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); + + cpu_install_idmap(); + remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); + cpu_uninstall_idmap(); + + if (!cpu) + kpti_applied = true; + + return 0; +} + static int __init parse_kpti(char *str) { bool enabled; @@ -1004,6 +1039,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_UNMAP_KERNEL_AT_EL0, .def_scope = SCOPE_SYSTEM, .matches = unmap_kernel_at_el0, + .enable = kpti_install_ng_mappings, }, #endif { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b34e717d7597..ec2ee720e33e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -167,10 +167,10 @@ alternative_else_nop_endif .else add x21, sp, #S_FRAME_SIZE get_thread_info tsk - /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + /* Save the task's original addr_limit and set USER_DS */ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] str x20, [sp, #S_ORIG_ADDR_LIMIT] - mov x20, #TASK_SIZE_64 + mov x20, #USER_DS str x20, [tsk, #TSK_TI_ADDR_LIMIT] /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ @@ -324,6 +324,10 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] ldr lr, [sp, #S_LR] add sp, sp, #S_FRAME_SIZE // restore sp + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on eret context synchronization + * when returning from IPI handler, and when returning to user-space. + */ .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 @@ -378,6 +382,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 * x7 is reserved for the system call number in 32-bit mode. */ wsc_nr .req w25 // number of system calls +xsc_nr .req x25 // number of system calls (zero-extended) wscno .req w26 // syscall number xscno .req x26 // syscall number (zero-extended) stbl .req x27 // syscall table pointer @@ -766,7 +771,10 @@ el0_sp_pc: * Stack or PC alignment exception handling */ mrs x26, far_el1 - enable_daif + enable_da_f +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ct_user_exit mov x0, x26 mov x1, x25 @@ -824,6 +832,11 @@ el0_irq_naked: #endif ct_user_exit +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + tbz x22, #55, 1f + bl do_el0_irq_bp_hardening +1: +#endif irq_handler #ifdef CONFIG_TRACE_IRQFLAGS @@ -935,6 +948,7 @@ el0_svc_naked: // compat entry point b.ne __sys_trace cmp wscno, wsc_nr // check upper syscall limit b.hs ni_sys + mask_nospec64 xscno, xsc_nr, x19 // enforce bounds for syscall number ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine b ret_fast_syscall @@ -1013,16 +1027,9 @@ alternative_else_nop_endif orr \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp /* - * We avoid running the post_ttbr_update_workaround here because the - * user and kernel ASIDs don't have conflicting mappings, so any - * "blessing" as described in: - * - * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com - * - * will not hurt correctness. Whilst this may partially defeat the - * point of using split ASIDs in the first place, it avoids - * the hit of invalidating the entire I-cache on every return to - * userspace. + * We avoid running the post_ttbr_update_workaround here because + * it's only needed by Cavium ThunderX, which requires KPTI to be + * disabled. */ .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ba3ab04788dc..2b6b8b24e5ab 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -148,26 +148,6 @@ preserve_boot_args: ENDPROC(preserve_boot_args) /* - * Macro to arrange a physical address in a page table entry, taking care of - * 52-bit addresses. - * - * Preserves: phys - * Returns: pte - */ - .macro phys_to_pte, phys, pte -#ifdef CONFIG_ARM64_PA_BITS_52 - /* - * We assume \phys is 64K aligned and this is guaranteed by only - * supporting this configuration with 64K pages. - */ - orr \pte, \phys, \phys, lsr #36 - and \pte, \pte, #PTE_ADDR_MASK -#else - mov \pte, \phys -#endif - .endm - -/* * Macro to create a table entry to the next page. * * tbl: page table address @@ -181,7 +161,7 @@ ENDPROC(preserve_boot_args) */ .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 add \tmp1, \tbl, #PAGE_SIZE - phys_to_pte \tmp1, \tmp2 + phys_to_pte \tmp2, \tmp1 orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type lsr \tmp1, \virt, #\shift sub \ptrs, \ptrs, #1 @@ -207,7 +187,7 @@ ENDPROC(preserve_boot_args) * Returns: rtbl */ .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1 -.Lpe\@: phys_to_pte \rtbl, \tmp1 +.Lpe\@: phys_to_pte \tmp1, \rtbl orr \tmp1, \tmp1, \flags // tmp1 = table entry str \tmp1, [\tbl, \index, lsl #3] add \rtbl, \rtbl, \inc // rtbl = pa next level @@ -475,7 +455,7 @@ ENDPROC(__primary_switched) * end early head section, begin head code that is also used for * hotplug and needs to have the same protections as the text region */ - .section ".idmap.text","ax" + .section ".idmap.text","awx" ENTRY(kimage_vaddr) .quad _text - TEXT_OFFSET @@ -776,8 +756,8 @@ ENTRY(__enable_mmu) update_early_cpu_boot_status 0, x1, x2 adrp x1, idmap_pg_dir adrp x2, swapper_pg_dir - phys_to_ttbr x1, x3 - phys_to_ttbr x2, x4 + phys_to_ttbr x3, x1 + phys_to_ttbr x4, x2 msr ttbr0_el1, x3 // load TTBR0 msr ttbr1_el1, x4 // load TTBR1 isb diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 84f5d52fddda..dd14ab8c9f72 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -34,12 +34,12 @@ * each stage of the walk. */ .macro break_before_make_ttbr_switch zero_page, page_table, tmp - phys_to_ttbr \zero_page, \tmp + phys_to_ttbr \tmp, \zero_page msr ttbr1_el1, \tmp isb tlbi vmalle1 dsb nsh - phys_to_ttbr \page_table, \tmp + phys_to_ttbr \tmp, \page_table msr ttbr1_el1, \tmp isb .endm diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3affca3dd96a..75b220ba73a3 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -925,9 +925,8 @@ static void __armv8pmu_probe_pmu(void *info) pmceid[0] = read_sysreg(pmceid0_el0); pmceid[1] = read_sysreg(pmceid1_el0); - bitmap_from_u32array(cpu_pmu->pmceid_bitmap, - ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid, - ARRAY_SIZE(pmceid)); + bitmap_from_arr32(cpu_pmu->pmceid_bitmap, + pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 10dd16d7902d..bebec8ef9372 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -96,7 +96,7 @@ ENTRY(__cpu_suspend_enter) ret ENDPROC(__cpu_suspend_enter) - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 520b0dad3c62..e5e741bfffe1 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -22,13 +22,14 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <kvm/arm_psci.h> + #include <asm/esr.h> #include <asm/exception.h> #include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> -#include <asm/kvm_psci.h> #include <asm/debug-monitors.h> #include <asm/traps.h> @@ -51,7 +52,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; - ret = kvm_psci_call(vcpu); + ret = kvm_hvc_call_handler(vcpu); if (ret < 0) { vcpu_set_reg(vcpu, 0, ~0UL); return 1; @@ -62,7 +63,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { + /* + * "If an SMC instruction executed at Non-secure EL1 is + * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a + * Trap exception, not a Secure Monitor Call exception [...]" + * + * We need to advance the PC after the trap, as it would + * otherwise return to the same address... + */ vcpu_set_reg(vcpu, 0, ~0UL); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index e086c6eff8c6..5aa9ccf6db99 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -63,7 +63,7 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - phys_to_ttbr x0, x4 + phys_to_ttbr x4, x0 msr ttbr0_el2, x4 mrs x4, tcr_el1 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index e4f37b9dd47c..f36464bd57c5 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -15,6 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/arm-smccc.h> #include <linux/linkage.h> #include <asm/alternative.h> @@ -64,10 +65,11 @@ alternative_endif lsr x0, x1, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 + ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap - mrs x1, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x1, el1_trap // called HVC + mrs x1, vttbr_el2 // If vttbr is valid, the guest + cbnz x1, el1_hvc_guest // called HVC /* Here, we're pretty sure the host called HVC. */ ldp x0, x1, [sp], #16 @@ -100,6 +102,20 @@ alternative_endif eret +el1_hvc_guest: + /* + * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. + * The workaround has already been applied on the host, + * so let's quickly get back to the guest. We don't bother + * restoring x1, as it can be clobbered anyway. + */ + ldr x1, [sp] // Guest's x0 + eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 + cbnz w1, el1_trap + mov x0, x1 + add sp, sp, #16 + eret + el1_trap: /* * x0: ESR_EC diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 036e1f3d77a6..cac6a0500162 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -19,6 +19,8 @@ #include <linux/jump_label.h> #include <uapi/linux/psci.h> +#include <kvm/arm_psci.h> + #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> @@ -348,18 +350,6 @@ again: if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) goto again; - if (exit_code == ARM_EXCEPTION_TRAP && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) && - vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) { - u64 val = PSCI_RET_NOT_SUPPORTED; - if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) - val = 2; - - vcpu_set_reg(vcpu, 0, val); - goto again; - } - if (static_branch_unlikely(&vgic_v2_cpuif_trap) && exit_code == ARM_EXCEPTION_TRAP) { bool valid; diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 3d69a8d41fa5..21ba0b29621b 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -21,7 +21,7 @@ .text -/* Prototype: int __clear_user(void *addr, size_t sz) +/* Prototype: int __arch_clear_user(void *addr, size_t sz) * Purpose : clear some user memory * Params : addr - user memory address to clear * : sz - number of bytes to clear @@ -29,7 +29,7 @@ * * Alignment fixed up by hardware. */ -ENTRY(__clear_user) +ENTRY(__arch_clear_user) uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 @@ -52,7 +52,7 @@ uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 uaccess_disable_not_uao x2, x3 ret -ENDPROC(__clear_user) +ENDPROC(__arch_clear_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index fbb090f431a5..54b75deb1d16 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -64,14 +64,15 @@ .endm end .req x5 -ENTRY(raw_copy_in_user) + +ENTRY(__arch_copy_in_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3, x4 mov x0, #0 ret -ENDPROC(raw_copy_in_user) +ENDPROC(__arch_copy_in_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ce441d29e7f6..f76bb2c3c943 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -240,7 +240,7 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, if (fsc_type == ESR_ELx_FSC_PERM) return true; - if (addr < USER_DS && system_uses_ttbr0_pan()) + if (addr < TASK_SIZE && system_uses_ttbr0_pan()) return fsc_type == ESR_ELx_FSC_FAULT && (regs->pstate & PSR_PAN_BIT); @@ -414,7 +414,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < USER_DS && is_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); @@ -707,6 +707,12 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die("", regs, &info, esr); } +asmlinkage void __exception do_el0_irq_bp_hardening(void) +{ + /* PC has already been checked in entry.S */ + arm64_apply_bp_hardening(); +} + asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -731,6 +737,12 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, struct siginfo info; struct task_struct *tsk = current; + if (user_mode(regs)) { + if (instruction_pointer(regs) > TASK_SIZE) + arm64_apply_bp_hardening(); + local_irq_enable(); + } + if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS)) pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n", tsk->comm, task_pid_nr(tsk), @@ -790,6 +802,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, if (interrupts_enabled(regs)) trace_hardirqs_off(); + if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE) + arm64_apply_bp_hardening(); + if (!inf->fn(addr, esr, regs)) { rv = 1; } else { diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index acba49fb5aac..6e02e6fb4c7b 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -135,7 +135,8 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, /* The early shadow maps everything to a single page of zeroes */ asmlinkage void __init kasan_early_init(void) { - BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b44992ec9643..4694cda823c9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -118,6 +118,10 @@ static bool pgattr_change_is_safe(u64 old, u64 new) if ((old | new) & PTE_CONT) return false; + /* Transitioning from Global to Non-Global is safe */ + if (((old ^ new) == PTE_NG) && (new & PTE_NG)) + return true; + return ((old ^ new) & ~mask) == 0; } @@ -685,12 +689,14 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long addr = start; unsigned long next; @@ -725,7 +731,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) return 0; } #endif /* CONFIG_ARM64_64K_PAGES */ -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9f177aac6390..71baed7e592a 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -90,7 +90,7 @@ ENDPROC(cpu_do_suspend) * * x0: Address of context pointer */ - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" ENTRY(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] @@ -153,7 +153,7 @@ ENDPROC(cpu_do_resume) ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id - phys_to_ttbr x0, x3 + phys_to_ttbr x3, x0 #ifdef CONFIG_ARM64_SW_TTBR0_PAN bfi x3, x1, #48, #16 // set the ASID field in TTBR0 #endif @@ -165,7 +165,18 @@ ENTRY(cpu_do_switch_mm) b post_ttbr_update_workaround // Back to C code... ENDPROC(cpu_do_switch_mm) - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" + +.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 + adrp \tmp1, empty_zero_page + phys_to_ttbr \tmp2, \tmp1 + msr ttbr1_el1, \tmp2 + isb + tlbi vmalle1 + dsb nsh + isb +.endm + /* * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) * @@ -175,24 +186,201 @@ ENDPROC(cpu_do_switch_mm) ENTRY(idmap_cpu_replace_ttbr1) save_and_disable_daif flags=x2 - adrp x1, empty_zero_page - phys_to_ttbr x1, x3 + __idmap_cpu_set_reserved_ttbr1 x1, x3 + + phys_to_ttbr x3, x0 msr ttbr1_el1, x3 isb - tlbi vmalle1 - dsb nsh + restore_daif x2 + + ret +ENDPROC(idmap_cpu_replace_ttbr1) + .popsection + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + .pushsection ".idmap.text", "awx" + + .macro __idmap_kpti_get_pgtable_ent, type + dc cvac, cur_\()\type\()p // Ensure any existing dirty + dmb sy // lines are written back before + ldr \type, [cur_\()\type\()p] // loading the entry + tbz \type, #0, next_\()\type // Skip invalid entries + .endm + + .macro __idmap_kpti_put_pgtable_ent_ng, type + orr \type, \type, #PTE_NG // Same bit for blocks and pages + str \type, [cur_\()\type\()p] // Update the entry and ensure it + dc civac, cur_\()\type\()p // is visible to all CPUs. + .endm + +/* + * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) + * + * Called exactly once from stop_machine context by each CPU found during boot. + */ +__idmap_kpti_flag: + .long 1 +ENTRY(idmap_kpti_install_ng_mappings) + cpu .req w0 + num_cpus .req w1 + swapper_pa .req x2 + swapper_ttb .req x3 + flag_ptr .req x4 + cur_pgdp .req x5 + end_pgdp .req x6 + pgd .req x7 + cur_pudp .req x8 + end_pudp .req x9 + pud .req x10 + cur_pmdp .req x11 + end_pmdp .req x12 + pmd .req x13 + cur_ptep .req x14 + end_ptep .req x15 + pte .req x16 + + mrs swapper_ttb, ttbr1_el1 + adr flag_ptr, __idmap_kpti_flag + + cbnz cpu, __idmap_kpti_secondary + + /* We're the boot CPU. Wait for the others to catch up */ + sevl +1: wfe + ldaxr w18, [flag_ptr] + eor w18, w18, num_cpus + cbnz w18, 1b + + /* We need to walk swapper, so turn off the MMU. */ + pre_disable_mmu_workaround + mrs x18, sctlr_el1 + bic x18, x18, #SCTLR_ELx_M + msr sctlr_el1, x18 isb - phys_to_ttbr x0, x3 - msr ttbr1_el1, x3 + /* Everybody is enjoying the idmap, so we can rewrite swapper. */ + /* PGD */ + mov cur_pgdp, swapper_pa + add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) +do_pgd: __idmap_kpti_get_pgtable_ent pgd + tbnz pgd, #1, walk_puds + __idmap_kpti_put_pgtable_ent_ng pgd +next_pgd: + add cur_pgdp, cur_pgdp, #8 + cmp cur_pgdp, end_pgdp + b.ne do_pgd + + /* Publish the updated tables and nuke all the TLBs */ + dsb sy + tlbi vmalle1is + dsb ish isb - restore_daif x2 + /* We're done: fire up the MMU again */ + mrs x18, sctlr_el1 + orr x18, x18, #SCTLR_ELx_M + msr sctlr_el1, x18 + isb + /* Set the flag to zero to indicate that we're all done */ + str wzr, [flag_ptr] ret -ENDPROC(idmap_cpu_replace_ttbr1) + + /* PUD */ +walk_puds: + .if CONFIG_PGTABLE_LEVELS > 3 + pte_to_phys cur_pudp, pgd + add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) +do_pud: __idmap_kpti_get_pgtable_ent pud + tbnz pud, #1, walk_pmds + __idmap_kpti_put_pgtable_ent_ng pud +next_pud: + add cur_pudp, cur_pudp, 8 + cmp cur_pudp, end_pudp + b.ne do_pud + b next_pgd + .else /* CONFIG_PGTABLE_LEVELS <= 3 */ + mov pud, pgd + b walk_pmds +next_pud: + b next_pgd + .endif + + /* PMD */ +walk_pmds: + .if CONFIG_PGTABLE_LEVELS > 2 + pte_to_phys cur_pmdp, pud + add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) +do_pmd: __idmap_kpti_get_pgtable_ent pmd + tbnz pmd, #1, walk_ptes + __idmap_kpti_put_pgtable_ent_ng pmd +next_pmd: + add cur_pmdp, cur_pmdp, #8 + cmp cur_pmdp, end_pmdp + b.ne do_pmd + b next_pud + .else /* CONFIG_PGTABLE_LEVELS <= 2 */ + mov pmd, pud + b walk_ptes +next_pmd: + b next_pud + .endif + + /* PTE */ +walk_ptes: + pte_to_phys cur_ptep, pmd + add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) +do_pte: __idmap_kpti_get_pgtable_ent pte + __idmap_kpti_put_pgtable_ent_ng pte +next_pte: + add cur_ptep, cur_ptep, #8 + cmp cur_ptep, end_ptep + b.ne do_pte + b next_pmd + + /* Secondary CPUs end up here */ +__idmap_kpti_secondary: + /* Uninstall swapper before surgery begins */ + __idmap_cpu_set_reserved_ttbr1 x18, x17 + + /* Increment the flag to let the boot CPU we're ready */ +1: ldxr w18, [flag_ptr] + add w18, w18, #1 + stxr w17, w18, [flag_ptr] + cbnz w17, 1b + + /* Wait for the boot CPU to finish messing around with swapper */ + sevl +1: wfe + ldxr w18, [flag_ptr] + cbnz w18, 1b + + /* All done, act like nothing happened */ + msr ttbr1_el1, swapper_ttb + isb + ret + + .unreq cpu + .unreq num_cpus + .unreq swapper_pa + .unreq swapper_ttb + .unreq flag_ptr + .unreq cur_pgdp + .unreq end_pgdp + .unreq pgd + .unreq cur_pudp + .unreq end_pudp + .unreq pud + .unreq cur_pmdp + .unreq end_pmdp + .unreq pmd + .unreq cur_ptep + .unreq end_ptep + .unreq pte +ENDPROC(idmap_kpti_install_ng_mappings) .popsection +#endif /* * __cpu_setup @@ -200,7 +388,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) * Initialise the processor for turning the MMU on. Return in x0 the * value of the SCTLR_EL1 register. */ - .pushsection ".idmap.text", "ax" + .pushsection ".idmap.text", "awx" ENTRY(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile index e69de29bb2d1..f6bfee6c8c1b 100644 --- a/arch/cris/kernel/Makefile +++ b/arch/cris/kernel/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the CRIS port. +# + +CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE) +extra-y := vmlinux.lds + +obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o +obj-y += stacktrace.o + +obj-$(CONFIG_MODULES) += crisksyms.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_SYSTEM_PROFILER) += profile.o + +clean: + diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c index 524d47501a23..1b61a7207afb 100644 --- a/arch/cris/kernel/setup.c +++ b/arch/cris/kernel/setup.c @@ -24,6 +24,7 @@ #include <linux/of_fdt.h> #include <asm/setup.h> #include <arch/system.h> +#include <asm/sections.h> /* * Setup options @@ -31,7 +32,6 @@ struct screen_info screen_info; extern int root_mountflags; -extern char _etext, _edata, _end; char __initdata cris_command_line[COMMAND_LINE_SIZE] = { 0, }; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index c44f002e8f6b..858602494096 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2610,17 +2610,10 @@ pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) if (pid < 2) return -EPERM; if (pid != task_pid_vnr(current)) { - - read_lock(&tasklist_lock); - - p = find_task_by_vpid(pid); - /* make sure task cannot go away while we operate on it */ - if (p) get_task_struct(p); - - read_unlock(&tasklist_lock); - - if (p == NULL) return -ESRCH; + p = find_get_task_by_vpid(pid); + if (!p) + return -ESRCH; } ret = pfm_task_incompatible(ctx, p); diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index ac46f0d60b66..7d9bd20319ff 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -754,12 +754,14 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7af4e05bb61e..18278b448530 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY); + MEMMAP_EARLY, NULL); return 0; } @@ -509,9 +509,10 @@ void __meminit memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { - if (!vmem_map) - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY); - else { + if (!vmem_map) { + memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, + NULL); + } else { struct page *start; struct memmap_init_callback_data args; @@ -647,13 +648,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); @@ -662,7 +664,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -670,7 +672,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (ret) pr_warn("%s: Problem encountered in __remove_pages() as" " ret=%d\n", __func__, ret); diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index dda58cfe8c22..93b47b1f6fb4 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -311,7 +311,6 @@ static inline int bfchg_mem_test_and_change_bit(int nr, * functions. */ #if defined(CONFIG_CPU_HAS_NO_BITFIELDS) -#include <asm-generic/bitops/find.h> #include <asm-generic/bitops/ffz.h> #else @@ -441,6 +440,8 @@ static inline unsigned long ffz(unsigned long word) #endif +#include <asm-generic/bitops/find.h> + #ifdef __KERNEL__ #if defined(CONFIG_CPU_HAS_NO_BITFIELDS) diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform index 1b3d8c849101..6996f397c16c 100644 --- a/arch/microblaze/Kconfig.platform +++ b/arch/microblaze/Kconfig.platform @@ -8,6 +8,7 @@ menu "Platform options" config OPT_LIB_FUNCTION bool "Optimalized lib function" + depends on CPU_LITTLE_ENDIAN default y help Allows turn on optimalized library function (memcpy and memmove). diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 830ee7d42fa0..d269dd4b8279 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -36,16 +36,21 @@ endif CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare -CPUFLAGS-$(CONFIG_BIG_ENDIAN) += -mbig-endian -CPUFLAGS-$(CONFIG_LITTLE_ENDIAN) += -mlittle-endian + +ifdef CONFIG_CPU_BIG_ENDIAN +KBUILD_CFLAGS += -mbig-endian +KBUILD_AFLAGS += -mbig-endian +LD += -EB +else +KBUILD_CFLAGS += -mlittle-endian +KBUILD_AFLAGS += -mlittle-endian +LD += -EL +endif CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) # r31 holds current when in kernel mode -KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2) - -LDFLAGS := -LDFLAGS_vmlinux := +KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-y) $(CPUFLAGS-1) $(CPUFLAGS-2) head-y := arch/microblaze/kernel/head.o libs-y += arch/microblaze/lib/ diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index 39b6315db82e..c7968139486f 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -36,7 +36,7 @@ extern resource_size_t isa_mem_base; #ifdef CONFIG_MMU #define page_to_bus(page) (page_to_phys(page)) -extern void iounmap(void __iomem *addr); +extern void iounmap(volatile void __iomem *addr); extern void __iomem *ioremap(phys_addr_t address, unsigned long size); #define ioremap_nocache(addr, size) ioremap((addr), (size)) diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 4c0599239915..7f525962cdfa 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -127,7 +127,7 @@ void __iomem *ioremap(phys_addr_t addr, unsigned long size) } EXPORT_SYMBOL(ioremap); -void iounmap(void __iomem *addr) +void iounmap(volatile void __iomem *addr) { if ((__force void *)addr > high_memory && (unsigned long) addr < ioremap_bot) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ab98569994f0..449397c60b56 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -7,8 +7,6 @@ config MIPS select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_SUPPORTS_UPROBES select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT @@ -119,12 +117,12 @@ config MIPS_GENERIC select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_RELOCATABLE select SYS_SUPPORTS_SMARTMIPS - select USB_EHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_EHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN - select USB_OHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_OHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN - select USB_UHCI_BIG_ENDIAN_DESC if BIG_ENDIAN - select USB_UHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN + select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN + select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN + select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN + select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USE_OF help Select this to build a kernel which aims to support multiple boards, @@ -253,6 +251,7 @@ config BCM47XX select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_MIPS16 + select SYS_SUPPORTS_ZBOOT select SYS_HAS_EARLY_PRINTK select USE_GENERIC_EARLY_PRINTK_8250 select GPIOLIB @@ -341,6 +340,8 @@ config MACH_DECSTATION config MACH_JAZZ bool "Jazz family of machines" + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select FW_ARC select FW_ARC32 select ARCH_MAY_HAVE_PC_FDC @@ -476,6 +477,8 @@ config MACH_PISTACHIO config MIPS_MALTA bool "MIPS Malta board" select ARCH_MAY_HAVE_PC_FDC + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select BOOT_ELF32 select BOOT_RAW select BUILTIN_DTB @@ -613,6 +616,7 @@ config SGI_IP22 bool "SGI IP22 (Indy/Indigo2)" select FW_ARC select FW_ARC32 + select ARCH_MIGHT_HAVE_PC_SERIO select BOOT_ELF32 select CEVT_R4K select CSRC_R4K @@ -675,6 +679,7 @@ config SGI_IP28 bool "SGI IP28 (Indigo2 R10k)" select FW_ARC select FW_ARC64 + select ARCH_MIGHT_HAVE_PC_SERIO select BOOT_ELF64 select CEVT_R4K select CSRC_R4K @@ -824,6 +829,8 @@ config SNI_RM select FW_ARC32 if CPU_LITTLE_ENDIAN select FW_SNIPROM if CPU_BIG_ENDIAN select ARCH_MAY_HAVE_PC_FDC + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select BOOT_ELF32 select CEVT_R4K select CSRC_R4K diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 9f6a26d72f9f..d1ca839c3981 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -216,6 +216,12 @@ cflags-$(toolchain-msa) += -DTOOLCHAIN_SUPPORTS_MSA endif toolchain-virt := $(call cc-option-yn,$(mips-cflags) -mvirt) cflags-$(toolchain-virt) += -DTOOLCHAIN_SUPPORTS_VIRT +# For -mmicromips, use -Wa,-fatal-warnings to catch unsupported -mxpa which +# only warns +xpa-cflags-y := $(mips-cflags) +xpa-cflags-$(micromips-ase) += -mmicromips -Wa$(comma)-fatal-warnings +toolchain-xpa := $(call cc-option-yn,$(xpa-cflags-y) -mxpa) +cflags-$(toolchain-xpa) += -DTOOLCHAIN_SUPPORTS_XPA # # Firmware support @@ -228,7 +234,7 @@ libs-y += arch/mips/fw/lib/ # # Kernel compression # -ifdef SYS_SUPPORTS_ZBOOT +ifdef CONFIG_SYS_SUPPORTS_ZBOOT COMPRESSION_FNAME = vmlinuz else COMPRESSION_FNAME = vmlinux diff --git a/arch/mips/bcm47xx/Platform b/arch/mips/bcm47xx/Platform index 874b7ca4cd11..70783b75fd9d 100644 --- a/arch/mips/bcm47xx/Platform +++ b/arch/mips/bcm47xx/Platform @@ -5,3 +5,4 @@ platform-$(CONFIG_BCM47XX) += bcm47xx/ cflags-$(CONFIG_BCM47XX) += \ -I$(srctree)/arch/mips/include/asm/mach-bcm47xx load-$(CONFIG_BCM47XX) := 0xffffffff80001000 +zload-$(CONFIG_BCM47XX) += 0xffffffff80400000 diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index c675eece389a..adce180f3ee4 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -133,4 +133,8 @@ vmlinuz.srec: vmlinuz uzImage.bin: vmlinuz.bin FORCE $(call if_changed,uimage,none) -clean-files := $(objtree)/vmlinuz $(objtree)/vmlinuz.{32,ecoff,bin,srec} +clean-files += $(objtree)/vmlinuz +clean-files += $(objtree)/vmlinuz.32 +clean-files += $(objtree)/vmlinuz.ecoff +clean-files += $(objtree)/vmlinuz.bin +clean-files += $(objtree)/vmlinuz.srec diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile index 6a31759839b4..5b1361a89e02 100644 --- a/arch/mips/boot/dts/ingenic/Makefile +++ b/arch/mips/boot/dts/ingenic/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb +dtb-$(CONFIG_JZ4770_GCW0) += gcw0.dtb dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/mips/boot/dts/ingenic/gcw0.dts b/arch/mips/boot/dts/ingenic/gcw0.dts new file mode 100644 index 000000000000..35f0291e8d38 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/gcw0.dts @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include "jz4770.dtsi" + +/ { + compatible = "gcw,zero", "ingenic,jz4770"; + model = "GCW Zero"; + + aliases { + serial0 = &uart0; + serial1 = &uart1; + serial2 = &uart2; + serial3 = &uart3; + }; + + chosen { + stdout-path = "serial2:57600n8"; + }; + + board { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + otg_phy: otg-phy { + compatible = "usb-nop-xceiv"; + clocks = <&cgu JZ4770_CLK_OTG_PHY>; + clock-names = "main_clk"; + }; + }; +}; + +&ext { + clock-frequency = <12000000>; +}; + +&uart2 { + status = "okay"; +}; + +&cgu { + /* Put high-speed peripherals under PLL1, such that we can change the + * PLL0 frequency on demand without having to suspend peripherals. + * We use a rate of 432 MHz, which is the least common multiple of + * 27 MHz (required by TV encoder) and 48 MHz (required by USB host). + */ + assigned-clocks = + <&cgu JZ4770_CLK_PLL1>, + <&cgu JZ4770_CLK_UHC>; + assigned-clock-parents = + <0>, + <&cgu JZ4770_CLK_PLL1>; + assigned-clock-rates = + <432000000>; +}; + +&uhc { + /* The WiFi module is connected to the UHC. */ + status = "okay"; +}; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi new file mode 100644 index 000000000000..7c2804f3f5f1 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <dt-bindings/clock/jz4770-cgu.h> + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "ingenic,jz4770"; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + intc: interrupt-controller@10001000 { + compatible = "ingenic,jz4770-intc"; + reg = <0x10001000 0x40>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + ext: ext { + compatible = "fixed-clock"; + #clock-cells = <0>; + }; + + osc32k: osc32k { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + + cgu: jz4770-cgu@10000000 { + compatible = "ingenic,jz4770-cgu"; + reg = <0x10000000 0x100>; + + clocks = <&ext>, <&osc32k>; + clock-names = "ext", "osc32k"; + + #clock-cells = <1>; + }; + + pinctrl: pin-controller@10010000 { + compatible = "ingenic,jz4770-pinctrl"; + reg = <0x10010000 0x600>; + + #address-cells = <1>; + #size-cells = <0>; + + gpa: gpio@0 { + compatible = "ingenic,jz4770-gpio"; + reg = <0>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 0 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <17>; + }; + + gpb: gpio@1 { + compatible = "ingenic,jz4770-gpio"; + reg = <1>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 32 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <16>; + }; + + gpc: gpio@2 { + compatible = "ingenic,jz4770-gpio"; + reg = <2>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 64 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <15>; + }; + + gpd: gpio@3 { + compatible = "ingenic,jz4770-gpio"; + reg = <3>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 96 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <14>; + }; + + gpe: gpio@4 { + compatible = "ingenic,jz4770-gpio"; + reg = <4>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 128 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <13>; + }; + + gpf: gpio@5 { + compatible = "ingenic,jz4770-gpio"; + reg = <5>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 160 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <12>; + }; + }; + + uart0: serial@10030000 { + compatible = "ingenic,jz4770-uart"; + reg = <0x10030000 0x100>; + + clocks = <&ext>, <&cgu JZ4770_CLK_UART0>; + clock-names = "baud", "module"; + + interrupt-parent = <&intc>; + interrupts = <5>; + + status = "disabled"; + }; + + uart1: serial@10031000 { + compatible = "ingenic,jz4770-uart"; + reg = <0x10031000 0x100>; + + clocks = <&ext>, <&cgu JZ4770_CLK_UART1>; + clock-names = "baud", "module"; + + interrupt-parent = <&intc>; + interrupts = <4>; + + status = "disabled"; + }; + + uart2: serial@10032000 { + compatible = "ingenic,jz4770-uart"; + reg = <0x10032000 0x100>; + + clocks = <&ext>, <&cgu JZ4770_CLK_UART2>; + clock-names = "baud", "module"; + + interrupt-parent = <&intc>; + interrupts = <3>; + + status = "disabled"; + }; + + uart3: serial@10033000 { + compatible = "ingenic,jz4770-uart"; + reg = <0x10033000 0x100>; + + clocks = <&ext>, <&cgu JZ4770_CLK_UART3>; + clock-names = "baud", "module"; + + interrupt-parent = <&intc>; + interrupts = <2>; + + status = "disabled"; + }; + + uhc: uhc@13430000 { + compatible = "generic-ohci"; + reg = <0x13430000 0x1000>; + + clocks = <&cgu JZ4770_CLK_UHC>, <&cgu JZ4770_CLK_UHC_PHY>; + assigned-clocks = <&cgu JZ4770_CLK_UHC>; + assigned-clock-rates = <48000000>; + + interrupt-parent = <&intc>; + interrupts = <20>; + + status = "disabled"; + }; +}; diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index a55009edbb29..5e73fe755be6 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -153,7 +153,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y # CONFIG_INPUT is not set -# CONFIG_SERIO_I8042 is not set CONFIG_SERIO_RAW=m # CONFIG_VT is not set CONFIG_SERIAL_NONSTANDARD=y diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig new file mode 100644 index 000000000000..99ac1fa3b35f --- /dev/null +++ b/arch/mips/configs/gcw0_defconfig @@ -0,0 +1,27 @@ +CONFIG_MACH_INGENIC=y +CONFIG_JZ4770_GCW0=y +CONFIG_HIGHMEM=y +# CONFIG_BOUNCE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_SECCOMP is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_EMBEDDED=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_SUSPEND is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_NETDEVICES=y +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_INGENIC=y +CONFIG_USB=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_NOP_USB_XCEIV=y +CONFIG_TMPFS=y diff --git a/arch/mips/configs/generic/board-ranchu.config b/arch/mips/configs/generic/board-ranchu.config new file mode 100644 index 000000000000..fee9ad4c5598 --- /dev/null +++ b/arch/mips/configs/generic/board-ranchu.config @@ -0,0 +1,30 @@ +CONFIG_VIRT_BOARD_RANCHU=y + +CONFIG_BATTERY_GOLDFISH=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_GOLDFISH=y +CONFIG_STAGING=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH_PIC=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_GOLDFISH_TTY=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_GOLDFISH=y + +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y + +CONFIG_MAGIC_SYSRQ=y +CONFIG_POWER_SUPPLY=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_POWER_RESET_SYSCON_POWEROFF=y + +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index a0d593248668..91a9c13e2c82 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -252,7 +252,6 @@ CONFIG_RT2800PCI=m CONFIG_WL12XX=m CONFIG_WL1251=m # CONFIG_INPUT is not set -# CONFIG_SERIO_I8042 is not set CONFIG_SERIO_LIBPS2=m CONFIG_SERIO_RAW=m CONFIG_SERIO_ALTERA_PS2=m diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 1e26e58b9dc3..ebff297328ae 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -75,7 +75,6 @@ CONFIG_DE2104X=m CONFIG_TULIP=m CONFIG_TULIP_MMIO=y CONFIG_INPUT_EVDEV=m -# CONFIG_SERIO_I8042 is not set CONFIG_SERIO_MACEPS2=y CONFIG_SERIO_RAW=y # CONFIG_CONSOLE_TRANSLATIONS is not set diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 396408404487..df8a9a15ca83 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -312,9 +312,8 @@ CONFIG_HOSTAP_PCI=m CONFIG_IPW2100=m CONFIG_IPW2100_MONITOR=y CONFIG_LIBERTAS=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set +CONFIG_INPUT_MOUSEDEV=y +CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_POWER_RESET=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 5691673a3327..14df9ef15d40 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -324,9 +324,7 @@ CONFIG_HOSTAP_PCI=m CONFIG_IPW2100=m CONFIG_IPW2100_MONITOR=y CONFIG_LIBERTAS=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set +CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_POWER_RESET=y diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig index e9cadb37d684..25092e344574 100644 --- a/arch/mips/configs/malta_kvm_guest_defconfig +++ b/arch/mips/configs/malta_kvm_guest_defconfig @@ -326,9 +326,7 @@ CONFIG_HOSTAP_PCI=m CONFIG_IPW2100=m CONFIG_IPW2100_MONITOR=y CONFIG_LIBERTAS=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set +CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_POWER_RESET=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 77145ecaa23b..210bf609f785 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -126,6 +126,7 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_WLAN is not set +CONFIG_INPUT_MOUSEDEV=y # CONFIG_VT is not set CONFIG_LEGACY_PTY_COUNT=4 CONFIG_SERIAL_8250=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index cc2687cfdc13..e5934aa98397 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -126,6 +126,7 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_TOSHIBA is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_WLAN is not set +CONFIG_INPUT_MOUSEDEV=y # CONFIG_VT is not set CONFIG_LEGACY_PTY_COUNT=16 CONFIG_SERIAL_8250=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index d8c8f5fb8918..cb2ca11c1789 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -127,6 +127,7 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_WLAN is not set +CONFIG_INPUT_MOUSEDEV=y # CONFIG_VT is not set CONFIG_LEGACY_PTY_COUNT=4 CONFIG_SERIAL_8250=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 04827bc9f87f..be29fcec69fc 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -130,6 +130,7 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_WLAN is not set +CONFIG_INPUT_MOUSEDEV=y # CONFIG_VT is not set CONFIG_LEGACY_PTY_COUNT=4 CONFIG_SERIAL_8250=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 7ea7c0ba2666..40462d4c90a0 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -125,6 +125,7 @@ CONFIG_PCNET32=y # CONFIG_NET_VENDOR_TOSHIBA is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_WLAN is not set +CONFIG_INPUT_MOUSEDEV=y # CONFIG_VT is not set CONFIG_LEGACY_PTY_COUNT=16 CONFIG_SERIAL_8250=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 2942610e4082..4e50176cb3df 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -321,9 +321,8 @@ CONFIG_HOSTAP_PCI=m CONFIG_IPW2100=m CONFIG_IPW2100_MONITOR=y CONFIG_LIBERTAS=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set +CONFIG_INPUT_MOUSEDEV=y +CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_POWER_RESET=y diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig index 7357248b3d7a..e8e1dd8e0e99 100644 --- a/arch/mips/configs/nlm_xlp_defconfig +++ b/arch/mips/configs/nlm_xlp_defconfig @@ -399,7 +399,6 @@ CONFIG_INPUT_EVDEV=y CONFIG_INPUT_EVBUG=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=y CONFIG_SERIO_RAW=m diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig index 1e18fd7de209..c4477a4d40c1 100644 --- a/arch/mips/configs/nlm_xlr_defconfig +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -332,7 +332,6 @@ CONFIG_INPUT_EVDEV=y CONFIG_INPUT_EVBUG=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set CONFIG_SERIO_SERPORT=m CONFIG_SERIO_LIBPS2=y CONFIG_SERIO_RAW=m diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig index 81b5eb89446c..e73cdb08fc6e 100644 --- a/arch/mips/configs/pnx8335_stb225_defconfig +++ b/arch/mips/configs/pnx8335_stb225_defconfig @@ -49,7 +49,6 @@ CONFIG_INPUT_EVDEV=m CONFIG_INPUT_EVBUG=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set # CONFIG_VT_CONSOLE is not set CONFIG_SERIAL_PNX8XXX=y CONFIG_SERIAL_PNX8XXX_CONSOLE=y diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig index c724bdd6a7e6..1edd8430ad61 100644 --- a/arch/mips/configs/sb1250_swarm_defconfig +++ b/arch/mips/configs/sb1250_swarm_defconfig @@ -65,7 +65,6 @@ CONFIG_NET_ETHERNET=y CONFIG_MII=y CONFIG_SB1250_MAC=y # CONFIG_INPUT is not set -# CONFIG_SERIO_I8042 is not set CONFIG_SERIO_RAW=m # CONFIG_VT is not set # CONFIG_HW_RANDOM is not set diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig index 52e0286a1612..2ff3b17bfab1 100644 --- a/arch/mips/generic/Kconfig +++ b/arch/mips/generic/Kconfig @@ -49,4 +49,14 @@ config FIT_IMAGE_FDT_XILFPGA Enable this to include the FDT for the MIPSfpga platform from Imagination Technologies in the FIT kernel image. +config VIRT_BOARD_RANCHU + bool "Support Ranchu platform for Android emulator" + help + This enables support for the platform used by Android emulator. + + Ranchu platform consists of a set of virtual devices. This platform + enables emulation of variety of virtual configurations while using + Android emulator. Android emulator is based on Qemu, and contains + the support for the same set of virtual devices. + endif diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile index 874967363dbb..5c31e0c4697d 100644 --- a/arch/mips/generic/Makefile +++ b/arch/mips/generic/Makefile @@ -15,3 +15,4 @@ obj-y += proc.o obj-$(CONFIG_YAMON_DT_SHIM) += yamon-dt.o obj-$(CONFIG_LEGACY_BOARD_SEAD3) += board-sead3.o obj-$(CONFIG_KEXEC) += kexec.o +obj-$(CONFIG_VIRT_BOARD_RANCHU) += board-ranchu.o diff --git a/arch/mips/generic/board-ranchu.c b/arch/mips/generic/board-ranchu.c new file mode 100644 index 000000000000..59a8c18fa2cc --- /dev/null +++ b/arch/mips/generic/board-ranchu.c @@ -0,0 +1,93 @@ +/* + * Support code for virtual Ranchu board for MIPS. + * + * Author: Miodrag Dinic <miodrag.dinic@mips.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/of_address.h> +#include <linux/types.h> + +#include <asm/machine.h> +#include <asm/mipsregs.h> +#include <asm/time.h> + +#define GOLDFISH_TIMER_LOW 0x00 +#define GOLDFISH_TIMER_HIGH 0x04 + +static __init u64 read_rtc_time(void __iomem *base) +{ + u32 time_low; + u32 time_high; + + /* + * Reading the low address latches the high value + * as well so there is no fear that we may read + * inaccurate high value. + */ + time_low = readl(base + GOLDFISH_TIMER_LOW); + time_high = readl(base + GOLDFISH_TIMER_HIGH); + + return ((u64)time_high << 32) | time_low; +} + +static __init unsigned int ranchu_measure_hpt_freq(void) +{ + u64 rtc_start, rtc_current, rtc_delta; + unsigned int start, count; + struct device_node *np; + void __iomem *rtc_base; + + np = of_find_compatible_node(NULL, NULL, "google,goldfish-rtc"); + if (!np) + panic("%s(): Failed to find 'google,goldfish-rtc' dt node!", + __func__); + + rtc_base = of_iomap(np, 0); + if (!rtc_base) + panic("%s(): Failed to ioremap Goldfish RTC base!", __func__); + + /* + * Poll the nanosecond resolution RTC for one + * second to calibrate the CPU frequency. + */ + rtc_start = read_rtc_time(rtc_base); + start = read_c0_count(); + + do { + rtc_current = read_rtc_time(rtc_base); + rtc_delta = rtc_current - rtc_start; + } while (rtc_delta < NSEC_PER_SEC); + + count = read_c0_count() - start; + + /* + * Make sure the frequency will be a round number. + * Without this correction, the returned value may vary + * between subsequent emulation executions. + * + * TODO: Set this value using device tree. + */ + count += 5000; + count -= count % 10000; + + iounmap(rtc_base); + + return count; +} + +static const struct of_device_id ranchu_of_match[] __initconst = { + { + .compatible = "mti,ranchu", + }, + {} +}; + +MIPS_MACHINE(ranchu) = { + .matches = ranchu_of_match, + .measure_hpt_freq = ranchu_measure_hpt_freq, +}; diff --git a/arch/mips/generic/irq.c b/arch/mips/generic/irq.c index 394f8161e462..cb7fdaeef426 100644 --- a/arch/mips/generic/irq.c +++ b/arch/mips/generic/irq.c @@ -22,10 +22,10 @@ int get_c0_fdc_int(void) { int mips_cpu_fdc_irq; - if (cpu_has_veic) - panic("Unimplemented!"); - else if (mips_gic_present()) + if (mips_gic_present()) mips_cpu_fdc_irq = gic_get_c0_fdc_int(); + else if (cpu_has_veic) + panic("Unimplemented!"); else if (cp0_fdc_irq >= 0) mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq; else @@ -38,10 +38,10 @@ int get_c0_perfcount_int(void) { int mips_cpu_perf_irq; - if (cpu_has_veic) - panic("Unimplemented!"); - else if (mips_gic_present()) + if (mips_gic_present()) mips_cpu_perf_irq = gic_get_c0_perfcount_int(); + else if (cpu_has_veic) + panic("Unimplemented!"); else if (cp0_perfcount_irq >= 0) mips_cpu_perf_irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; else @@ -54,10 +54,10 @@ unsigned int get_c0_compare_int(void) { int mips_cpu_timer_irq; - if (cpu_has_veic) - panic("Unimplemented!"); - else if (mips_gic_present()) + if (mips_gic_present()) mips_cpu_timer_irq = gic_get_c0_compare_int(); + else if (cpu_has_veic) + panic("Unimplemented!"); else mips_cpu_timer_irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq; diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index e26a093bb17a..a301a8f4bc66 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -79,6 +79,8 @@ enum loongson_machine_type { */ #define MACH_INGENIC_JZ4730 0 /* JZ4730 SOC */ #define MACH_INGENIC_JZ4740 1 /* JZ4740 SOC */ +#define MACH_INGENIC_JZ4770 2 /* JZ4770 SOC */ +#define MACH_INGENIC_JZ4780 3 /* JZ4780 SOC */ extern char *system_type; const char *get_system_type(void); diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index 77cad232a1c6..e8161e4dfde7 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -110,7 +110,7 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst, */ static inline __sum16 csum_fold(__wsum csum) { - u32 sum = (__force u32)csum;; + u32 sum = (__force u32)csum; sum += (sum << 16); csum = (sum < csum); diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 4f69f08717f6..8c286bedff3e 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -4,7 +4,7 @@ #define SYSTEM_RAM_LOW 1 #define SYSTEM_RAM_HIGH 2 -#define MEM_RESERVED 3 +#define SYSTEM_RAM_RESERVED 3 #define PCI_IO 4 #define PCI_MEM 5 #define LOONGSON_CFG_REG 6 diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h index e0d9b373d415..f83879dadd1e 100644 --- a/arch/mips/include/asm/machine.h +++ b/arch/mips/include/asm/machine.h @@ -52,7 +52,7 @@ mips_machine_is_compatible(const struct mips_machine *mach, const void *fdt) if (!mach->matches) return NULL; - for (match = mach->matches; match->compatible; match++) { + for (match = mach->matches; match->compatible[0]; match++) { if (fdt_node_check_compatible(fdt, 0, match->compatible) == 0) return match; } diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 6b1f1ad0542c..858752dac337 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1181,6 +1181,89 @@ static inline int mm_insn_16bit(u16 insn) #endif /* + * parse_r var, r - Helper assembler macro for parsing register names. + * + * This converts the register name in $n form provided in \r to the + * corresponding register number, which is assigned to the variable \var. It is + * needed to allow explicit encoding of instructions in inline assembly where + * registers are chosen by the compiler in $n form, allowing us to avoid using + * fixed register numbers. + * + * It also allows newer instructions (not implemented by the assembler) to be + * transparently implemented using assembler macros, instead of needing separate + * cases depending on toolchain support. + * + * Simple usage example: + * __asm__ __volatile__("parse_r __rt, %0\n\t" + * ".insn\n\t" + * "# di %0\n\t" + * ".word (0x41606000 | (__rt << 16))" + * : "=r" (status); + */ + +/* Match an individual register number and assign to \var */ +#define _IFC_REG(n) \ + ".ifc \\r, $" #n "\n\t" \ + "\\var = " #n "\n\t" \ + ".endif\n\t" + +__asm__(".macro parse_r var r\n\t" + "\\var = -1\n\t" + _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) + _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) + _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) + _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) + _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) + _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) + _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) + _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) + ".iflt \\var\n\t" + ".error \"Unable to parse register name \\r\"\n\t" + ".endif\n\t" + ".endm"); + +#undef _IFC_REG + +/* + * C macros for generating assembler macros for common instruction formats. + * + * The names of the operands can be chosen by the caller, and the encoding of + * register operand \<Rn> is assigned to __<Rn> where it can be accessed from + * the ENC encodings. + */ + +/* Instructions with no operands */ +#define _ASM_MACRO_0(OP, ENC) \ + __asm__(".macro " #OP "\n\t" \ + ENC \ + ".endm") + +/* Instructions with 2 register operands */ +#define _ASM_MACRO_2R(OP, R1, R2, ENC) \ + __asm__(".macro " #OP " " #R1 ", " #R2 "\n\t" \ + "parse_r __" #R1 ", \\" #R1 "\n\t" \ + "parse_r __" #R2 ", \\" #R2 "\n\t" \ + ENC \ + ".endm") + +/* Instructions with 3 register operands */ +#define _ASM_MACRO_3R(OP, R1, R2, R3, ENC) \ + __asm__(".macro " #OP " " #R1 ", " #R2 ", " #R3 "\n\t" \ + "parse_r __" #R1 ", \\" #R1 "\n\t" \ + "parse_r __" #R2 ", \\" #R2 "\n\t" \ + "parse_r __" #R3 ", \\" #R3 "\n\t" \ + ENC \ + ".endm") + +/* Instructions with 2 register operands and 1 optional select operand */ +#define _ASM_MACRO_2R_1S(OP, R1, R2, SEL3, ENC) \ + __asm__(".macro " #OP " " #R1 ", " #R2 ", " #SEL3 " = 0\n\t" \ + "parse_r __" #R1 ", \\" #R1 "\n\t" \ + "parse_r __" #R2 ", \\" #R2 "\n\t" \ + ENC \ + ".endm") + +/* * TLB Invalidate Flush */ static inline void tlbinvf(void) @@ -1245,14 +1328,14 @@ do { \ * Macros to access the system control coprocessor */ -#define __read_32bit_c0_register(source, sel) \ +#define ___read_32bit_c0_register(source, sel, vol) \ ({ unsigned int __res; \ if (sel == 0) \ - __asm__ __volatile__( \ + __asm__ vol( \ "mfc0\t%0, " #source "\n\t" \ : "=r" (__res)); \ else \ - __asm__ __volatile__( \ + __asm__ vol( \ ".set\tmips32\n\t" \ "mfc0\t%0, " #source ", " #sel "\n\t" \ ".set\tmips0\n\t" \ @@ -1260,18 +1343,18 @@ do { \ __res; \ }) -#define __read_64bit_c0_register(source, sel) \ +#define ___read_64bit_c0_register(source, sel, vol) \ ({ unsigned long long __res; \ if (sizeof(unsigned long) == 4) \ - __res = __read_64bit_c0_split(source, sel); \ + __res = __read_64bit_c0_split(source, sel, vol); \ else if (sel == 0) \ - __asm__ __volatile__( \ + __asm__ vol( \ ".set\tmips3\n\t" \ "dmfc0\t%0, " #source "\n\t" \ ".set\tmips0" \ : "=r" (__res)); \ else \ - __asm__ __volatile__( \ + __asm__ vol( \ ".set\tmips64\n\t" \ "dmfc0\t%0, " #source ", " #sel "\n\t" \ ".set\tmips0" \ @@ -1279,6 +1362,18 @@ do { \ __res; \ }) +#define __read_32bit_c0_register(source, sel) \ + ___read_32bit_c0_register(source, sel, __volatile__) + +#define __read_const_32bit_c0_register(source, sel) \ + ___read_32bit_c0_register(source, sel,) + +#define __read_64bit_c0_register(source, sel) \ + ___read_64bit_c0_register(source, sel, __volatile__) + +#define __read_const_64bit_c0_register(source, sel) \ + ___read_64bit_c0_register(source, sel,) + #define __write_32bit_c0_register(register, sel, value) \ do { \ if (sel == 0) \ @@ -1316,6 +1411,11 @@ do { \ (unsigned long) __read_32bit_c0_register(reg, sel) : \ (unsigned long) __read_64bit_c0_register(reg, sel)) +#define __read_const_ulong_c0_register(reg, sel) \ + ((sizeof(unsigned long) == 4) ? \ + (unsigned long) __read_const_32bit_c0_register(reg, sel) : \ + (unsigned long) __read_const_64bit_c0_register(reg, sel)) + #define __write_ulong_c0_register(reg, sel, val) \ do { \ if (sizeof(unsigned long) == 4) \ @@ -1346,14 +1446,14 @@ do { \ * These versions are only needed for systems with more than 38 bits of * physical address space running the 32-bit kernel. That's none atm :-) */ -#define __read_64bit_c0_split(source, sel) \ +#define __read_64bit_c0_split(source, sel, vol) \ ({ \ unsigned long long __val; \ unsigned long __flags; \ \ local_irq_save(__flags); \ if (sel == 0) \ - __asm__ __volatile__( \ + __asm__ vol( \ ".set\tmips64\n\t" \ "dmfc0\t%L0, " #source "\n\t" \ "dsra\t%M0, %L0, 32\n\t" \ @@ -1361,7 +1461,7 @@ do { \ ".set\tmips0" \ : "=r" (__val)); \ else \ - __asm__ __volatile__( \ + __asm__ vol( \ ".set\tmips64\n\t" \ "dmfc0\t%L0, " #source ", " #sel "\n\t" \ "dsra\t%M0, %L0, 32\n\t" \ @@ -1404,37 +1504,43 @@ do { \ local_irq_restore(__flags); \ } while (0) -#define __readx_32bit_c0_register(source) \ +#ifndef TOOLCHAIN_SUPPORTS_XPA +_ASM_MACRO_2R_1S(mfhc0, rt, rs, sel, + _ASM_INSN_IF_MIPS(0x40400000 | __rt << 16 | __rs << 11 | \\sel) + _ASM_INSN32_IF_MM(0x000000f4 | __rt << 21 | __rs << 16 | \\sel << 11)); +_ASM_MACRO_2R_1S(mthc0, rt, rd, sel, + _ASM_INSN_IF_MIPS(0x40c00000 | __rt << 16 | __rd << 11 | \\sel) + _ASM_INSN32_IF_MM(0x000002f4 | __rt << 21 | __rd << 16 | \\sel << 11)); +#define _ASM_SET_XPA "" +#else /* !TOOLCHAIN_SUPPORTS_XPA */ +#define _ASM_SET_XPA ".set\txpa\n\t" +#endif + +#define __readx_32bit_c0_register(source, sel) \ ({ \ unsigned int __res; \ \ __asm__ __volatile__( \ " .set push \n" \ - " .set noat \n" \ " .set mips32r2 \n" \ - " # mfhc0 $1, %1 \n" \ - _ASM_INSN_IF_MIPS(0x40410000 | ((%1 & 0x1f) << 11)) \ - _ASM_INSN32_IF_MM(0x002000f4 | ((%1 & 0x1f) << 16)) \ - " move %0, $1 \n" \ + _ASM_SET_XPA \ + " mfhc0 %0, " #source ", %1 \n" \ " .set pop \n" \ : "=r" (__res) \ - : "i" (source)); \ + : "i" (sel)); \ __res; \ }) -#define __writex_32bit_c0_register(register, value) \ +#define __writex_32bit_c0_register(register, sel, value) \ do { \ __asm__ __volatile__( \ " .set push \n" \ - " .set noat \n" \ " .set mips32r2 \n" \ - " move $1, %0 \n" \ - " # mthc0 $1, %1 \n" \ - _ASM_INSN_IF_MIPS(0x40c10000 | ((%1 & 0x1f) << 11)) \ - _ASM_INSN32_IF_MM(0x002002f4 | ((%1 & 0x1f) << 16)) \ + _ASM_SET_XPA \ + " mthc0 %z0, " #register ", %1 \n" \ " .set pop \n" \ : \ - : "r" (value), "i" (register)); \ + : "Jr" (value), "i" (sel)); \ } while (0) #define read_c0_index() __read_32bit_c0_register($0, 0) @@ -1446,14 +1552,14 @@ do { \ #define read_c0_entrylo0() __read_ulong_c0_register($2, 0) #define write_c0_entrylo0(val) __write_ulong_c0_register($2, 0, val) -#define readx_c0_entrylo0() __readx_32bit_c0_register(2) -#define writex_c0_entrylo0(val) __writex_32bit_c0_register(2, val) +#define readx_c0_entrylo0() __readx_32bit_c0_register($2, 0) +#define writex_c0_entrylo0(val) __writex_32bit_c0_register($2, 0, val) #define read_c0_entrylo1() __read_ulong_c0_register($3, 0) #define write_c0_entrylo1(val) __write_ulong_c0_register($3, 0, val) -#define readx_c0_entrylo1() __readx_32bit_c0_register(3) -#define writex_c0_entrylo1(val) __writex_32bit_c0_register(3, val) +#define readx_c0_entrylo1() __readx_32bit_c0_register($3, 0) +#define writex_c0_entrylo1(val) __writex_32bit_c0_register($3, 0, val) #define read_c0_conf() __read_32bit_c0_register($3, 0) #define write_c0_conf(val) __write_32bit_c0_register($3, 0, val) @@ -1541,7 +1647,7 @@ do { \ #define read_c0_epc() __read_ulong_c0_register($14, 0) #define write_c0_epc(val) __write_ulong_c0_register($14, 0, val) -#define read_c0_prid() __read_32bit_c0_register($15, 0) +#define read_c0_prid() __read_const_32bit_c0_register($15, 0) #define read_c0_cmgcrbase() __read_ulong_c0_register($15, 3) @@ -1830,18 +1936,44 @@ do { \ * Macros to access the guest system control coprocessor */ -#ifdef TOOLCHAIN_SUPPORTS_VIRT +#ifndef TOOLCHAIN_SUPPORTS_VIRT +_ASM_MACRO_2R_1S(mfgc0, rt, rs, sel, + _ASM_INSN_IF_MIPS(0x40600000 | __rt << 16 | __rs << 11 | \\sel) + _ASM_INSN32_IF_MM(0x000004fc | __rt << 21 | __rs << 16 | \\sel << 11)); +_ASM_MACRO_2R_1S(dmfgc0, rt, rs, sel, + _ASM_INSN_IF_MIPS(0x40600100 | __rt << 16 | __rs << 11 | \\sel) + _ASM_INSN32_IF_MM(0x580004fc | __rt << 21 | __rs << 16 | \\sel << 11)); +_ASM_MACRO_2R_1S(mtgc0, rt, rd, sel, + _ASM_INSN_IF_MIPS(0x40600200 | __rt << 16 | __rd << 11 | \\sel) + _ASM_INSN32_IF_MM(0x000006fc | __rt << 21 | __rd << 16 | \\sel << 11)); +_ASM_MACRO_2R_1S(dmtgc0, rt, rd, sel, + _ASM_INSN_IF_MIPS(0x40600300 | __rt << 16 | __rd << 11 | \\sel) + _ASM_INSN32_IF_MM(0x580006fc | __rt << 21 | __rd << 16 | \\sel << 11)); +_ASM_MACRO_0(tlbgp, _ASM_INSN_IF_MIPS(0x42000010) + _ASM_INSN32_IF_MM(0x0000017c)); +_ASM_MACRO_0(tlbgr, _ASM_INSN_IF_MIPS(0x42000009) + _ASM_INSN32_IF_MM(0x0000117c)); +_ASM_MACRO_0(tlbgwi, _ASM_INSN_IF_MIPS(0x4200000a) + _ASM_INSN32_IF_MM(0x0000217c)); +_ASM_MACRO_0(tlbgwr, _ASM_INSN_IF_MIPS(0x4200000e) + _ASM_INSN32_IF_MM(0x0000317c)); +_ASM_MACRO_0(tlbginvf, _ASM_INSN_IF_MIPS(0x4200000c) + _ASM_INSN32_IF_MM(0x0000517c)); +#define _ASM_SET_VIRT "" +#else /* !TOOLCHAIN_SUPPORTS_VIRT */ +#define _ASM_SET_VIRT ".set\tvirt\n\t" +#endif #define __read_32bit_gc0_register(source, sel) \ ({ int __res; \ __asm__ __volatile__( \ ".set\tpush\n\t" \ ".set\tmips32r2\n\t" \ - ".set\tvirt\n\t" \ - "mfgc0\t%0, $%1, %2\n\t" \ + _ASM_SET_VIRT \ + "mfgc0\t%0, " #source ", %1\n\t" \ ".set\tpop" \ : "=r" (__res) \ - : "i" (source), "i" (sel)); \ + : "i" (sel)); \ __res; \ }) @@ -1850,11 +1982,11 @@ do { \ __asm__ __volatile__( \ ".set\tpush\n\t" \ ".set\tmips64r2\n\t" \ - ".set\tvirt\n\t" \ - "dmfgc0\t%0, $%1, %2\n\t" \ + _ASM_SET_VIRT \ + "dmfgc0\t%0, " #source ", %1\n\t" \ ".set\tpop" \ : "=r" (__res) \ - : "i" (source), "i" (sel)); \ + : "i" (sel)); \ __res; \ }) @@ -1863,11 +1995,11 @@ do { \ __asm__ __volatile__( \ ".set\tpush\n\t" \ ".set\tmips32r2\n\t" \ - ".set\tvirt\n\t" \ - "mtgc0\t%z0, $%1, %2\n\t" \ + _ASM_SET_VIRT \ + "mtgc0\t%z0, " #register ", %1\n\t" \ ".set\tpop" \ : : "Jr" ((unsigned int)(value)), \ - "i" (register), "i" (sel)); \ + "i" (sel)); \ } while (0) #define __write_64bit_gc0_register(register, sel, value) \ @@ -1875,75 +2007,13 @@ do { \ __asm__ __volatile__( \ ".set\tpush\n\t" \ ".set\tmips64r2\n\t" \ - ".set\tvirt\n\t" \ - "dmtgc0\t%z0, $%1, %2\n\t" \ + _ASM_SET_VIRT \ + "dmtgc0\t%z0, " #register ", %1\n\t" \ ".set\tpop" \ : : "Jr" (value), \ - "i" (register), "i" (sel)); \ + "i" (sel)); \ } while (0) -#else /* TOOLCHAIN_SUPPORTS_VIRT */ - -#define __read_32bit_gc0_register(source, sel) \ -({ int __res; \ - __asm__ __volatile__( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "# mfgc0\t$1, $%1, %2\n\t" \ - _ASM_INSN_IF_MIPS(0x40610000 | %1 << 11 | %2) \ - _ASM_INSN32_IF_MM(0x002004fc | %1 << 16 | %2 << 11) \ - "move\t%0, $1\n\t" \ - ".set\tpop" \ - : "=r" (__res) \ - : "i" (source), "i" (sel)); \ - __res; \ -}) - -#define __read_64bit_gc0_register(source, sel) \ -({ unsigned long long __res; \ - __asm__ __volatile__( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "# dmfgc0\t$1, $%1, %2\n\t" \ - _ASM_INSN_IF_MIPS(0x40610100 | %1 << 11 | %2) \ - _ASM_INSN32_IF_MM(0x582004fc | %1 << 16 | %2 << 11) \ - "move\t%0, $1\n\t" \ - ".set\tpop" \ - : "=r" (__res) \ - : "i" (source), "i" (sel)); \ - __res; \ -}) - -#define __write_32bit_gc0_register(register, sel, value) \ -do { \ - __asm__ __volatile__( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "move\t$1, %z0\n\t" \ - "# mtgc0\t$1, $%1, %2\n\t" \ - _ASM_INSN_IF_MIPS(0x40610200 | %1 << 11 | %2) \ - _ASM_INSN32_IF_MM(0x002006fc | %1 << 16 | %2 << 11) \ - ".set\tpop" \ - : : "Jr" ((unsigned int)(value)), \ - "i" (register), "i" (sel)); \ -} while (0) - -#define __write_64bit_gc0_register(register, sel, value) \ -do { \ - __asm__ __volatile__( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "move\t$1, %z0\n\t" \ - "# dmtgc0\t$1, $%1, %2\n\t" \ - _ASM_INSN_IF_MIPS(0x40610300 | %1 << 11 | %2) \ - _ASM_INSN32_IF_MM(0x582006fc | %1 << 16 | %2 << 11) \ - ".set\tpop" \ - : : "Jr" (value), \ - "i" (register), "i" (sel)); \ -} while (0) - -#endif /* !TOOLCHAIN_SUPPORTS_VIRT */ - #define __read_ulong_gc0_register(reg, sel) \ ((sizeof(unsigned long) == 4) ? \ (unsigned long) __read_32bit_gc0_register(reg, sel) : \ @@ -1957,207 +2027,207 @@ do { \ __write_64bit_gc0_register(reg, sel, val); \ } while (0) -#define read_gc0_index() __read_32bit_gc0_register(0, 0) -#define write_gc0_index(val) __write_32bit_gc0_register(0, 0, val) +#define read_gc0_index() __read_32bit_gc0_register($0, 0) +#define write_gc0_index(val) __write_32bit_gc0_register($0, 0, val) -#define read_gc0_entrylo0() __read_ulong_gc0_register(2, 0) -#define write_gc0_entrylo0(val) __write_ulong_gc0_register(2, 0, val) +#define read_gc0_entrylo0() __read_ulong_gc0_register($2, 0) +#define write_gc0_entrylo0(val) __write_ulong_gc0_register($2, 0, val) -#define read_gc0_entrylo1() __read_ulong_gc0_register(3, 0) -#define write_gc0_entrylo1(val) __write_ulong_gc0_register(3, 0, val) +#define read_gc0_entrylo1() __read_ulong_gc0_register($3, 0) +#define write_gc0_entrylo1(val) __write_ulong_gc0_register($3, 0, val) -#define read_gc0_context() __read_ulong_gc0_register(4, 0) -#define write_gc0_context(val) __write_ulong_gc0_register(4, 0, val) +#define read_gc0_context() __read_ulong_gc0_register($4, 0) +#define write_gc0_context(val) __write_ulong_gc0_register($4, 0, val) -#define read_gc0_contextconfig() __read_32bit_gc0_register(4, 1) -#define write_gc0_contextconfig(val) __write_32bit_gc0_register(4, 1, val) +#define read_gc0_contextconfig() __read_32bit_gc0_register($4, 1) +#define write_gc0_contextconfig(val) __write_32bit_gc0_register($4, 1, val) -#define read_gc0_userlocal() __read_ulong_gc0_register(4, 2) -#define write_gc0_userlocal(val) __write_ulong_gc0_register(4, 2, val) +#define read_gc0_userlocal() __read_ulong_gc0_register($4, 2) +#define write_gc0_userlocal(val) __write_ulong_gc0_register($4, 2, val) -#define read_gc0_xcontextconfig() __read_ulong_gc0_register(4, 3) -#define write_gc0_xcontextconfig(val) __write_ulong_gc0_register(4, 3, val) +#define read_gc0_xcontextconfig() __read_ulong_gc0_register($4, 3) +#define write_gc0_xcontextconfig(val) __write_ulong_gc0_register($4, 3, val) -#define read_gc0_pagemask() __read_32bit_gc0_register(5, 0) -#define write_gc0_pagemask(val) __write_32bit_gc0_register(5, 0, val) +#define read_gc0_pagemask() __read_32bit_gc0_register($5, 0) +#define write_gc0_pagemask(val) __write_32bit_gc0_register($5, 0, val) -#define read_gc0_pagegrain() __read_32bit_gc0_register(5, 1) -#define write_gc0_pagegrain(val) __write_32bit_gc0_register(5, 1, val) +#define read_gc0_pagegrain() __read_32bit_gc0_register($5, 1) +#define write_gc0_pagegrain(val) __write_32bit_gc0_register($5, 1, val) -#define read_gc0_segctl0() __read_ulong_gc0_register(5, 2) -#define write_gc0_segctl0(val) __write_ulong_gc0_register(5, 2, val) +#define read_gc0_segctl0() __read_ulong_gc0_register($5, 2) +#define write_gc0_segctl0(val) __write_ulong_gc0_register($5, 2, val) -#define read_gc0_segctl1() __read_ulong_gc0_register(5, 3) -#define write_gc0_segctl1(val) __write_ulong_gc0_register(5, 3, val) +#define read_gc0_segctl1() __read_ulong_gc0_register($5, 3) +#define write_gc0_segctl1(val) __write_ulong_gc0_register($5, 3, val) -#define read_gc0_segctl2() __read_ulong_gc0_register(5, 4) -#define write_gc0_segctl2(val) __write_ulong_gc0_register(5, 4, val) +#define read_gc0_segctl2() __read_ulong_gc0_register($5, 4) +#define write_gc0_segctl2(val) __write_ulong_gc0_register($5, 4, val) -#define read_gc0_pwbase() __read_ulong_gc0_register(5, 5) -#define write_gc0_pwbase(val) __write_ulong_gc0_register(5, 5, val) +#define read_gc0_pwbase() __read_ulong_gc0_register($5, 5) +#define write_gc0_pwbase(val) __write_ulong_gc0_register($5, 5, val) -#define read_gc0_pwfield() __read_ulong_gc0_register(5, 6) -#define write_gc0_pwfield(val) __write_ulong_gc0_register(5, 6, val) +#define read_gc0_pwfield() __read_ulong_gc0_register($5, 6) +#define write_gc0_pwfield(val) __write_ulong_gc0_register($5, 6, val) -#define read_gc0_pwsize() __read_ulong_gc0_register(5, 7) -#define write_gc0_pwsize(val) __write_ulong_gc0_register(5, 7, val) +#define read_gc0_pwsize() __read_ulong_gc0_register($5, 7) +#define write_gc0_pwsize(val) __write_ulong_gc0_register($5, 7, val) -#define read_gc0_wired() __read_32bit_gc0_register(6, 0) -#define write_gc0_wired(val) __write_32bit_gc0_register(6, 0, val) +#define read_gc0_wired() __read_32bit_gc0_register($6, 0) +#define write_gc0_wired(val) __write_32bit_gc0_register($6, 0, val) -#define read_gc0_pwctl() __read_32bit_gc0_register(6, 6) -#define write_gc0_pwctl(val) __write_32bit_gc0_register(6, 6, val) - -#define read_gc0_hwrena() __read_32bit_gc0_register(7, 0) -#define write_gc0_hwrena(val) __write_32bit_gc0_register(7, 0, val) - -#define read_gc0_badvaddr() __read_ulong_gc0_register(8, 0) -#define write_gc0_badvaddr(val) __write_ulong_gc0_register(8, 0, val) - -#define read_gc0_badinstr() __read_32bit_gc0_register(8, 1) -#define write_gc0_badinstr(val) __write_32bit_gc0_register(8, 1, val) - -#define read_gc0_badinstrp() __read_32bit_gc0_register(8, 2) -#define write_gc0_badinstrp(val) __write_32bit_gc0_register(8, 2, val) - -#define read_gc0_count() __read_32bit_gc0_register(9, 0) - -#define read_gc0_entryhi() __read_ulong_gc0_register(10, 0) -#define write_gc0_entryhi(val) __write_ulong_gc0_register(10, 0, val) - -#define read_gc0_compare() __read_32bit_gc0_register(11, 0) -#define write_gc0_compare(val) __write_32bit_gc0_register(11, 0, val) - -#define read_gc0_status() __read_32bit_gc0_register(12, 0) -#define write_gc0_status(val) __write_32bit_gc0_register(12, 0, val) - -#define read_gc0_intctl() __read_32bit_gc0_register(12, 1) -#define write_gc0_intctl(val) __write_32bit_gc0_register(12, 1, val) - -#define read_gc0_cause() __read_32bit_gc0_register(13, 0) -#define write_gc0_cause(val) __write_32bit_gc0_register(13, 0, val) - -#define read_gc0_epc() __read_ulong_gc0_register(14, 0) -#define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val) - -#define read_gc0_prid() __read_32bit_gc0_register(15, 0) - -#define read_gc0_ebase() __read_32bit_gc0_register(15, 1) -#define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val) - -#define read_gc0_ebase_64() __read_64bit_gc0_register(15, 1) -#define write_gc0_ebase_64(val) __write_64bit_gc0_register(15, 1, val) - -#define read_gc0_config() __read_32bit_gc0_register(16, 0) -#define read_gc0_config1() __read_32bit_gc0_register(16, 1) -#define read_gc0_config2() __read_32bit_gc0_register(16, 2) -#define read_gc0_config3() __read_32bit_gc0_register(16, 3) -#define read_gc0_config4() __read_32bit_gc0_register(16, 4) -#define read_gc0_config5() __read_32bit_gc0_register(16, 5) -#define read_gc0_config6() __read_32bit_gc0_register(16, 6) -#define read_gc0_config7() __read_32bit_gc0_register(16, 7) -#define write_gc0_config(val) __write_32bit_gc0_register(16, 0, val) -#define write_gc0_config1(val) __write_32bit_gc0_register(16, 1, val) -#define write_gc0_config2(val) __write_32bit_gc0_register(16, 2, val) -#define write_gc0_config3(val) __write_32bit_gc0_register(16, 3, val) -#define write_gc0_config4(val) __write_32bit_gc0_register(16, 4, val) -#define write_gc0_config5(val) __write_32bit_gc0_register(16, 5, val) -#define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val) -#define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val) - -#define read_gc0_lladdr() __read_ulong_gc0_register(17, 0) -#define write_gc0_lladdr(val) __write_ulong_gc0_register(17, 0, val) - -#define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0) -#define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1) -#define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2) -#define read_gc0_watchlo3() __read_ulong_gc0_register(18, 3) -#define read_gc0_watchlo4() __read_ulong_gc0_register(18, 4) -#define read_gc0_watchlo5() __read_ulong_gc0_register(18, 5) -#define read_gc0_watchlo6() __read_ulong_gc0_register(18, 6) -#define read_gc0_watchlo7() __read_ulong_gc0_register(18, 7) -#define write_gc0_watchlo0(val) __write_ulong_gc0_register(18, 0, val) -#define write_gc0_watchlo1(val) __write_ulong_gc0_register(18, 1, val) -#define write_gc0_watchlo2(val) __write_ulong_gc0_register(18, 2, val) -#define write_gc0_watchlo3(val) __write_ulong_gc0_register(18, 3, val) -#define write_gc0_watchlo4(val) __write_ulong_gc0_register(18, 4, val) -#define write_gc0_watchlo5(val) __write_ulong_gc0_register(18, 5, val) -#define write_gc0_watchlo6(val) __write_ulong_gc0_register(18, 6, val) -#define write_gc0_watchlo7(val) __write_ulong_gc0_register(18, 7, val) - -#define read_gc0_watchhi0() __read_32bit_gc0_register(19, 0) -#define read_gc0_watchhi1() __read_32bit_gc0_register(19, 1) -#define read_gc0_watchhi2() __read_32bit_gc0_register(19, 2) -#define read_gc0_watchhi3() __read_32bit_gc0_register(19, 3) -#define read_gc0_watchhi4() __read_32bit_gc0_register(19, 4) -#define read_gc0_watchhi5() __read_32bit_gc0_register(19, 5) -#define read_gc0_watchhi6() __read_32bit_gc0_register(19, 6) -#define read_gc0_watchhi7() __read_32bit_gc0_register(19, 7) -#define write_gc0_watchhi0(val) __write_32bit_gc0_register(19, 0, val) -#define write_gc0_watchhi1(val) __write_32bit_gc0_register(19, 1, val) -#define write_gc0_watchhi2(val) __write_32bit_gc0_register(19, 2, val) -#define write_gc0_watchhi3(val) __write_32bit_gc0_register(19, 3, val) -#define write_gc0_watchhi4(val) __write_32bit_gc0_register(19, 4, val) -#define write_gc0_watchhi5(val) __write_32bit_gc0_register(19, 5, val) -#define write_gc0_watchhi6(val) __write_32bit_gc0_register(19, 6, val) -#define write_gc0_watchhi7(val) __write_32bit_gc0_register(19, 7, val) - -#define read_gc0_xcontext() __read_ulong_gc0_register(20, 0) -#define write_gc0_xcontext(val) __write_ulong_gc0_register(20, 0, val) - -#define read_gc0_perfctrl0() __read_32bit_gc0_register(25, 0) -#define write_gc0_perfctrl0(val) __write_32bit_gc0_register(25, 0, val) -#define read_gc0_perfcntr0() __read_32bit_gc0_register(25, 1) -#define write_gc0_perfcntr0(val) __write_32bit_gc0_register(25, 1, val) -#define read_gc0_perfcntr0_64() __read_64bit_gc0_register(25, 1) -#define write_gc0_perfcntr0_64(val) __write_64bit_gc0_register(25, 1, val) -#define read_gc0_perfctrl1() __read_32bit_gc0_register(25, 2) -#define write_gc0_perfctrl1(val) __write_32bit_gc0_register(25, 2, val) -#define read_gc0_perfcntr1() __read_32bit_gc0_register(25, 3) -#define write_gc0_perfcntr1(val) __write_32bit_gc0_register(25, 3, val) -#define read_gc0_perfcntr1_64() __read_64bit_gc0_register(25, 3) -#define write_gc0_perfcntr1_64(val) __write_64bit_gc0_register(25, 3, val) -#define read_gc0_perfctrl2() __read_32bit_gc0_register(25, 4) -#define write_gc0_perfctrl2(val) __write_32bit_gc0_register(25, 4, val) -#define read_gc0_perfcntr2() __read_32bit_gc0_register(25, 5) -#define write_gc0_perfcntr2(val) __write_32bit_gc0_register(25, 5, val) -#define read_gc0_perfcntr2_64() __read_64bit_gc0_register(25, 5) -#define write_gc0_perfcntr2_64(val) __write_64bit_gc0_register(25, 5, val) -#define read_gc0_perfctrl3() __read_32bit_gc0_register(25, 6) -#define write_gc0_perfctrl3(val) __write_32bit_gc0_register(25, 6, val) -#define read_gc0_perfcntr3() __read_32bit_gc0_register(25, 7) -#define write_gc0_perfcntr3(val) __write_32bit_gc0_register(25, 7, val) -#define read_gc0_perfcntr3_64() __read_64bit_gc0_register(25, 7) -#define write_gc0_perfcntr3_64(val) __write_64bit_gc0_register(25, 7, val) - -#define read_gc0_errorepc() __read_ulong_gc0_register(30, 0) -#define write_gc0_errorepc(val) __write_ulong_gc0_register(30, 0, val) - -#define read_gc0_kscratch1() __read_ulong_gc0_register(31, 2) -#define read_gc0_kscratch2() __read_ulong_gc0_register(31, 3) -#define read_gc0_kscratch3() __read_ulong_gc0_register(31, 4) -#define read_gc0_kscratch4() __read_ulong_gc0_register(31, 5) -#define read_gc0_kscratch5() __read_ulong_gc0_register(31, 6) -#define read_gc0_kscratch6() __read_ulong_gc0_register(31, 7) -#define write_gc0_kscratch1(val) __write_ulong_gc0_register(31, 2, val) -#define write_gc0_kscratch2(val) __write_ulong_gc0_register(31, 3, val) -#define write_gc0_kscratch3(val) __write_ulong_gc0_register(31, 4, val) -#define write_gc0_kscratch4(val) __write_ulong_gc0_register(31, 5, val) -#define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val) -#define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val) +#define read_gc0_pwctl() __read_32bit_gc0_register($6, 6) +#define write_gc0_pwctl(val) __write_32bit_gc0_register($6, 6, val) + +#define read_gc0_hwrena() __read_32bit_gc0_register($7, 0) +#define write_gc0_hwrena(val) __write_32bit_gc0_register($7, 0, val) + +#define read_gc0_badvaddr() __read_ulong_gc0_register($8, 0) +#define write_gc0_badvaddr(val) __write_ulong_gc0_register($8, 0, val) + +#define read_gc0_badinstr() __read_32bit_gc0_register($8, 1) +#define write_gc0_badinstr(val) __write_32bit_gc0_register($8, 1, val) + +#define read_gc0_badinstrp() __read_32bit_gc0_register($8, 2) +#define write_gc0_badinstrp(val) __write_32bit_gc0_register($8, 2, val) + +#define read_gc0_count() __read_32bit_gc0_register($9, 0) + +#define read_gc0_entryhi() __read_ulong_gc0_register($10, 0) +#define write_gc0_entryhi(val) __write_ulong_gc0_register($10, 0, val) + +#define read_gc0_compare() __read_32bit_gc0_register($11, 0) +#define write_gc0_compare(val) __write_32bit_gc0_register($11, 0, val) + +#define read_gc0_status() __read_32bit_gc0_register($12, 0) +#define write_gc0_status(val) __write_32bit_gc0_register($12, 0, val) + +#define read_gc0_intctl() __read_32bit_gc0_register($12, 1) +#define write_gc0_intctl(val) __write_32bit_gc0_register($12, 1, val) + +#define read_gc0_cause() __read_32bit_gc0_register($13, 0) +#define write_gc0_cause(val) __write_32bit_gc0_register($13, 0, val) + +#define read_gc0_epc() __read_ulong_gc0_register($14, 0) +#define write_gc0_epc(val) __write_ulong_gc0_register($14, 0, val) + +#define read_gc0_prid() __read_32bit_gc0_register($15, 0) + +#define read_gc0_ebase() __read_32bit_gc0_register($15, 1) +#define write_gc0_ebase(val) __write_32bit_gc0_register($15, 1, val) + +#define read_gc0_ebase_64() __read_64bit_gc0_register($15, 1) +#define write_gc0_ebase_64(val) __write_64bit_gc0_register($15, 1, val) + +#define read_gc0_config() __read_32bit_gc0_register($16, 0) +#define read_gc0_config1() __read_32bit_gc0_register($16, 1) +#define read_gc0_config2() __read_32bit_gc0_register($16, 2) +#define read_gc0_config3() __read_32bit_gc0_register($16, 3) +#define read_gc0_config4() __read_32bit_gc0_register($16, 4) +#define read_gc0_config5() __read_32bit_gc0_register($16, 5) +#define read_gc0_config6() __read_32bit_gc0_register($16, 6) +#define read_gc0_config7() __read_32bit_gc0_register($16, 7) +#define write_gc0_config(val) __write_32bit_gc0_register($16, 0, val) +#define write_gc0_config1(val) __write_32bit_gc0_register($16, 1, val) +#define write_gc0_config2(val) __write_32bit_gc0_register($16, 2, val) +#define write_gc0_config3(val) __write_32bit_gc0_register($16, 3, val) +#define write_gc0_config4(val) __write_32bit_gc0_register($16, 4, val) +#define write_gc0_config5(val) __write_32bit_gc0_register($16, 5, val) +#define write_gc0_config6(val) __write_32bit_gc0_register($16, 6, val) +#define write_gc0_config7(val) __write_32bit_gc0_register($16, 7, val) + +#define read_gc0_lladdr() __read_ulong_gc0_register($17, 0) +#define write_gc0_lladdr(val) __write_ulong_gc0_register($17, 0, val) + +#define read_gc0_watchlo0() __read_ulong_gc0_register($18, 0) +#define read_gc0_watchlo1() __read_ulong_gc0_register($18, 1) +#define read_gc0_watchlo2() __read_ulong_gc0_register($18, 2) +#define read_gc0_watchlo3() __read_ulong_gc0_register($18, 3) +#define read_gc0_watchlo4() __read_ulong_gc0_register($18, 4) +#define read_gc0_watchlo5() __read_ulong_gc0_register($18, 5) +#define read_gc0_watchlo6() __read_ulong_gc0_register($18, 6) +#define read_gc0_watchlo7() __read_ulong_gc0_register($18, 7) +#define write_gc0_watchlo0(val) __write_ulong_gc0_register($18, 0, val) +#define write_gc0_watchlo1(val) __write_ulong_gc0_register($18, 1, val) +#define write_gc0_watchlo2(val) __write_ulong_gc0_register($18, 2, val) +#define write_gc0_watchlo3(val) __write_ulong_gc0_register($18, 3, val) +#define write_gc0_watchlo4(val) __write_ulong_gc0_register($18, 4, val) +#define write_gc0_watchlo5(val) __write_ulong_gc0_register($18, 5, val) +#define write_gc0_watchlo6(val) __write_ulong_gc0_register($18, 6, val) +#define write_gc0_watchlo7(val) __write_ulong_gc0_register($18, 7, val) + +#define read_gc0_watchhi0() __read_32bit_gc0_register($19, 0) +#define read_gc0_watchhi1() __read_32bit_gc0_register($19, 1) +#define read_gc0_watchhi2() __read_32bit_gc0_register($19, 2) +#define read_gc0_watchhi3() __read_32bit_gc0_register($19, 3) +#define read_gc0_watchhi4() __read_32bit_gc0_register($19, 4) +#define read_gc0_watchhi5() __read_32bit_gc0_register($19, 5) +#define read_gc0_watchhi6() __read_32bit_gc0_register($19, 6) +#define read_gc0_watchhi7() __read_32bit_gc0_register($19, 7) +#define write_gc0_watchhi0(val) __write_32bit_gc0_register($19, 0, val) +#define write_gc0_watchhi1(val) __write_32bit_gc0_register($19, 1, val) +#define write_gc0_watchhi2(val) __write_32bit_gc0_register($19, 2, val) +#define write_gc0_watchhi3(val) __write_32bit_gc0_register($19, 3, val) +#define write_gc0_watchhi4(val) __write_32bit_gc0_register($19, 4, val) +#define write_gc0_watchhi5(val) __write_32bit_gc0_register($19, 5, val) +#define write_gc0_watchhi6(val) __write_32bit_gc0_register($19, 6, val) +#define write_gc0_watchhi7(val) __write_32bit_gc0_register($19, 7, val) + +#define read_gc0_xcontext() __read_ulong_gc0_register($20, 0) +#define write_gc0_xcontext(val) __write_ulong_gc0_register($20, 0, val) + +#define read_gc0_perfctrl0() __read_32bit_gc0_register($25, 0) +#define write_gc0_perfctrl0(val) __write_32bit_gc0_register($25, 0, val) +#define read_gc0_perfcntr0() __read_32bit_gc0_register($25, 1) +#define write_gc0_perfcntr0(val) __write_32bit_gc0_register($25, 1, val) +#define read_gc0_perfcntr0_64() __read_64bit_gc0_register($25, 1) +#define write_gc0_perfcntr0_64(val) __write_64bit_gc0_register($25, 1, val) +#define read_gc0_perfctrl1() __read_32bit_gc0_register($25, 2) +#define write_gc0_perfctrl1(val) __write_32bit_gc0_register($25, 2, val) +#define read_gc0_perfcntr1() __read_32bit_gc0_register($25, 3) +#define write_gc0_perfcntr1(val) __write_32bit_gc0_register($25, 3, val) +#define read_gc0_perfcntr1_64() __read_64bit_gc0_register($25, 3) +#define write_gc0_perfcntr1_64(val) __write_64bit_gc0_register($25, 3, val) +#define read_gc0_perfctrl2() __read_32bit_gc0_register($25, 4) +#define write_gc0_perfctrl2(val) __write_32bit_gc0_register($25, 4, val) +#define read_gc0_perfcntr2() __read_32bit_gc0_register($25, 5) +#define write_gc0_perfcntr2(val) __write_32bit_gc0_register($25, 5, val) +#define read_gc0_perfcntr2_64() __read_64bit_gc0_register($25, 5) +#define write_gc0_perfcntr2_64(val) __write_64bit_gc0_register($25, 5, val) +#define read_gc0_perfctrl3() __read_32bit_gc0_register($25, 6) +#define write_gc0_perfctrl3(val) __write_32bit_gc0_register($25, 6, val) +#define read_gc0_perfcntr3() __read_32bit_gc0_register($25, 7) +#define write_gc0_perfcntr3(val) __write_32bit_gc0_register($25, 7, val) +#define read_gc0_perfcntr3_64() __read_64bit_gc0_register($25, 7) +#define write_gc0_perfcntr3_64(val) __write_64bit_gc0_register($25, 7, val) + +#define read_gc0_errorepc() __read_ulong_gc0_register($30, 0) +#define write_gc0_errorepc(val) __write_ulong_gc0_register($30, 0, val) + +#define read_gc0_kscratch1() __read_ulong_gc0_register($31, 2) +#define read_gc0_kscratch2() __read_ulong_gc0_register($31, 3) +#define read_gc0_kscratch3() __read_ulong_gc0_register($31, 4) +#define read_gc0_kscratch4() __read_ulong_gc0_register($31, 5) +#define read_gc0_kscratch5() __read_ulong_gc0_register($31, 6) +#define read_gc0_kscratch6() __read_ulong_gc0_register($31, 7) +#define write_gc0_kscratch1(val) __write_ulong_gc0_register($31, 2, val) +#define write_gc0_kscratch2(val) __write_ulong_gc0_register($31, 3, val) +#define write_gc0_kscratch3(val) __write_ulong_gc0_register($31, 4, val) +#define write_gc0_kscratch4(val) __write_ulong_gc0_register($31, 5, val) +#define write_gc0_kscratch5(val) __write_ulong_gc0_register($31, 6, val) +#define write_gc0_kscratch6(val) __write_ulong_gc0_register($31, 7, val) /* Cavium OCTEON (cnMIPS) */ -#define read_gc0_cvmcount() __read_ulong_gc0_register(9, 6) -#define write_gc0_cvmcount(val) __write_ulong_gc0_register(9, 6, val) +#define read_gc0_cvmcount() __read_ulong_gc0_register($9, 6) +#define write_gc0_cvmcount(val) __write_ulong_gc0_register($9, 6, val) -#define read_gc0_cvmctl() __read_64bit_gc0_register(9, 7) -#define write_gc0_cvmctl(val) __write_64bit_gc0_register(9, 7, val) +#define read_gc0_cvmctl() __read_64bit_gc0_register($9, 7) +#define write_gc0_cvmctl(val) __write_64bit_gc0_register($9, 7, val) -#define read_gc0_cvmmemctl() __read_64bit_gc0_register(11, 7) -#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register(11, 7, val) +#define read_gc0_cvmmemctl() __read_64bit_gc0_register($11, 7) +#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register($11, 7, val) -#define read_gc0_cvmmemctl2() __read_64bit_gc0_register(16, 6) -#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register(16, 6, val) +#define read_gc0_cvmmemctl2() __read_64bit_gc0_register($16, 6) +#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register($16, 6, val) /* * Macros to access the floating point coprocessor control registers @@ -2581,8 +2651,6 @@ static inline void tlb_write_random(void) ".set reorder"); } -#ifdef TOOLCHAIN_SUPPORTS_VIRT - /* * Guest TLB operations. * @@ -2593,7 +2661,7 @@ static inline void guest_tlb_probe(void) __asm__ __volatile__( ".set push\n\t" ".set noreorder\n\t" - ".set virt\n\t" + _ASM_SET_VIRT "tlbgp\n\t" ".set pop"); } @@ -2603,7 +2671,7 @@ static inline void guest_tlb_read(void) __asm__ __volatile__( ".set push\n\t" ".set noreorder\n\t" - ".set virt\n\t" + _ASM_SET_VIRT "tlbgr\n\t" ".set pop"); } @@ -2613,7 +2681,7 @@ static inline void guest_tlb_write_indexed(void) __asm__ __volatile__( ".set push\n\t" ".set noreorder\n\t" - ".set virt\n\t" + _ASM_SET_VIRT "tlbgwi\n\t" ".set pop"); } @@ -2623,7 +2691,7 @@ static inline void guest_tlb_write_random(void) __asm__ __volatile__( ".set push\n\t" ".set noreorder\n\t" - ".set virt\n\t" + _ASM_SET_VIRT "tlbgwr\n\t" ".set pop"); } @@ -2636,63 +2704,11 @@ static inline void guest_tlbinvf(void) __asm__ __volatile__( ".set push\n\t" ".set noreorder\n\t" - ".set virt\n\t" + _ASM_SET_VIRT "tlbginvf\n\t" ".set pop"); } -#else /* TOOLCHAIN_SUPPORTS_VIRT */ - -/* - * Guest TLB operations. - * - * It is responsibility of the caller to take care of any TLB hazards. - */ -static inline void guest_tlb_probe(void) -{ - __asm__ __volatile__( - "# tlbgp\n\t" - _ASM_INSN_IF_MIPS(0x42000010) - _ASM_INSN32_IF_MM(0x0000017c)); -} - -static inline void guest_tlb_read(void) -{ - __asm__ __volatile__( - "# tlbgr\n\t" - _ASM_INSN_IF_MIPS(0x42000009) - _ASM_INSN32_IF_MM(0x0000117c)); -} - -static inline void guest_tlb_write_indexed(void) -{ - __asm__ __volatile__( - "# tlbgwi\n\t" - _ASM_INSN_IF_MIPS(0x4200000a) - _ASM_INSN32_IF_MM(0x0000217c)); -} - -static inline void guest_tlb_write_random(void) -{ - __asm__ __volatile__( - "# tlbgwr\n\t" - _ASM_INSN_IF_MIPS(0x4200000e) - _ASM_INSN32_IF_MM(0x0000317c)); -} - -/* - * Guest TLB Invalidate Flush - */ -static inline void guest_tlbinvf(void) -{ - __asm__ __volatile__( - "# tlbginvf\n\t" - _ASM_INSN_IF_MIPS(0x4200000c) - _ASM_INSN32_IF_MM(0x0000517c)); -} - -#endif /* !TOOLCHAIN_SUPPORTS_VIRT */ - /* * Manipulate bits in a register. */ diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h index b1845102f8f9..b4f9577ed96a 100644 --- a/arch/mips/include/asm/msa.h +++ b/arch/mips/include/asm/msa.h @@ -160,7 +160,23 @@ static inline void init_msa_upper(void) _init_msa_upper(); } -#ifdef TOOLCHAIN_SUPPORTS_MSA +#ifndef TOOLCHAIN_SUPPORTS_MSA +/* + * Define assembler macros using .word for the c[ft]cmsa instructions in order + * to allow compilation with toolchains that do not support MSA. Once all + * toolchains in use support MSA these can be removed. + */ +_ASM_MACRO_2R(cfcmsa, rd, cs, + _ASM_INSN_IF_MIPS(0x787e0019 | __cs << 11 | __rd << 6) + _ASM_INSN32_IF_MM(0x587e0016 | __cs << 11 | __rd << 6)); +_ASM_MACRO_2R(ctcmsa, cd, rs, + _ASM_INSN_IF_MIPS(0x783e0019 | __rs << 11 | __cd << 6) + _ASM_INSN32_IF_MM(0x583e0016 | __rs << 11 | __cd << 6)); +#define _ASM_SET_MSA "" +#else /* TOOLCHAIN_SUPPORTS_MSA */ +#define _ASM_SET_MSA ".set\tfp=64\n\t" \ + ".set\tmsa\n\t" +#endif #define __BUILD_MSA_CTL_REG(name, cs) \ static inline unsigned int read_msa_##name(void) \ @@ -168,8 +184,7 @@ static inline unsigned int read_msa_##name(void) \ unsigned int reg; \ __asm__ __volatile__( \ " .set push\n" \ - " .set fp=64\n" \ - " .set msa\n" \ + _ASM_SET_MSA \ " cfcmsa %0, $" #cs "\n" \ " .set pop\n" \ : "=r"(reg)); \ @@ -180,52 +195,12 @@ static inline void write_msa_##name(unsigned int val) \ { \ __asm__ __volatile__( \ " .set push\n" \ - " .set fp=64\n" \ - " .set msa\n" \ + _ASM_SET_MSA \ " ctcmsa $" #cs ", %0\n" \ " .set pop\n" \ : : "r"(val)); \ } -#else /* !TOOLCHAIN_SUPPORTS_MSA */ - -/* - * Define functions using .word for the c[ft]cmsa instructions in order to - * allow compilation with toolchains that do not support MSA. Once all - * toolchains in use support MSA these can be removed. - */ - -#define __BUILD_MSA_CTL_REG(name, cs) \ -static inline unsigned int read_msa_##name(void) \ -{ \ - unsigned int reg; \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noat\n" \ - " # cfcmsa $1, $%1\n" \ - _ASM_INSN_IF_MIPS(0x787e0059 | %1 << 11) \ - _ASM_INSN32_IF_MM(0x587e0056 | %1 << 11) \ - " move %0, $1\n" \ - " .set pop\n" \ - : "=r"(reg) : "i"(cs)); \ - return reg; \ -} \ - \ -static inline void write_msa_##name(unsigned int val) \ -{ \ - __asm__ __volatile__( \ - " .set push\n" \ - " .set noat\n" \ - " move $1, %0\n" \ - " # ctcmsa $%1, $1\n" \ - _ASM_INSN_IF_MIPS(0x783e0819 | %1 << 6) \ - _ASM_INSN32_IF_MM(0x583e0816 | %1 << 6) \ - " .set pop\n" \ - : : "r"(val), "i"(cs)); \ -} - -#endif /* !TOOLCHAIN_SUPPORTS_MSA */ - __BUILD_MSA_CTL_REG(ir, 0) __BUILD_MSA_CTL_REG(csr, 1) __BUILD_MSA_CTL_REG(access, 2) diff --git a/arch/mips/jazz/setup.c b/arch/mips/jazz/setup.c index e4374a5651ce..448fd41792e4 100644 --- a/arch/mips/jazz/setup.c +++ b/arch/mips/jazz/setup.c @@ -32,22 +32,22 @@ static struct resource jazz_io_resources[] = { .start = 0x00, .end = 0x1f, .name = "dma1", - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, { .start = 0x40, .end = 0x5f, .name = "timer", - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, { .start = 0x80, .end = 0x8f, .name = "dma page reg", - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, { .start = 0xc0, .end = 0xdf, .name = "dma2", - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY } }; diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig index 643af2012e14..4dd0c446ecec 100644 --- a/arch/mips/jz4740/Kconfig +++ b/arch/mips/jz4740/Kconfig @@ -8,6 +8,10 @@ config JZ4740_QI_LB60 bool "Qi Hardware Ben NanoNote" select MACH_JZ4740 +config JZ4770_GCW0 + bool "Game Consoles Worldwide GCW Zero" + select MACH_JZ4770 + config JZ4780_CI20 bool "MIPS Creator CI20" select MACH_JZ4780 @@ -18,6 +22,12 @@ config MACH_JZ4740 bool select SYS_HAS_CPU_MIPS32_R1 +config MACH_JZ4770 + bool + select MIPS_CPU_SCACHE + select SYS_HAS_CPU_MIPS32_R2 + select SYS_SUPPORTS_HIGHMEM + config MACH_JZ4780 bool select MIPS_CPU_SCACHE diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c index 47e857194ce6..eb9f2f97bedb 100644 --- a/arch/mips/jz4740/prom.c +++ b/arch/mips/jz4740/prom.c @@ -20,33 +20,12 @@ #include <linux/serial_reg.h> #include <asm/bootinfo.h> +#include <asm/fw/fw.h> #include <asm/mach-jz4740/base.h> -static __init void jz4740_init_cmdline(int argc, char *argv[]) -{ - unsigned int count = COMMAND_LINE_SIZE - 1; - int i; - char *dst = &(arcs_cmdline[0]); - char *src; - - for (i = 1; i < argc && count; ++i) { - src = argv[i]; - while (*src && count) { - *dst++ = *src++; - --count; - } - *dst++ = ' '; - } - if (i > 1) - --dst; - - *dst = 0; -} - void __init prom_init(void) { - jz4740_init_cmdline((int)fw_arg0, (char **)fw_arg1); - mips_machtype = MACH_INGENIC_JZ4740; + fw_init_cmdline(); } void __init prom_free_prom_memory(void) diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index 6d0152321819..afb40f8bce96 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -53,6 +53,16 @@ static void __init jz4740_detect_mem(void) add_memory_region(0, size, BOOT_MEM_RAM); } +static unsigned long __init get_board_mach_type(const void *fdt) +{ + if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4780")) + return MACH_INGENIC_JZ4780; + if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4770")) + return MACH_INGENIC_JZ4770; + + return MACH_INGENIC_JZ4740; +} + void __init plat_mem_setup(void) { int offset; @@ -63,6 +73,8 @@ void __init plat_mem_setup(void) offset = fdt_path_offset(__dtb_start, "/memory"); if (offset < 0) jz4740_detect_mem(); + + mips_machtype = get_board_mach_type(__dtb_start); } void __init device_tree_init(void) @@ -75,10 +87,14 @@ void __init device_tree_init(void) const char *get_system_type(void) { - if (IS_ENABLED(CONFIG_MACH_JZ4780)) + switch (mips_machtype) { + case MACH_INGENIC_JZ4780: return "JZ4780"; - - return "JZ4740"; + case MACH_INGENIC_JZ4770: + return "JZ4770"; + default: + return "JZ4740"; + } } void __init arch_init_irq(void) diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c index bb1ad5119da4..2ca9160f642a 100644 --- a/arch/mips/jz4740/time.c +++ b/arch/mips/jz4740/time.c @@ -113,7 +113,7 @@ static struct clock_event_device jz4740_clockevent = { #ifdef CONFIG_MACH_JZ4740 .irq = JZ4740_IRQ_TCU0, #endif -#ifdef CONFIG_MACH_JZ4780 +#if defined(CONFIG_MACH_JZ4770) || defined(CONFIG_MACH_JZ4780) .irq = JZ4780_IRQ_TCU2, #endif }; diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index b79ed9af9886..e48f6c0a9e4a 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -399,7 +399,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) * * @regs: Pointer to pt_regs * @insn: branch instruction to decode - * @returns: -EFAULT on error and forces SIGILL, and on success + * Return: -EFAULT on error and forces SIGILL, and on success * returns 0 or BRANCH_LIKELY_TAKEN as appropriate after * evaluating the branch. * diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index e68e6e04063a..1025f937ab0e 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -388,15 +388,16 @@ LEAF(mips_cps_boot_vpes) #elif defined(CONFIG_MIPS_MT) - .set push - .set MIPS_ISA_LEVEL_RAW - .set mt - /* If the core doesn't support MT then return */ has_mt t0, 5f /* Enter VPE configuration state */ + .set push + .set MIPS_ISA_LEVEL_RAW + .set mt dvpe + .set pop + PTR_LA t1, 1f jr.hb t1 nop @@ -422,6 +423,10 @@ LEAF(mips_cps_boot_vpes) mtc0 t0, CP0_VPECONTROL ehb + .set push + .set MIPS_ISA_LEVEL_RAW + .set mt + /* Skip the VPE if its TC is not halted */ mftc0 t0, CP0_TCHALT beqz t0, 2f @@ -495,6 +500,8 @@ LEAF(mips_cps_boot_vpes) ehb evpe + .set pop + /* Check whether this VPE is meant to be running */ li t0, 1 sll t0, t0, a1 @@ -509,7 +516,7 @@ LEAF(mips_cps_boot_vpes) 1: jr.hb t0 nop -2: .set pop +2: #endif /* CONFIG_MIPS_MT_SMP */ diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 99285be0e088..7f3dfdbc3657 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -361,7 +361,7 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, * If fails when getting the stack address of the non-leaf function's * ra, stop function graph tracer and return */ - if (parent_ra_addr == 0) + if (parent_ra_addr == NULL) goto out; #endif /* *parent_ra_addr = return_hooker; */ diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 702c678de116..85bc601e9a0d 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -826,25 +826,6 @@ static void __init arch_mem_init(char **cmdline_p) struct memblock_region *reg; extern void plat_mem_setup(void); - /* call board setup routine */ - plat_mem_setup(); - - /* - * Make sure all kernel memory is in the maps. The "UP" and - * "DOWN" are opposite for initdata since if it crosses over - * into another memory section you don't want that to be - * freed when the initdata is freed. - */ - arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, - PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, - BOOT_MEM_RAM); - arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, - PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, - BOOT_MEM_INIT_RAM); - - pr_info("Determined physical RAM map:\n"); - print_memory_map(); - #if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); #else @@ -872,6 +853,26 @@ static void __init arch_mem_init(char **cmdline_p) } #endif #endif + + /* call board setup routine */ + plat_mem_setup(); + + /* + * Make sure all kernel memory is in the maps. The "UP" and + * "DOWN" are opposite for initdata since if it crosses over + * into another memory section you don't want that to be + * freed when the initdata is freed. + */ + arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, + PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, + BOOT_MEM_RAM); + arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, + PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, + BOOT_MEM_INIT_RAM); + + pr_info("Determined physical RAM map:\n"); + print_memory_map(); + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index ecc1a853f48d..03f1026ad148 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -439,8 +439,6 @@ void play_dead(void) pr_debug("CPU%d going offline\n", cpu); if (cpu_has_mipsmt || cpu_has_vp) { - core = cpu_core(&cpu_data[cpu]); - /* Look for another online VPE within the core */ for_each_online_cpu(cpu_death_sibling) { if (!cpus_are_siblings(cpu, cpu_death_sibling)) diff --git a/arch/mips/kernel/watch.c b/arch/mips/kernel/watch.c index 19fcab7348b1..0e61a5b7647f 100644 --- a/arch/mips/kernel/watch.c +++ b/arch/mips/kernel/watch.c @@ -18,27 +18,24 @@ void mips_install_watch_registers(struct task_struct *t) { struct mips3264_watch_reg_state *watches = &t->thread.watch.mips3264; + unsigned int watchhi = MIPS_WATCHHI_G | /* Trap all ASIDs */ + MIPS_WATCHHI_IRW; /* Clear result bits */ + switch (current_cpu_data.watch_reg_use_cnt) { default: BUG(); case 4: write_c0_watchlo3(watches->watchlo[3]); - /* Write 1 to the I, R, and W bits to clear them, and - 1 to G so all ASIDs are trapped. */ - write_c0_watchhi3(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW | - watches->watchhi[3]); + write_c0_watchhi3(watchhi | watches->watchhi[3]); case 3: write_c0_watchlo2(watches->watchlo[2]); - write_c0_watchhi2(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW | - watches->watchhi[2]); + write_c0_watchhi2(watchhi | watches->watchhi[2]); case 2: write_c0_watchlo1(watches->watchlo[1]); - write_c0_watchhi1(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW | - watches->watchhi[1]); + write_c0_watchhi1(watchhi | watches->watchhi[1]); case 1: write_c0_watchlo0(watches->watchlo[0]); - write_c0_watchhi0(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW | - watches->watchhi[0]); + write_c0_watchhi0(watchhi | watches->watchhi[0]); } } @@ -51,21 +48,19 @@ void mips_read_watch_registers(void) { struct mips3264_watch_reg_state *watches = ¤t->thread.watch.mips3264; + unsigned int watchhi_mask = MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW; + switch (current_cpu_data.watch_reg_use_cnt) { default: BUG(); case 4: - watches->watchhi[3] = (read_c0_watchhi3() & - (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW)); + watches->watchhi[3] = (read_c0_watchhi3() & watchhi_mask); case 3: - watches->watchhi[2] = (read_c0_watchhi2() & - (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW)); + watches->watchhi[2] = (read_c0_watchhi2() & watchhi_mask); case 2: - watches->watchhi[1] = (read_c0_watchhi1() & - (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW)); + watches->watchhi[1] = (read_c0_watchhi1() & watchhi_mask); case 1: - watches->watchhi[0] = (read_c0_watchhi0() & - (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW)); + watches->watchhi[0] = (read_c0_watchhi0() & watchhi_mask); } if (current_cpu_data.watch_reg_use_cnt == 1 && (watches->watchhi[0] & MIPS_WATCHHI_IRW) == 0) { diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 6f109bb54cdb..bc2fdbfa8223 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -17,7 +17,6 @@ config LEMOTE_FULOONG2E select I8259 select ISA select IRQ_MIPS_CPU - select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_HIGHMEM @@ -49,7 +48,6 @@ config LEMOTE_MACH2F select ISA select SYS_HAS_CPU_LOONGSON2F select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c index b01d52473da8..c549e525fc11 100644 --- a/arch/mips/loongson64/common/mem.c +++ b/arch/mips/loongson64/common/mem.c @@ -79,7 +79,7 @@ void __init prom_init_memory(void) (u64)loongson_memmap->map[i].mem_size << 20, BOOT_MEM_RAM); break; - case MEM_RESERVED: + case SYSTEM_RAM_RESERVED: add_memory_region(loongson_memmap->map[i].mem_start, (u64)loongson_memmap->map[i].mem_size << 20, BOOT_MEM_RESERVED); diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index f17ef520799a..9717106de4a5 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -166,7 +166,7 @@ static void __init szmem(unsigned int node) memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), node); break; - case MEM_RESERVED: + case SYSTEM_RAM_RESERVED: pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", (u32)node_id, mem_type, mem_start, mem_size); add_memory_region((node_id << 44) + mem_start, diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index da6c1c0c30c1..62deb025970b 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -451,7 +451,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; } - /* Fall through */ + /* fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ if (NO_R6EMU && insn.r_format.func == jr_op) @@ -471,10 +471,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + /* fall through */ case bltzl_op: if (NO_R6EMU) break; + /* fall through */ case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) *contpc = regs->cp0_epc + @@ -494,10 +495,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + /* fall through */ case bgezl_op: if (NO_R6EMU) break; + /* fall through */ case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) *contpc = regs->cp0_epc + @@ -512,11 +514,12 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, break; case jalx_op: set_isa16_mode(bit); + /* fall through */ case jal_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + /* fall through */ case j_op: *contpc = regs->cp0_epc + dec_insn.pc_inc; *contpc >>= 28; @@ -528,6 +531,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case beql_op: if (NO_R6EMU) break; + /* fall through */ case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) @@ -542,6 +546,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bnel_op: if (NO_R6EMU) break; + /* fall through */ case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) @@ -556,6 +561,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case blezl_op: if (!insn.i_format.rt && NO_R6EMU) break; + /* fall through */ case blez_op: /* @@ -593,6 +599,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) break; + /* fall through */ case bgtz_op: /* * Compact branches for R6 for the @@ -729,7 +736,8 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, return 1; } - /* R2/R6 compatible cop1 instruction. Fall through */ + /* R2/R6 compatible cop1 instruction */ + /* fall through */ case cop2_op: case cop1x_op: if (insn.i_format.rs == bc_op) { @@ -1190,7 +1198,8 @@ emul: if (!cpu_has_mips_r6 || delay_slot(xcp)) return SIGILL; - cond = likely = 0; + likely = 0; + cond = 0; fpr = ¤t->thread.fpu.fpr[MIPSInst_RT(ir)]; bit0 = get_fpr32(fpr, 0) & 0x1; switch (MIPSInst_RS(ir)) { @@ -1220,14 +1229,14 @@ emul: case bcfl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* Fall through */ + /* fall through */ case bcf_op: cond = !cond; break; case bctl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* Fall through */ + /* fall through */ case bct_op: break; } @@ -1353,7 +1362,8 @@ branch_common: return SIGILL; /* a real fpu computation instruction */ - if ((sig = fpu_emu(xcp, ctx, ir))) + sig = fpu_emu(xcp, ctx, ir); + if (sig) return sig; } break; diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c index 8954ef031f84..678de20e4cb1 100644 --- a/arch/mips/math-emu/dp_add.c +++ b/arch/mips/math-emu/dp_add.c @@ -104,8 +104,7 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - - /* FALL THROUGH */ + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c index f4746f7c5f63..3063ae3ab3b9 100644 --- a/arch/mips/math-emu/dp_div.c +++ b/arch/mips/math-emu/dp_div.c @@ -103,6 +103,7 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 5bec64f2884e..d1f984b40344 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; @@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index a287b23818d8..f98b96135c8d 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; @@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 7ad79ed411f5..7ea2f8222026 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -16,7 +16,7 @@ /* 128 bits shift right logical with rounding. */ -void srl128(u64 *hptr, u64 *lptr, int count) +static void srl128(u64 *hptr, u64 *lptr, int count) { u64 low; @@ -157,6 +157,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): if (zc == IEEE754_CLASS_INF) @@ -173,7 +174,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); - /* fall through to real computations */ + /* continue to real computations */ } /* Finally get to do some computation */ @@ -201,9 +202,6 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. */ - /* 32 * 32 => 64 */ -#define DPXMULT(x, y) ((u64)(x) * (u64)y) - lxm = xm; hxm = xm >> 32; lym = ym; diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c index 60c8bfe40947..c34a6cdf1b25 100644 --- a/arch/mips/math-emu/dp_mul.c +++ b/arch/mips/math-emu/dp_mul.c @@ -101,6 +101,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; @@ -128,9 +129,6 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. */ - /* 32 * 32 => 64 */ -#define DPXMULT(x, y) ((u64)(x) * (u64)y) - lxm = xm; hxm = xm >> 32; lym = ym; diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c index cea907b83146..1d26c92e5295 100644 --- a/arch/mips/math-emu/dp_sqrt.c +++ b/arch/mips/math-emu/dp_sqrt.c @@ -91,7 +91,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) scalx -= 256; } - y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + y = x; /* magic initial approximation to almost 8 sig. bits */ yh = y.bits >> 32; @@ -108,7 +109,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ - z = t = ieee754dp_mul(y, y); + t = ieee754dp_mul(y, y); + z = t; t.bexp += 0x001; t = ieee754dp_add(t, z); z = ieee754dp_mul(ieee754dp_sub(x, z), y); @@ -140,7 +142,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) switch (oldcsr.rm) { case FPU_CSR_RU: y.bits += 1; - /* drop through */ + /* fall through */ case FPU_CSR_RN: t.bits += 1; break; diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c index fc17a781b9ae..3cc48b86519b 100644 --- a/arch/mips/math-emu/dp_sub.c +++ b/arch/mips/math-emu/dp_sub.c @@ -106,7 +106,7 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* FALL THROUGH */ + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): /* normalize ym,ye */ diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h index 9ba023004eb6..a56707b75282 100644 --- a/arch/mips/math-emu/ieee754dp.h +++ b/arch/mips/math-emu/ieee754dp.h @@ -55,6 +55,9 @@ static inline int ieee754dp_finite(union ieee754dp x) #define XDPSRS1(v) \ (((v) >> 1) | ((v) & 1)) +/* 32bit * 32bit => 64bit unsigned integer multiplication */ +#define DPXMULT(x, y) ((u64)(x) * (u64)y) + /* convert denormal to normalized with extended exponent */ #define DPDNORMx(m,e) \ while ((m >> DP_FBITS) == 0) { m <<= 1; e--; } diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c index c55c0c00bca8..51dced9fbdaf 100644 --- a/arch/mips/math-emu/sp_add.c +++ b/arch/mips/math-emu/sp_add.c @@ -104,8 +104,7 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - - /* FALL THROUGH */ + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c index 23587b31ca87..5d2904960eb8 100644 --- a/arch/mips/math-emu/sp_div.c +++ b/arch/mips/math-emu/sp_div.c @@ -103,6 +103,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c index 5060e8fdcb0b..36a50f9082d1 100644 --- a/arch/mips/math-emu/sp_fdp.c +++ b/arch/mips/math-emu/sp_fdp.c @@ -46,7 +46,8 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x) case IEEE754_CLASS_SNAN: x = ieee754dp_nanxcpt(x); EXPLODEXDP; - /* Fall through. */ + /* fall through */ + case IEEE754_CLASS_QNAN: y = ieee754sp_nan_fdp(xs, xm); if (!ieee754_csr.nan2008) { diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 74a5a00d2f22..22019ed691df 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; @@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index c51385f46b09..feaec3985cca 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; @@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index f823338dbb65..07ba675401e2 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -126,6 +126,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): if (zc == IEEE754_CLASS_INF) @@ -142,7 +143,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); - /* fall through to real computations */ + /* continue to real computations */ } /* Finally get to do some computation */ diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c index 4015101fbc37..fde71e293ec4 100644 --- a/arch/mips/math-emu/sp_mul.c +++ b/arch/mips/math-emu/sp_mul.c @@ -101,6 +101,7 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_sqrt.c b/arch/mips/math-emu/sp_sqrt.c index 67059c33a250..9cc83f012342 100644 --- a/arch/mips/math-emu/sp_sqrt.c +++ b/arch/mips/math-emu/sp_sqrt.c @@ -82,7 +82,8 @@ union ieee754sp ieee754sp_sqrt(union ieee754sp x) /* generate sqrt(x) bit by bit */ ix += ix; - q = s = 0; /* q = sqrt(x) */ + s = 0; + q = 0; /* q = sqrt(x) */ r = 0x01000000; /* r = moving bit from right to left */ while (r != 0) { diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c index dc998ed47295..9f2ff72c3d6b 100644 --- a/arch/mips/math-emu/sp_sub.c +++ b/arch/mips/math-emu/sp_sub.c @@ -106,6 +106,7 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_tlong.c b/arch/mips/math-emu/sp_tlong.c index a2450c7e452a..bca5ac995801 100644 --- a/arch/mips/math-emu/sp_tlong.c +++ b/arch/mips/math-emu/sp_tlong.c @@ -20,7 +20,6 @@ */ #include "ieee754sp.h" -#include "ieee754dp.h" s64 ieee754sp_tlong(union ieee754sp x) { diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 237532e89919..dcafa43613b6 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -370,11 +370,6 @@ static void mips_dma_sync_sg_for_device(struct device *dev, } } -static int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - static int mips_dma_supported(struct device *dev, u64 mask) { return plat_dma_supported(dev, mask); @@ -401,7 +396,6 @@ static const struct dma_map_ops mips_default_dma_map_ops = { .sync_single_for_device = mips_dma_sync_single_for_device, .sync_sg_for_cpu = mips_dma_sync_sg_for_cpu, .sync_sg_for_device = mips_dma_sync_sg_for_device, - .mapping_error = mips_dma_mapping_error, .dma_supported = mips_dma_supported, .cache_sync = mips_dma_cache_sync, }; diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 548acb7f8557..394673991bab 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -16,6 +16,7 @@ #include <asm/mmu_context.h> #include <asm/r4kcache.h> #include <asm/mips-cps.h> +#include <asm/bootinfo.h> /* * MIPS32/MIPS64 L2 cache handling @@ -220,6 +221,14 @@ static inline int __init mips_sc_probe(void) else return 0; + /* + * According to config2 it would be 5-ways, but that is contradicted + * by all documentation. + */ + if (current_cpu_type() == CPU_JZRISC && + mips_machtype == MACH_INGENIC_JZ4770) + c->scache.ways = 4; + c->scache.waysize = c->scache.sets * c->scache.linesz; c->scache.waybit = __ffs(c->scache.waysize); diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index de34adb76157..7b63914d2e58 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -47,31 +47,31 @@ static struct resource standard_io_resources[] = { .name = "dma1", .start = 0x00, .end = 0x1f, - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, { .name = "timer", .start = 0x40, .end = 0x5f, - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, { .name = "keyboard", .start = 0x60, .end = 0x6f, - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, { .name = "dma page reg", .start = 0x80, .end = 0x8f, - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, { .name = "dma2", .start = 0xc0, .end = 0xdf, - .flags = IORESOURCE_BUSY + .flags = IORESOURCE_IO | IORESOURCE_BUSY }, }; diff --git a/arch/mips/txx9/rbtx4939/setup.c b/arch/mips/txx9/rbtx4939/setup.c index 8b937300fb7f..fd26fadc8617 100644 --- a/arch/mips/txx9/rbtx4939/setup.c +++ b/arch/mips/txx9/rbtx4939/setup.c @@ -186,7 +186,7 @@ static void __init rbtx4939_update_ioc_pen(void) #define RBTX4939_MAX_7SEGLEDS 8 -#if IS_ENABLED(CONFIG_LEDS_CLASS) +#if IS_BUILTIN(CONFIG_LEDS_CLASS) static u8 led_val[RBTX4939_MAX_7SEGLEDS]; struct rbtx4939_led_data { struct led_classdev cdev; @@ -261,7 +261,7 @@ static inline void rbtx4939_led_setup(void) static void __rbtx4939_7segled_putc(unsigned int pos, unsigned char val) { -#if IS_ENABLED(CONFIG_LEDS_CLASS) +#if IS_BUILTIN(CONFIG_LEDS_CLASS) unsigned long flags; local_irq_save(flags); /* bit7: reserved for LED class */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 73fcf592ee91..73ce5dd07642 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -141,8 +141,10 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 + select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -150,6 +152,7 @@ config PPC select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 @@ -180,8 +183,6 @@ config PPC select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) - select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select HAVE_CBPF_JIT if !PPC64 select HAVE_CONTEXT_TRACKING if PPC64 select HAVE_DEBUG_KMEMLEAK @@ -868,6 +869,21 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config PPC_MEM_KEYS + prompt "PowerPC Memory Protection Keys" + def_bool y + depends on PPC_BOOK3S_64 + select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_HAS_PKEYS + help + Memory Protection Keys provides a mechanism for enforcing + page-based protections, but without requiring modification of the + page tables when an application changes protection domains. + + For details, see Documentation/vm/protection-keys.txt + + If unsure, say y. + endmenu config ISA_DMA_API diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 657c33cd4eee..c45424c64e19 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -90,6 +90,10 @@ config MSI_BITMAP_SELFTEST depends on DEBUG_KERNEL default n +config PPC_IRQ_SOFT_MASK_DEBUG + bool "Include extra checks for powerpc irq soft masking" + default n + config XMON bool "Include xmon kernel debugger" depends on DEBUG_KERNEL @@ -368,7 +372,7 @@ config PPC_PTDUMP config PPC_HTDUMP def_bool y - depends on PPC_PTDUMP && PPC_BOOK3S + depends on PPC_PTDUMP && PPC_BOOK3S_64 config PPC_FAST_ENDIAN_SWITCH bool "Deprecated fast endian-switch syscall" diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1381693a4a51..ccd2556bdb53 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -63,6 +63,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y)) ifdef CONFIG_PPC32 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o else +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds ifeq ($(call ld-ifversion, -ge, 225000000, y),y) # Have the linker provide sfpr if possible. # There is a corresponding test in arch/powerpc/lib/Makefile diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 08782f55b89f..ef6549e57157 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -108,10 +108,10 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ $(libfdt) libfdt-wrapper.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ elf_util.c $(zlib-y) devtree.c stdlib.c \ - oflib.c ofconsole.c cuboot.c cpm-serial.c \ - uartlite.c opal.c + oflib.c ofconsole.c cuboot.c + src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c -src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S +src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S endif @@ -120,6 +120,8 @@ src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c +src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c +src-wlib-$(CONFIG_CPM) += cpm-serial.c src-plat-y := of.c epapr.c src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts index bf81b8f9704c..187ce458d03a 100644 --- a/arch/powerpc/boot/dts/a3m071.dts +++ b/arch/powerpc/boot/dts/a3m071.dts @@ -105,24 +105,24 @@ reg = <0 0x0 0x02000000>; compatible = "cfi-flash"; bank-width = <2>; - partition@0x0 { + partition@0 { label = "u-boot"; reg = <0x00000000 0x00040000>; read-only; }; - partition@0x00040000 { + partition@40000 { label = "env"; reg = <0x00040000 0x00020000>; }; - partition@0x00060000 { + partition@60000 { label = "dtb"; reg = <0x00060000 0x00020000>; }; - partition@0x00080000 { + partition@80000 { label = "kernel"; reg = <0x00080000 0x00500000>; }; - partition@0x00580000 { + partition@580000 { label = "root"; reg = <0x00580000 0x00A80000>; }; diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts index e61d5dc598c1..746779202a12 100644 --- a/arch/powerpc/boot/dts/akebono.dts +++ b/arch/powerpc/boot/dts/akebono.dts @@ -216,7 +216,7 @@ interrupts = <39 2>; }; - IIC0: i2c@00000000 { + IIC0: i2c@0 { compatible = "ibm,iic-476gtr", "ibm,iic"; reg = <0x0 0x00000020>; interrupt-parent = <&MPIC>; @@ -229,7 +229,7 @@ }; }; - IIC1: i2c@00000100 { + IIC1: i2c@100 { compatible = "ibm,iic-476gtr", "ibm,iic"; reg = <0x100 0x00000020>; interrupt-parent = <&MPIC>; diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts index 1e32903cb0a8..27f169e3ade9 100644 --- a/arch/powerpc/boot/dts/c2k.dts +++ b/arch/powerpc/boot/dts/c2k.dts @@ -276,14 +276,14 @@ >; }; - cpu-error@0070 { + cpu-error@70 { compatible = "marvell,mv64360-cpu-error"; reg = <0x0070 0x10 0x0128 0x28>; interrupts = <3>; interrupt-parent = <&PIC>; }; - sram-ctrl@0380 { + sram-ctrl@380 { compatible = "marvell,mv64360-sram-ctrl"; reg = <0x0380 0x80>; interrupts = <13>; @@ -311,7 +311,7 @@ interrupt-parent = <&PIC>; }; /* Devices attached to the device controller */ - devicebus@045c { + devicebus@45c { #address-cells = <2>; #size-cells = <1>; compatible = "marvell,mv64306-devctrl"; diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index 4191e1850ea1..f2ad5815f08d 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -108,7 +108,7 @@ reg = <0x50000000 0x4>; }; - IIC0: i2c@00000000 { + IIC0: i2c@0 { compatible = "ibm,iic-currituck", "ibm,iic"; reg = <0x0 0x00000014>; interrupt-parent = <&MPIC>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts index 01706a339603..bc3e8039bdc7 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts @@ -126,7 +126,7 @@ par_io@e0100 { num-ports = <7>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x4 0xa 0x1 0x0 0x2 0x0 /* TxD0 */ @@ -154,7 +154,7 @@ 0x1 0x1f 0x2 0x0 0x3 0x0>; /* GTX125 */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x5 0xa 0x1 0x0 0x2 0x0 /* TxD0 */ @@ -228,22 +228,22 @@ /* These are the same PHYs as on * gianfar's MDIO bus */ - qe_phy0: ethernet-phy@07 { + qe_phy0: ethernet-phy@7 { interrupt-parent = <&mpic>; interrupts = <1 1 0 0>; reg = <0x7>; }; - qe_phy1: ethernet-phy@01 { + qe_phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; interrupts = <2 1 0 0>; reg = <0x1>; }; - qe_phy2: ethernet-phy@02 { + qe_phy2: ethernet-phy@2 { interrupt-parent = <&mpic>; interrupts = <1 1 0 0>; reg = <0x2>; }; - qe_phy3: ethernet-phy@03 { + qe_phy3: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <2 1 0 0>; reg = <0x3>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts index 76b2bd6f7742..d8367ceddea6 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts @@ -141,7 +141,7 @@ gpio-controller; }; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -161,7 +161,7 @@ 0x2 0x14 0x1 0x0 0x2 0x0>; /* ENET1_GTXCLK */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -181,7 +181,7 @@ 0x2 0x2 0x1 0x0 0x2 0x0>; /* ENET2_GTXCLK */ }; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -201,7 +201,7 @@ 0x2 0x19 0x1 0x0 0x2 0x0>; /* ENET3_GTXCLK */ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -272,30 +272,30 @@ reg = <0x2120 0x18>; compatible = "fsl,ucc-mdio"; - qe_phy0: ethernet-phy@07 { + qe_phy0: ethernet-phy@7 { interrupt-parent = <&mpic>; interrupts = <1 1 0 0>; reg = <0x7>; }; - qe_phy1: ethernet-phy@01 { + qe_phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; interrupts = <2 1 0 0>; reg = <0x1>; }; - qe_phy2: ethernet-phy@02 { + qe_phy2: ethernet-phy@2 { interrupt-parent = <&mpic>; interrupts = <3 1 0 0>; reg = <0x2>; }; - qe_phy3: ethernet-phy@03 { + qe_phy3: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <4 1 0 0>; reg = <0x3>; }; - qe_phy5: ethernet-phy@04 { + qe_phy5: ethernet-phy@4 { reg = <0x04>; }; - qe_phy7: ethernet-phy@06 { + qe_phy7: ethernet-phy@6 { reg = <0x6>; }; tbi1: tbi-phy@11 { diff --git a/arch/powerpc/boot/dts/fsl/p1021mds.dts b/arch/powerpc/boot/dts/fsl/p1021mds.dts index 291454c75dda..1047802f4d2a 100644 --- a/arch/powerpc/boot/dts/fsl/p1021mds.dts +++ b/arch/powerpc/boot/dts/fsl/p1021mds.dts @@ -202,7 +202,7 @@ ranges = <0x0 0xe0100 0x60>; device_type = "par_io"; num-ports = <3>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -225,7 +225,7 @@ 0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -296,7 +296,7 @@ interrupts = <4 1 0 0>; reg = <0x0>; }; - qe_phy1: ethernet-phy@03 { + qe_phy1: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <5 1 0 0>; reg = <0x3>; diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi index d44bb12debb0..0a5434a631c3 100644 --- a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi @@ -245,7 +245,7 @@ ranges = <0x0 0xe0100 0x60>; device_type = "par_io"; num-ports = <3>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -268,7 +268,7 @@ 0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -283,7 +283,7 @@ 0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */ }; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/ @@ -293,7 +293,7 @@ 0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/ diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts index b15acbaea34b..ea33b57f8774 100644 --- a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts +++ b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts @@ -106,7 +106,7 @@ interrupts = <4 1 0 0>; reg = <0x6>; }; - qe_phy1: ethernet-phy@03 { + qe_phy1: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <5 1 0 0>; reg = <0x3>; diff --git a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi index 08816fb474f5..ab75b8f29ae2 100644 --- a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi @@ -172,7 +172,7 @@ ranges = <0x0 0xe0100 0x60>; device_type = "par_io"; num-ports = <3>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -195,7 +195,7 @@ 0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -210,7 +210,7 @@ 0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */ }; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/ @@ -220,7 +220,7 @@ 0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/ diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index 621f2c6ee6ad..65ff34c49025 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -61,7 +61,7 @@ }; mdio@fc000 { - phy_sgmii_2: ethernet-phy@03 { + phy_sgmii_2: ethernet-phy@3 { reg = <0x03>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts index fcd2aeb5b8ac..4fa15f48a4c3 100644 --- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts @@ -77,23 +77,23 @@ }; mdio0: mdio@fc000 { - phy_sgmii_0: ethernet-phy@02 { + phy_sgmii_0: ethernet-phy@2 { reg = <0x02>; }; - phy_sgmii_1: ethernet-phy@03 { + phy_sgmii_1: ethernet-phy@3 { reg = <0x03>; }; - phy_sgmii_2: ethernet-phy@01 { + phy_sgmii_2: ethernet-phy@1 { reg = <0x01>; }; - phy_rgmii_0: ethernet-phy@04 { + phy_rgmii_0: ethernet-phy@4 { reg = <0x04>; }; - phy_rgmii_1: ethernet-phy@05 { + phy_rgmii_1: ethernet-phy@5 { reg = <0x05>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts index 2c138627b1b4..3ebb712224cb 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts @@ -59,7 +59,7 @@ }; mdio@fc000 { - phy_sgmii_2: ethernet-phy@03 { + phy_sgmii_2: ethernet-phy@3 { reg = <0x03>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 5fdddbd2a62b..099a598c74c0 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -148,15 +148,15 @@ }; mdio0: mdio@fc000 { - phy_sgmii_2: ethernet-phy@03 { + phy_sgmii_2: ethernet-phy@3 { reg = <0x03>; }; - phy_rgmii_0: ethernet-phy@01 { + phy_rgmii_0: ethernet-phy@1 { reg = <0x01>; }; - phy_rgmii_1: ethernet-phy@02 { + phy_rgmii_1: ethernet-phy@2 { reg = <0x02>; }; }; diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts index f10a64aeb83b..6560283c5aec 100644 --- a/arch/powerpc/boot/dts/fsp2.dts +++ b/arch/powerpc/boot/dts/fsp2.dts @@ -583,21 +583,21 @@ }; }; - OHCI1: ohci@02040000 { + OHCI1: ohci@2040000 { compatible = "ohci-le"; reg = <0x02040000 0xa0>; interrupt-parent = <&UIC1_3>; interrupts = <28 0x8 29 0x8>; }; - OHCI2: ohci@02080000 { + OHCI2: ohci@2080000 { compatible = "ohci-le"; reg = <0x02080000 0xa0>; interrupt-parent = <&UIC1_3>; interrupts = <30 0x8 31 0x8>; }; - EHCI: ehci@02000000 { + EHCI: ehci@2000000 { compatible = "usb-ehci"; reg = <0x02000000 0xa4>; interrupt-parent = <&UIC1_3>; diff --git a/arch/powerpc/boot/dts/gamecube.dts b/arch/powerpc/boot/dts/gamecube.dts index ef3be0e58b02..58d06c9ee08b 100644 --- a/arch/powerpc/boot/dts/gamecube.dts +++ b/arch/powerpc/boot/dts/gamecube.dts @@ -54,13 +54,13 @@ ranges = <0x0c000000 0x0c000000 0x00010000>; interrupt-parent = <&PIC>; - video@0c002000 { + video@c002000 { compatible = "nintendo,flipper-vi"; reg = <0x0c002000 0x100>; interrupts = <8>; }; - processor-interface@0c003000 { + processor-interface@c003000 { compatible = "nintendo,flipper-pi"; reg = <0x0c003000 0x100>; @@ -71,7 +71,7 @@ }; }; - dsp@0c005000 { + dsp@c005000 { #address-cells = <1>; #size-cells = <1>; compatible = "nintendo,flipper-dsp"; @@ -84,26 +84,26 @@ }; }; - disk@0c006000 { + disk@c006000 { compatible = "nintendo,flipper-di"; reg = <0x0c006000 0x40>; interrupts = <2>; }; - audio@0c006c00 { + audio@c006c00 { compatible = "nintendo,flipper-ai"; reg = <0x0c006c00 0x20>; interrupts = <6>; }; - gamepad-controller@0c006400 { + gamepad-controller@c006400 { compatible = "nintendo,flipper-si"; reg = <0x0c006400 0x100>; interrupts = <3>; }; /* External Interface bus */ - exi@0c006800 { + exi@c006800 { compatible = "nintendo,flipper-exi"; reg = <0x0c006800 0x40>; virtual-reg = <0x0c006800>; diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts index 2b256694eca6..cb16dad43c92 100644 --- a/arch/powerpc/boot/dts/haleakala.dts +++ b/arch/powerpc/boot/dts/haleakala.dts @@ -237,7 +237,7 @@ }; }; - PCIE0: pciex@0a0000000 { + PCIE0: pciex@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 5ba7f01e2a29..2a3413221cc1 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -322,7 +322,7 @@ }; }; - PCIE0: pciex@0a0000000 { + PCIE0: pciex@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -363,7 +363,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@0c0000000 { + PCIE1: pciex@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts index 983aee185793..9fa33d9ba966 100644 --- a/arch/powerpc/boot/dts/kmeter1.dts +++ b/arch/powerpc/boot/dts/kmeter1.dts @@ -434,27 +434,27 @@ compatible = "fsl,ucc-mdio"; /* Piggy2 (UCC4, MDIO 0x00, RMII) */ - phy_piggy2: ethernet-phy@00 { + phy_piggy2: ethernet-phy@0 { reg = <0x0>; }; /* Eth-1 (UCC5, MDIO 0x08, RMII) */ - phy_eth1: ethernet-phy@08 { + phy_eth1: ethernet-phy@8 { reg = <0x08>; }; /* Eth-2 (UCC6, MDIO 0x09, RMII) */ - phy_eth2: ethernet-phy@09 { + phy_eth2: ethernet-phy@9 { reg = <0x09>; }; /* Eth-3 (UCC7, MDIO 0x0a, RMII) */ - phy_eth3: ethernet-phy@0a { + phy_eth3: ethernet-phy@a { reg = <0x0a>; }; /* Eth-4 (UCC8, MDIO 0x0b, RMII) */ - phy_eth4: ethernet-phy@0b { + phy_eth4: ethernet-phy@b { reg = <0x0b>; }; diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts index 63d48b632c84..bf8fe1629392 100644 --- a/arch/powerpc/boot/dts/makalu.dts +++ b/arch/powerpc/boot/dts/makalu.dts @@ -268,7 +268,7 @@ }; }; - PCIE0: pciex@0a0000000 { + PCIE0: pciex@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -309,7 +309,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@0c0000000 { + PCIE1: pciex@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts index 0793cdf0d46e..49c7d657118a 100644 --- a/arch/powerpc/boot/dts/mpc832x_mds.dts +++ b/arch/powerpc/boot/dts/mpc832x_mds.dts @@ -186,7 +186,7 @@ device_type = "par_io"; num-ports = <7>; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 3 4 3 0 2 0 /* MDIO */ @@ -208,7 +208,7 @@ 1 12 1 0 1 0 /* TX_EN */ 1 13 2 0 1 0>; /* CRS */ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 3 31 2 0 1 0 /* RX_CLK (CLK7) */ @@ -228,7 +228,7 @@ 1 30 1 0 1 0 /* TX_EN */ 1 31 2 0 1 0>; /* CRS */ }; - pio5: ucc_pin@05 { + pio5: ucc_pin@5 { pio-map = < /* * open has @@ -352,12 +352,12 @@ reg = <0x2320 0x18>; compatible = "fsl,ucc-mdio"; - phy3: ethernet-phy@03 { + phy3: ethernet-phy@3 { interrupt-parent = <&ipic>; interrupts = <17 0x8>; reg = <0x3>; }; - phy4: ethernet-phy@04 { + phy4: ethernet-phy@4 { interrupt-parent = <&ipic>; interrupts = <18 0x8>; reg = <0x4>; diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index 91df1eb16667..647cae14c16d 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -175,7 +175,7 @@ gpio-controller; }; - ucc2pio:ucc_pin@02 { + ucc2pio:ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 3 4 3 0 2 0 /* MDIO */ @@ -197,7 +197,7 @@ 0 30 1 0 1 0 /* TX_EN */ 0 31 2 0 1 0>; /* CRS */ }; - ucc3pio:ucc_pin@03 { + ucc3pio:ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0 13 2 0 1 0 /* RX_CLK (CLK9) */ @@ -310,12 +310,12 @@ reg = <0x3120 0x18>; compatible = "fsl,ucc-mdio"; - phy00:ethernet-phy@00 { + phy00:ethernet-phy@0 { interrupt-parent = <&ipic>; interrupts = <0>; reg = <0x0>; }; - phy04:ethernet-phy@04 { + phy04:ethernet-phy@4 { interrupt-parent = <&ipic>; interrupts = <0>; reg = <0x4>; diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts index ecb6ccd3a6aa..539fd9f72eda 100644 --- a/arch/powerpc/boot/dts/mpc836x_mds.dts +++ b/arch/powerpc/boot/dts/mpc836x_mds.dts @@ -228,7 +228,7 @@ gpio-controller; }; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0 3 1 0 1 0 /* TxD0 */ @@ -255,7 +255,7 @@ 2 9 1 0 3 0 /* GTX_CLK - CLK10 */ 2 8 2 0 1 0>; /* GTX125 - CLK9 */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0 17 1 0 1 0 /* TxD0 */ @@ -393,12 +393,12 @@ reg = <0x2120 0x18>; compatible = "fsl,ucc-mdio"; - phy0: ethernet-phy@00 { + phy0: ethernet-phy@0 { interrupt-parent = <&ipic>; interrupts = <17 0x8>; reg = <0x0>; }; - phy1: ethernet-phy@01 { + phy1: ethernet-phy@1 { interrupt-parent = <&ipic>; interrupts = <18 0x8>; reg = <0x1>; diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts index 0b38a0defd2c..8967a56adad4 100644 --- a/arch/powerpc/boot/dts/sbc8548-altflash.dts +++ b/arch/powerpc/boot/dts/sbc8548-altflash.dts @@ -40,12 +40,12 @@ compatible = "intel,JS28F128", "cfi-flash"; bank-width = <4>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* FC000000 -> FFEFFFFF */ reg = <0x00000000 0x03f00000>; }; - partition@0x03f00000 { + partition@3f00000 { label = "bootloader"; /* FFF00000 -> FFFFFFFF */ reg = <0x03f00000 0x00100000>; @@ -95,12 +95,12 @@ reg = <0x6 0x0 0x800000>; bank-width = <1>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* EF800000 -> EFF9FFFF */ reg = <0x00000000 0x007a0000>; }; - partition@0x7a0000 { + partition@7a0000 { label = "bootloader"; /* EFFA0000 -> EFFFFFFF */ reg = <0x007a0000 0x00060000>; diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts index 1df2a0955668..9bdb828a504e 100644 --- a/arch/powerpc/boot/dts/sbc8548.dts +++ b/arch/powerpc/boot/dts/sbc8548.dts @@ -38,12 +38,12 @@ reg = <0x0 0x0 0x800000>; bank-width = <1>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* FF800000 -> FFF9FFFF */ reg = <0x00000000 0x007a0000>; }; - partition@0x7a0000 { + partition@7a0000 { label = "bootloader"; /* FFFA0000 -> FFFFFFFF */ reg = <0x007a0000 0x00060000>; @@ -92,12 +92,12 @@ compatible = "intel,JS28F128", "cfi-flash"; bank-width = <4>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* EC000000 -> EFEFFFFF */ reg = <0x00000000 0x03f00000>; }; - partition@0x03f00000 { + partition@3f00000 { label = "bootloader"; /* EFF00000 -> EFFFFFFF */ reg = <0x03f00000 0x00100000>; diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts index 77528c9a8dbd..17a5babb098d 100644 --- a/arch/powerpc/boot/dts/wii.dts +++ b/arch/powerpc/boot/dts/wii.dts @@ -65,14 +65,14 @@ 0x0d800000 0x0d800000 0x00800000>; interrupt-parent = <&PIC0>; - video@0c002000 { + video@c002000 { compatible = "nintendo,hollywood-vi", "nintendo,flipper-vi"; reg = <0x0c002000 0x100>; interrupts = <8>; }; - processor-interface@0c003000 { + processor-interface@c003000 { compatible = "nintendo,hollywood-pi", "nintendo,flipper-pi"; reg = <0x0c003000 0x100>; @@ -84,7 +84,7 @@ }; }; - dsp@0c005000 { + dsp@c005000 { #address-cells = <1>; #size-cells = <1>; compatible = "nintendo,hollywood-dsp", @@ -93,14 +93,14 @@ interrupts = <6>; }; - gamepad-controller@0d006400 { + gamepad-controller@d006400 { compatible = "nintendo,hollywood-si", "nintendo,flipper-si"; reg = <0x0d006400 0x100>; interrupts = <3>; }; - audio@0c006c00 { + audio@c006c00 { compatible = "nintendo,hollywood-ai", "nintendo,flipper-ai"; reg = <0x0d006c00 0x20>; @@ -108,7 +108,7 @@ }; /* External Interface bus */ - exi@0d006800 { + exi@d006800 { compatible = "nintendo,hollywood-exi", "nintendo,flipper-exi"; reg = <0x0d006800 0x40>; @@ -116,7 +116,7 @@ interrupts = <4>; }; - usb@0d040000 { + usb@d040000 { compatible = "nintendo,hollywood-usb-ehci", "usb-ehci"; reg = <0x0d040000 0x100>; @@ -124,7 +124,7 @@ interrupt-parent = <&PIC1>; }; - usb@0d050000 { + usb@d050000 { compatible = "nintendo,hollywood-usb-ohci", "usb-ohci"; reg = <0x0d050000 0x100>; @@ -132,7 +132,7 @@ interrupt-parent = <&PIC1>; }; - usb@0d060000 { + usb@d060000 { compatible = "nintendo,hollywood-usb-ohci", "usb-ohci"; reg = <0x0d060000 0x100>; @@ -140,7 +140,7 @@ interrupt-parent = <&PIC1>; }; - sd@0d070000 { + sd@d070000 { compatible = "nintendo,hollywood-sdhci", "sdhci"; reg = <0x0d070000 0x200>; @@ -148,7 +148,7 @@ interrupt-parent = <&PIC1>; }; - sdio@0d080000 { + sdio@d080000 { compatible = "nintendo,hollywood-sdhci", "sdhci"; reg = <0x0d080000 0x200>; @@ -156,14 +156,14 @@ interrupt-parent = <&PIC1>; }; - ipc@0d000000 { + ipc@d000000 { compatible = "nintendo,hollywood-ipc"; reg = <0x0d000000 0x10>; interrupts = <30>; interrupt-parent = <&PIC1>; }; - PIC1: pic1@0d800030 { + PIC1: pic1@d800030 { #interrupt-cells = <1>; compatible = "nintendo,hollywood-pic"; reg = <0x0d800030 0x10>; @@ -171,7 +171,7 @@ interrupts = <14>; }; - GPIO: gpio@0d8000c0 { + GPIO: gpio@d8000c0 { #gpio-cells = <2>; compatible = "nintendo,hollywood-gpio"; reg = <0x0d8000c0 0x40>; @@ -203,12 +203,12 @@ */ }; - control@0d800100 { + control@d800100 { compatible = "nintendo,hollywood-control"; reg = <0x0d800100 0x300>; }; - disk@0d806000 { + disk@d806000 { compatible = "nintendo,hollywood-di"; reg = <0x0d806000 0x40>; interrupts = <2>; diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 7b5c02b1afd0..88955095ec07 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -124,20 +124,26 @@ int serial_console_init(void) else if (dt_is_compatible(devp, "marvell,mv64360-mpsc")) rc = mpsc_console_init(devp, &serial_cd); #endif +#ifdef CONFIG_CPM else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") || dt_is_compatible(devp, "fsl,cpm1-smc-uart") || dt_is_compatible(devp, "fsl,cpm2-scc-uart") || dt_is_compatible(devp, "fsl,cpm2-smc-uart")) rc = cpm_console_init(devp, &serial_cd); +#endif #ifdef CONFIG_PPC_MPC52XX else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif +#ifdef CONFIG_XILINX_VIRTEX else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") || dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a")) rc = uartlite_console_init(devp, &serial_cd); +#endif +#ifdef CONFIG_PPC64_BOOT_WRAPPER else if (dt_is_compatible(devp, "ibm,opal-console-raw")) rc = opal_console_init(devp, &serial_cd); +#endif /* Add other serial console driver calls here */ diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index f1f176c29fa3..5320735395e7 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -13,7 +13,6 @@ CONFIG_EXPERT=y CONFIG_PARTITION_ADVANCED=y CONFIG_MPC86XADS=y CONFIG_8xx_COPYBACK=y -CONFIG_8xx_CPU6=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 73dab7a37386..9e92aa6a52ba 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -96,6 +96,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m +CONFIG_BLK_DEV_NVME=y CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_GENERIC=y @@ -112,6 +113,7 @@ CONFIG_SCSI_CXGB3_ISCSI=m CONFIG_SCSI_CXGB4_ISCSI=m CONFIG_SCSI_BNX2_ISCSI=m CONFIG_BE2ISCSI=m +CONFIG_SCSI_AACRAID=y CONFIG_SCSI_MPT2SAS=m CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 016579ef16d3..30a155c0a6b0 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -311,6 +311,29 @@ static inline int pte_present(pte_t pte) return pte_val(pte) & _PAGE_PRESENT; } +/* + * We only find page table entry in the last level + * Hence no need for other accessors + */ +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + unsigned long pteval = pte_val(pte); + /* + * A read-only access is controlled by _PAGE_USER bit. + * We have _PAGE_READ set for WRITE and EXECUTE + */ + unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER; + + if (write) + need_pte_bits |= _PAGE_WRITE; + + if ((pteval & need_pte_bits) != need_pte_bits) + return false; + + return true; +} + /* Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 2d9df40446f6..949d691094a4 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -17,6 +17,12 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) +#define H_PAGE_F_GIX_SHIFT 53 +#define H_PAGE_F_SECOND _RPAGE_RPN44 /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) +#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ +#define H_PAGE_HASHPTE _RPAGE_RSV2 /* software: PTE & hash are busy */ + /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ H_PAGE_F_SECOND | H_PAGE_F_GIX) @@ -49,6 +55,20 @@ static inline int hash__hugepd_ok(hugepd_t hpd) } #endif +/* + * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just + * a matter of returning the PTE bits that need to be modified. On 64K PTE, + * things are a little more involved and hence needs many more parameters to + * accomplish the same. However we want to abstract this out from the caller by + * keeping the prototype consistent across the two formats. + */ +static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, + unsigned int subpg_index, unsigned long hidx) +{ + return (hidx << H_PAGE_F_GIX_SHIFT) & + (H_PAGE_F_SECOND | H_PAGE_F_GIX); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline char *get_hpte_slot_array(pmd_t *pmdp) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index cb46d1034f33..338b7da468ce 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -13,21 +13,17 @@ */ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ +#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ +#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ + /* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details */ #define H_PAGE_THP_HUGE H_PAGE_4K_PFN -/* - * Used to track subpage group valid if H_PAGE_COMBO is set - * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND - */ -#define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND) - /* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ - H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO) /* * we support 16 fragments per PTE page of 64K size. */ @@ -55,24 +51,57 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) unsigned long *hidxp; rpte.pte = pte; - rpte.hidx = 0; - if (pte_val(pte) & H_PAGE_COMBO) { - /* - * Make sure we order the hidx load against the H_PAGE_COMBO - * check. The store side ordering is done in __hash_page_4K - */ - smp_rmb(); - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); - rpte.hidx = *hidxp; - } + + /* + * Ensure that we do not read the hidx before we read the PTE. Because + * the writer side is expected to finish writing the hidx first followed + * by the PTE, by using smp_wmb(). pte_set_hash_slot() ensures that. + */ + smp_rmb(); + + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + rpte.hidx = *hidxp; return rpte; } +/* + * shift the hidx representation by one-modulo-0xf; i.e hidx 0 is respresented + * as 1, 1 as 2,... , and 0xf as 0. This convention lets us represent a + * invalid hidx 0xf with a 0x0 bit value. PTEs are anyway zero'd when + * allocated. We dont have to zero them gain; thus save on the initialization. + */ +#define HIDX_UNSHIFT_BY_ONE(x) ((x + 0xfUL) & 0xfUL) /* shift backward by one */ +#define HIDX_SHIFT_BY_ONE(x) ((x + 0x1UL) & 0xfUL) /* shift forward by one */ +#define HIDX_BITS(x, index) (x << (index << 2)) +#define BITS_TO_HIDX(x, index) ((x >> (index << 2)) & 0xfUL) +#define INVALID_RPTE_HIDX 0x0UL + static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { - if ((pte_val(rpte.pte) & H_PAGE_COMBO)) - return (rpte.hidx >> (index<<2)) & 0xf; - return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; + return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index)); +} + +/* + * Commit the hidx and return PTE bits that needs to be modified. The caller is + * expected to modify the PTE bits accordingly and commit the PTE to memory. + */ +static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, + unsigned int subpg_index, unsigned long hidx) +{ + unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + + rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); + *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); + + /* + * Anyone reading PTE must ensure hidx bits are read after reading the + * PTE by using the read-side barrier smp_rmb(). __real_pte() can be + * used for that. + */ + smp_wmb(); + + /* No PTE bits to be modified, return 0x0UL */ + return 0x0UL; } #define __rpte_to_pte(r) ((r).pte) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index ecb1239d74f4..0920eff731b3 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -9,11 +9,6 @@ * */ #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS -#define H_PAGE_F_GIX_SHIFT 56 -#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ -#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ -#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) -#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ #ifdef CONFIG_PPC_64K_PAGES #include <asm/book3s/64/hash-64k.h> @@ -167,6 +162,9 @@ static inline int hash__pte_none(pte_t pte) return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0; } +unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, + int ssize, real_pte_t rpte, unsigned int subpg_index); + /* This low level function performs the actual PTE insertion * Setting the PTE depends on the MMU type and other factors. It's * an horrible mess that I'm not going to try to clean up now but diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index e91e115a816f..50ed64fba4ae 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -90,6 +90,8 @@ #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) +#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000) +#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) @@ -104,6 +106,9 @@ #define HPTE_R_C ASM_CONST(0x0000000000000080) #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) +#define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800) +#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400) +#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200) #define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index c9448e19847a..0abeb0e2d616 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -108,6 +108,16 @@ typedef struct { #ifdef CONFIG_SPAPR_TCE_IOMMU struct list_head iommu_group_mem_list; #endif + +#ifdef CONFIG_PPC_MEM_KEYS + /* + * Each bit represents one protection key. + * bit set -> key allocated + * bit unset -> key available for allocation + */ + u32 pkey_allocation_map; + s16 execute_only_pkey; /* key holding execute-only protection */ +#endif } mm_context_t; /* diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6ca1208cedcb..51017726d495 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -14,8 +14,9 @@ */ #define _PAGE_BIT_SWAP_TYPE 0 +#define _PAGE_NA 0 #define _PAGE_RO 0 -#define _PAGE_SHARED 0 +#define _PAGE_USER 0 #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ @@ -39,6 +40,7 @@ #define _RPAGE_RSV2 0x0800000000000000UL #define _RPAGE_RSV3 0x0400000000000000UL #define _RPAGE_RSV4 0x0200000000000000UL +#define _RPAGE_RSV5 0x00040UL #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */ @@ -58,6 +60,25 @@ /* Max physical address bit as per radix table */ #define _RPAGE_PA_MAX 57 +#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_64K_PAGES +#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 +#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 +#else /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */ +#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */ +#endif /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 +#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 +#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#else /* CONFIG_PPC_MEM_KEYS */ +#define H_PTE_PKEY_BIT0 0 +#define H_PTE_PKEY_BIT1 0 +#define H_PTE_PKEY_BIT2 0 +#define H_PTE_PKEY_BIT3 0 +#define H_PTE_PKEY_BIT4 0 +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * Max physical address bit we will use for now. * @@ -121,13 +142,16 @@ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ _PAGE_SOFT_DIRTY) + +#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ + H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* * Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | H_PTE_PKEY) /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable @@ -546,6 +570,40 @@ static inline int pte_present(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } + +#ifdef CONFIG_PPC_MEM_KEYS +extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); +#else +static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + return true; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + unsigned long pteval = pte_val(pte); + /* Also check for pte_user */ + unsigned long clear_pte_bits = _PAGE_PRIVILEGED; + /* + * _PAGE_READ is needed for any access and will be + * cleared for PROT_NONE + */ + unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ; + + if (write) + need_pte_bits |= _PAGE_WRITE; + + if ((pteval & need_pte_bits) != need_pte_bits) + return false; + + if ((pteval & clear_pte_bits) == clear_pte_bits) + return false; + + return arch_pte_access_permitted(pte_val(pte), write, 0); +} + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -850,6 +908,11 @@ static inline int pud_bad(pud_t pud) return hash__pud_bad(pud); } +#define pud_access_permitted pud_access_permitted +static inline bool pud_access_permitted(pud_t pud, bool write) +{ + return pte_access_permitted(pud_pte(pud), write); +} #define pgd_write(pgd) pte_write(pgd_pte(pgd)) static inline void pgd_set(pgd_t *pgdp, unsigned long val) @@ -889,6 +952,12 @@ static inline int pgd_bad(pgd_t pgd) return hash__pgd_bad(pgd); } +#define pgd_access_permitted pgd_access_permitted +static inline bool pgd_access_permitted(pgd_t pgd, bool write) +{ + return pte_access_permitted(pgd_pte(pgd), write); +} + extern struct page *pgd_page(pgd_t pgd); /* Pointers in the page table tree are physical addresses */ @@ -1009,6 +1078,12 @@ static inline int pmd_protnone(pmd_t pmd) #define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) +#define pmd_access_permitted pmd_access_permitted +static inline bool pmd_access_permitted(pmd_t pmd, bool write) +{ + return pte_access_permitted(pmd_pte(pmd), write); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 849ecaae9e79..64d02a704bcb 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -51,6 +51,7 @@ static inline void arch_leave_lazy_mmu_mode(void) #define arch_flush_lazy_mmu_mode() do {} while (0) +extern void hash__tlbiel_all(unsigned int action); extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, unsigned long flags); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 6a9e68003387..8eea90f80e45 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -11,6 +11,12 @@ static inline int mmu_get_ap(int psize) return mmu_psize_defs[psize].ap; } +#ifdef CONFIG_PPC_RADIX_MMU +extern void radix__tlbiel_all(unsigned int action); +#else +static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }; +#endif + extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, @@ -47,4 +53,5 @@ extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); + #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 58b576f654b3..0cac17253513 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -8,6 +8,44 @@ #include <asm/book3s/64/tlbflush-hash.h> #include <asm/book3s/64/tlbflush-radix.h> +/* TLB flush actions. Used as argument to tlbiel_all() */ +enum { + TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */ + TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ +}; + +#ifdef CONFIG_PPC_NATIVE +static inline void tlbiel_all(void) +{ + /* + * This is used for host machine check and bootup. + * + * This uses early_radix_enabled and implementations use + * early_cpu_has_feature etc because that works early in boot + * and this is the machine check path which is not performance + * critical. + */ + if (early_radix_enabled()) + radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); + else + hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); +} +#else +static inline void tlbiel_all(void) { BUG(); }; +#endif + +static inline void tlbiel_all_lpid(bool radix) +{ + /* + * This is used for guest machine check. + */ + if (radix) + radix__tlbiel_all(TLB_INVAL_SCOPE_LPID); + else + hash__tlbiel_all(TLB_INVAL_SCOPE_LPID); +} + + #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE static inline void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3c04249bcf39..fd06dbe7d7d3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,9 +133,11 @@ struct pt_regs; extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); +extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int); extern void die(const char *, struct pt_regs *, long); extern bool die_will_crash(void); - +extern void panic_flush_kmsg_start(void); +extern void panic_flush_kmsg_end(void); #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index abef812de7f8..812535f40124 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -31,8 +31,10 @@ unsigned int create_cond_branch(const unsigned int *addr, unsigned long target, int flags); int patch_branch(unsigned int *addr, unsigned long target, int flags); int patch_instruction(unsigned int *addr, unsigned int instr); +int raw_patch_instruction(unsigned int *addr, unsigned int instr); int instr_is_relative_branch(unsigned int instr); +int instr_is_relative_link_branch(unsigned int instr); int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); unsigned long branch_target(const unsigned int *instr); unsigned int translate_branch(const unsigned int *dest, diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h index b925df1b87d0..4c24ea8209bb 100644 --- a/arch/powerpc/include/asm/cpm.h +++ b/arch/powerpc/include/asm/cpm.h @@ -166,6 +166,6 @@ static inline int cpm_command(u32 command, u8 opcode) } #endif /* CONFIG_CPM */ -int cpm2_gpiochip_add32(struct device_node *np); +int cpm2_gpiochip_add32(struct device *dev); #endif diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index 3db821876d48..a116fe931789 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -605,5 +605,7 @@ enum cpm_clk { }; int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode); +int cpm1_gpiochip_add16(struct device *dev); +int cpm1_gpiochip_add32(struct device *dev); #endif /* __CPM1__ */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 0546663a98db..a2c5c95882cf 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -107,12 +107,6 @@ struct cpu_spec { * called in real mode to handle SLB and TLB errors. */ long (*machine_check_early)(struct pt_regs *regs); - - /* - * Processor specific routine to flush tlbs. - */ - void (*flush_tlb)(unsigned int action); - }; extern struct cpu_spec *cur_cpu_spec; @@ -133,12 +127,6 @@ extern void cpu_feature_keys_init(void); static inline void cpu_feature_keys_init(void) { } #endif -/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */ -enum { - TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */ - TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ -}; - #endif /* __ASSEMBLY__ */ /* CPU kernel features */ @@ -207,7 +195,7 @@ enum { #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000) -/* Free LONG_ASM_CONST(0x0020000000000000) */ +#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000) #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000) #define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000) @@ -454,7 +442,7 @@ enum { CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ - CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX) + CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -464,7 +452,7 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ @@ -476,7 +464,8 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ + CPU_FTR_PKEY) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h new file mode 100644 index 000000000000..ce242b9ea8c6 --- /dev/null +++ b/arch/powerpc/include/asm/drmem.h @@ -0,0 +1,102 @@ +/* + * drmem.h: Power specific logical memory block representation + * + * Copyright 2017 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_LMB_H +#define _ASM_POWERPC_LMB_H + +struct drmem_lmb { + u64 base_addr; + u32 drc_index; + u32 aa_index; + u32 flags; +}; + +struct drmem_lmb_info { + struct drmem_lmb *lmbs; + int n_lmbs; + u32 lmb_size; +}; + +extern struct drmem_lmb_info *drmem_info; + +#define for_each_drmem_lmb_in_range(lmb, start, end) \ + for ((lmb) = (start); (lmb) <= (end); (lmb)++) + +#define for_each_drmem_lmb(lmb) \ + for_each_drmem_lmb_in_range((lmb), \ + &drmem_info->lmbs[0], \ + &drmem_info->lmbs[drmem_info->n_lmbs - 1]) + +/* + * The of_drconf_cell_v1 struct defines the layout of the LMB data + * specified in the ibm,dynamic-memory device tree property. + * The property itself is a 32-bit value specifying the number of + * LMBs followed by an array of of_drconf_cell_v1 entries, one + * per LMB. + */ +struct of_drconf_cell_v1 { + __be64 base_addr; + __be32 drc_index; + __be32 reserved; + __be32 aa_index; + __be32 flags; +}; + +/* + * Version 2 of the ibm,dynamic-memory property is defined as a + * 32-bit value specifying the number of LMB sets followed by an + * array of of_drconf_cell_v2 entries, one per LMB set. + */ +struct of_drconf_cell_v2 { + u32 seq_lmbs; + u64 base_addr; + u32 drc_index; + u32 aa_index; + u32 flags; +} __packed; + +#define DRCONF_MEM_ASSIGNED 0x00000008 +#define DRCONF_MEM_AI_INVALID 0x00000040 +#define DRCONF_MEM_RESERVED 0x00000080 + +static inline u32 drmem_lmb_size(void) +{ + return drmem_info->lmb_size; +} + +#define DRMEM_LMB_RESERVED 0x80000000 + +static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb) +{ + lmb->flags |= DRMEM_LMB_RESERVED; +} + +static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb) +{ + lmb->flags &= ~DRMEM_LMB_RESERVED; +} + +static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb) +{ + return lmb->flags & DRMEM_LMB_RESERVED; +} + +u64 drmem_lmb_memory_max(void); +void __init walk_drmem_lmbs(struct device_node *dn, + void (*func)(struct drmem_lmb *, const __be32 **)); +int drmem_update_dt(void); + +#ifdef CONFIG_PPC_PSERIES +void __init walk_drmem_lmbs_early(unsigned long node, + void (*func)(struct drmem_lmb *, const __be32 **)); +#endif + +#endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5161c37dd039..fd37cc101f4f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -214,6 +214,7 @@ struct eeh_ops { int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); int (*restore_config)(struct pci_dn *pdn); + int (*notify_resume)(struct pci_dn *pdn); }; extern int eeh_subsystem_flags; @@ -297,6 +298,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); +int eeh_restore_vf_config(struct pci_dn *pdn); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 7197b179c1b1..176dfb73d42c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -251,18 +251,40 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,area+EX_R10(r13); /* save r10 - r12 */ \ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) -#define __EXCEPTION_PROLOG_1(area, extra, vec) \ +#define __EXCEPTION_PROLOG_1_PRE(area) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ SAVE_CTR(r10, area); \ - mfcr r9; \ - extra(vec); \ + mfcr r9; + +#define __EXCEPTION_PROLOG_1_POST(area) \ std r11,area+EX_R11(r13); \ std r12,area+EX_R12(r13); \ GET_SCRATCH0(r10); \ std r10,area+EX_R13(r13) + +/* + * This version of the EXCEPTION_PROLOG_1 will carry + * addition parameter called "bitmask" to support + * checking of the interrupt maskable level in the SOFTEN_TEST. + * Intended to be used in MASKABLE_EXCPETION_* macros. + */ +#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \ + __EXCEPTION_PROLOG_1_PRE(area); \ + extra(vec, bitmask); \ + __EXCEPTION_PROLOG_1_POST(area); + +/* + * This version of the EXCEPTION_PROLOG_1 is intended + * to be used in STD_EXCEPTION* macros + */ +#define _EXCEPTION_PROLOG_1(area, extra, vec) \ + __EXCEPTION_PROLOG_1_PRE(area); \ + extra(vec); \ + __EXCEPTION_PROLOG_1_POST(area); + #define EXCEPTION_PROLOG_1(area, extra, vec) \ - __EXCEPTION_PROLOG_1(area, extra, vec) + _EXCEPTION_PROLOG_1(area, extra, vec) #define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ @@ -485,7 +507,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mflr r9; /* Get LR, later save to stack */ \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ std r9,_LINK(r1); \ - lbz r10,PACASOFTIRQEN(r13); \ + lbz r10,PACAIRQSOFTMASK(r13); \ mfspr r11,SPRN_XER; /* save XER in stackframe */ \ std r10,SOFTE(r1); \ std r11,_XER(r1); \ @@ -549,22 +571,23 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI #define SOFTEN_VALUE_0xea0 PACA_IRQ_EE +#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI -#define __SOFTEN_TEST(h, vec) \ - lbz r10,PACASOFTIRQEN(r13); \ - cmpwi r10,0; \ +#define __SOFTEN_TEST(h, vec, bitmask) \ + lbz r10,PACAIRQSOFTMASK(r13); \ + andi. r10,r10,bitmask; \ li r10,SOFTEN_VALUE_##vec; \ - beq masked_##h##interrupt + bne masked_##h##interrupt -#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) +#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask) -#define SOFTEN_TEST_PR(vec) \ +#define SOFTEN_TEST_PR(vec, bitmask) \ KVMTEST(EXC_STD, vec); \ - _SOFTEN_TEST(EXC_STD, vec) + _SOFTEN_TEST(EXC_STD, vec, bitmask) -#define SOFTEN_TEST_HV(vec) \ +#define SOFTEN_TEST_HV(vec, bitmask) \ KVMTEST(EXC_HV, vec); \ - _SOFTEN_TEST(EXC_HV, vec) + _SOFTEN_TEST(EXC_HV, vec, bitmask) #define KVMTEST_PR(vec) \ KVMTEST(EXC_STD, vec) @@ -572,53 +595,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define KVMTEST_HV(vec) \ KVMTEST(EXC_HV, vec) -#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec) -#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec) +#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask) +#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask) -#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ +#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_PSERIES_1(label, h); -#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ - __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) +#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ + __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) -#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \ +#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_PR, bitmask) -#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \ +#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) -#define MASKABLE_EXCEPTION_HV(loc, vec, label) \ +#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, bitmask) -#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ +#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) -#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ +#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) -#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ - __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) +#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\ + __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) -#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \ +#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_NOTEST_PR) + EXC_STD, SOFTEN_NOTEST_PR, bitmask) + +#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ + EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD); -#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \ +#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, bitmask) -#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ +#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 8645897472b1..511acfd7ab0d 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -51,6 +51,8 @@ #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) +#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) +#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -67,7 +69,8 @@ enum { FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | - FW_FEATURE_HPT_RESIZE, + FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | + FW_FEATURE_DRC_INFO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 456f9e7b8d83..5986d473722b 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -29,6 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING +#define __ARCH_IRQ_EXIT_IRQS_DISABLED #define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x)) #define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x)) diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index fdcff76e9a25..7e0e93f24cb7 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -178,7 +178,7 @@ name: * TRAMP_REAL_* - real, unrelocated helpers (virt can call these) * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use) * TRAMP_KVM - KVM handlers that get put into real, unrelocated - * EXC_COMMON_* - virt, relocated common handlers + * EXC_COMMON - virt, relocated common handlers * * The EXC handlers are given a name, and branch to name_common, or the * appropriate KVM or masking function. Vector handler verieties are as @@ -211,7 +211,6 @@ name: * EXC_COMMON_BEGIN/END - used to open-code the handler * EXC_COMMON * EXC_COMMON_ASYNC - * EXC_COMMON_HV * * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers. @@ -269,14 +268,14 @@ name: STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ EXC_VIRT_END(name, start, size); -#define EXC_REAL_MASKABLE(name, start, size) \ +#define EXC_REAL_MASKABLE(name, start, size, bitmask) \ EXC_REAL_BEGIN(name, start, size); \ - MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \ + MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\ EXC_REAL_END(name, start, size); -#define EXC_VIRT_MASKABLE(name, start, size, realvec) \ +#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ EXC_VIRT_BEGIN(name, start, size); \ - MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\ EXC_VIRT_END(name, start, size); #define EXC_REAL_HV(name, start, size) \ @@ -305,13 +304,13 @@ name: #define __EXC_REAL_OOL_MASKABLE(name, start, size) \ __EXC_REAL_OOL(name, start, size); -#define __TRAMP_REAL_OOL_MASKABLE(name, vec) \ +#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \ + MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask); \ -#define EXC_REAL_OOL_MASKABLE(name, start, size) \ +#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \ __EXC_REAL_OOL_MASKABLE(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE(name, start); + __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask); #define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \ EXC_REAL_BEGIN(name, start, size); \ @@ -332,13 +331,13 @@ name: #define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ __EXC_REAL_OOL(name, start, size); -#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec) \ +#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \ + MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \ -#define EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ +#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \ __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE_HV(name, start); + __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask); #define __EXC_VIRT_OOL(name, start, size) \ EXC_VIRT_BEGIN(name, start, size); \ @@ -356,13 +355,13 @@ name: #define __EXC_VIRT_OOL_MASKABLE(name, start, size) \ __EXC_VIRT_OOL(name, start, size); -#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec) \ +#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\ -#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec) \ +#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \ __EXC_VIRT_OOL_MASKABLE(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE(name, realvec); + __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask); #define __EXC_VIRT_OOL_HV(name, start, size) \ __EXC_VIRT_OOL(name, start, size); @@ -378,13 +377,13 @@ name: #define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \ __EXC_VIRT_OOL(name, start, size); -#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec) \ +#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\ -#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec) \ +#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \ __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec); + __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask); #define TRAMP_KVM(area, n) \ TRAMP_KVM_BEGIN(do_kvm_##n); \ @@ -413,10 +412,6 @@ name: EXC_COMMON_BEGIN(name); \ STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \ -#define EXC_COMMON_HV(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_HEAD_64_H */ diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 85b7a1a21e22..9c14f7b5c46c 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void); static inline void wait_for_subcore_guest_exit(void) { } static inline void wait_for_tb_resync(void) { } #endif + +struct pt_regs; +extern long hmi_handle_debugtrig(struct pt_regs *regs); + #endif /* __ASM_PPC64_HMI_H__ */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 14c9d44f355b..1a4847f67ea8 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -47,8 +47,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!hugepd_ok(hpd)); #ifdef CONFIG_PPC_8xx - return (pte_t *)__va(hpd_val(hpd) & - ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK)); + return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); #else return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE); diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3818fa0164f0..88e5e8f17e98 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -27,6 +27,15 @@ #define PACA_IRQ_DEC 0x08 /* Or FIT */ #define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ #define PACA_IRQ_HMI 0x20 +#define PACA_IRQ_PMI 0x40 + +/* + * flags for paca->irq_soft_mask + */ +#define IRQS_ENABLED 0 +#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */ +#define IRQS_PMI_DISABLED 2 +#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED) #endif /* CONFIG_PPC64 */ @@ -43,46 +52,112 @@ extern void unknown_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include <asm/paca.h> -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long irq_soft_mask_return(void) { unsigned long flags; asm volatile( "lbz %0,%1(13)" : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled))); + : "i" (offsetof(struct paca_struct, irq_soft_mask))); + + return flags; +} + +/* + * The "memory" clobber acts as both a compiler barrier + * for the critical section and as a clobber because + * we changed paca->irq_soft_mask + */ +static inline notrace void irq_soft_mask_set(unsigned long mask) +{ +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + /* + * The irq mask must always include the STD bit if any are set. + * + * and interrupts don't get replayed until the standard + * interrupt (local_irq_disable()) is unmasked. + * + * Other masks must only provide additional masking beyond + * the standard, and they are also not replayed until the + * standard interrupt becomes unmasked. + * + * This could be changed, but it will require partial + * unmasks to be replayed, among other things. For now, take + * the simple approach. + */ + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + + asm volatile( + "stb %0,%1(13)" + : + : "r" (mask), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "memory"); +} + +static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) +{ + unsigned long flags; + +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=&r" (flags) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) + : "memory"); return flags; } -static inline unsigned long arch_local_irq_disable(void) +static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) { - unsigned long flags, zero; + unsigned long flags, tmp; asm volatile( - "li %1,0; lbz %0,%2(13); stb %1,%2(13)" - : "=r" (flags), "=&r" (zero) - : "i" (offsetof(struct paca_struct, soft_enabled)) + "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" + : "=&r" (flags), "=r" (tmp) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) : "memory"); +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); +#endif + return flags; } +static inline unsigned long arch_local_save_flags(void) +{ + return irq_soft_mask_return(); +} + +static inline void arch_local_irq_disable(void) +{ + irq_soft_mask_set(IRQS_DISABLED); +} + extern void arch_local_irq_restore(unsigned long); static inline void arch_local_irq_enable(void) { - arch_local_irq_restore(1); + arch_local_irq_restore(IRQS_ENABLED); } static inline unsigned long arch_local_irq_save(void) { - return arch_local_irq_disable(); + return irq_soft_mask_set_return(IRQS_DISABLED); } static inline bool arch_irqs_disabled_flags(unsigned long flags) { - return flags == 0; + return flags & IRQS_DISABLED; } static inline bool arch_irqs_disabled(void) @@ -90,6 +165,55 @@ static inline bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +#ifdef CONFIG_PPC_BOOK3S +/* + * To support disabling and enabling of irq with PMI, set of + * new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore() + * functions are added. These macros are implemented using generic + * linux local_irq_* code from include/linux/irqflags.h. + */ +#define raw_local_irq_pmu_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = irq_soft_mask_or_return(IRQS_DISABLED | \ + IRQS_PMI_DISABLED); \ + } while(0) + +#define raw_local_irq_pmu_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + arch_local_irq_restore(flags); \ + } while(0) + +#ifdef CONFIG_TRACE_IRQFLAGS +#define powerpc_local_irq_pmu_save(flags) \ + do { \ + raw_local_irq_pmu_save(flags); \ + trace_hardirqs_off(); \ + } while(0) +#define powerpc_local_irq_pmu_restore(flags) \ + do { \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_pmu_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + trace_hardirqs_on(); \ + raw_local_irq_pmu_restore(flags); \ + } \ + } while(0) +#else +#define powerpc_local_irq_pmu_save(flags) \ + do { \ + raw_local_irq_pmu_save(flags); \ + } while(0) +#define powerpc_local_irq_pmu_restore(flags) \ + do { \ + raw_local_irq_pmu_restore(flags); \ + } while (0) +#endif /* CONFIG_TRACE_IRQFLAGS */ + +#endif /* CONFIG_PPC_BOOK3S */ + #ifdef CONFIG_PPC_BOOK3E #define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory") #define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory") @@ -98,14 +222,13 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) #endif -#define hard_irq_disable() do { \ - u8 _was_enabled; \ - __hard_irq_disable(); \ - _was_enabled = local_paca->soft_enabled; \ - local_paca->soft_enabled = 0; \ - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ - if (_was_enabled) \ - trace_hardirqs_off(); \ +#define hard_irq_disable() do { \ + unsigned long flags; \ + __hard_irq_disable(); \ + flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ + if (!arch_irqs_disabled_flags(flags)) \ + trace_hardirqs_off(); \ } while(0) static inline bool lazy_irq_pending(void) @@ -127,7 +250,7 @@ static inline void may_hard_irq_enable(void) static inline bool arch_irq_disabled_regs(struct pt_regs *regs) { - return !regs->softe; + return (regs->softe & IRQS_DISABLED); } extern bool prep_irq_for_idle(void); diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index fad0e6ff460f..d76cb11be3e3 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -35,6 +35,13 @@ #define THREAD_IMC_ENABLE 0x8000000000000000ULL /* + * For debugfs interface for imc-mode and imc-command + */ +#define IMC_CNTL_BLK_OFFSET 0x3FC00 +#define IMC_CNTL_BLK_CMD_OFFSET 8 +#define IMC_CNTL_BLK_MODE_OFFSET 32 + +/* * Structure to hold memory address information for imc units. */ struct imc_mem_info { @@ -71,7 +78,7 @@ struct imc_events { struct imc_pmu { struct pmu pmu; struct imc_mem_info *mem_info; - struct imc_events **events; + struct imc_events *events; /* * Attribute groups for the PMU. Slot 0 used for * format attribute, slot 1 used for cpusmask attribute, diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 1aeb5f13b8c4..1a6c1ce17735 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -47,14 +47,14 @@ * be clobbered. */ #define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACASOFTIRQEN(r13); \ + lbz __rA,PACAIRQSOFTMASK(r13); \ lbz __rB,PACAIRQHAPPENED(r13); \ - cmpwi cr0,__rA,0; \ - li __rA,0; \ + andi. __rA,__rA,IRQS_DISABLED; \ + li __rA,IRQS_DISABLED; \ ori __rB,__rB,PACA_IRQ_HARD_DIS; \ stb __rB,PACAIRQHAPPENED(r13); \ - beq 44f; \ - stb __rA,PACASOFTIRQEN(r13); \ + bne 44f; \ + stb __rA,PACAIRQSOFTMASK(r13); \ TRACE_DISABLE_INTS; \ 44: @@ -64,9 +64,9 @@ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,0; \ + li __rB,IRQS_DISABLED; \ ori __rA,__rA,PACA_IRQ_HARD_DIS; \ - stb __rB,PACASOFTIRQEN(r13); \ + stb __rB,PACAIRQSOFTMASK(r13); \ stb __rA,PACAIRQHAPPENED(r13) #endif #endif diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 4419d435639a..9dcbfa6bbb91 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -73,6 +73,8 @@ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ extern int crashing_cpu; extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); +extern void crash_ipi_callback(struct pt_regs *); +extern int crash_wake_offline; struct kimage; struct pt_regs; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 941c2a3f231b..9db18287b5f4 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void) /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; - local_paca->soft_enabled = 1; + irq_soft_mask_set(IRQS_ENABLED); #endif } diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index 600a68bd77f5..fdd00939270b 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -2,76 +2,64 @@ #ifndef _ARCH_POWERPC_LOCAL_H #define _ARCH_POWERPC_LOCAL_H +#ifdef CONFIG_PPC_BOOK3S_64 + #include <linux/percpu.h> #include <linux/atomic.h> +#include <linux/irqflags.h> + +#include <asm/hw_irq.h> typedef struct { - atomic_long_t a; + long v; } local_t; -#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } - -#define local_read(l) atomic_long_read(&(l)->a) -#define local_set(l,i) atomic_long_set(&(l)->a, (i)) +#define LOCAL_INIT(i) { (i) } -#define local_add(i,l) atomic_long_add((i),(&(l)->a)) -#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) -#define local_inc(l) atomic_long_inc(&(l)->a) -#define local_dec(l) atomic_long_dec(&(l)->a) - -static __inline__ long local_add_return(long a, local_t *l) +static __inline__ long local_read(local_t *l) { - long t; - - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\ - add %0,%1,%0\n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%2 \n\ - bne- 1b" - : "=&r" (t) - : "r" (a), "r" (&(l->a.counter)) - : "cc", "memory"); - - return t; + return READ_ONCE(l->v); } -#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) - -static __inline__ long local_sub_return(long a, local_t *l) +static __inline__ void local_set(local_t *l, long i) { - long t; + WRITE_ONCE(l->v, i); +} - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\ - subf %0,%1,%0\n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%2 \n\ - bne- 1b" - : "=&r" (t) - : "r" (a), "r" (&(l->a.counter)) - : "cc", "memory"); +#define LOCAL_OP(op, c_op) \ +static __inline__ void local_##op(long i, local_t *l) \ +{ \ + unsigned long flags; \ + \ + powerpc_local_irq_pmu_save(flags); \ + l->v c_op i; \ + powerpc_local_irq_pmu_restore(flags); \ +} - return t; +#define LOCAL_OP_RETURN(op, c_op) \ +static __inline__ long local_##op##_return(long a, local_t *l) \ +{ \ + long t; \ + unsigned long flags; \ + \ + powerpc_local_irq_pmu_save(flags); \ + t = (l->v c_op a); \ + powerpc_local_irq_pmu_restore(flags); \ + \ + return t; \ } -static __inline__ long local_inc_return(local_t *l) -{ - long t; +#define LOCAL_OPS(op, c_op) \ + LOCAL_OP(op, c_op) \ + LOCAL_OP_RETURN(op, c_op) - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&(l->a.counter)) - : "cc", "xer", "memory"); +LOCAL_OPS(add, +=) +LOCAL_OPS(sub, -=) - return t; -} +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) +#define local_inc_return(l) local_add_return(1LL, l) +#define local_inc(l) local_inc_return(l) /* * local_inc_and_test - increment and test @@ -81,28 +69,39 @@ static __inline__ long local_inc_return(local_t *l) * and returns true if the result is zero, or false for all * other cases. */ -#define local_inc_and_test(l) (local_inc_return(l) == 0) +#define local_inc_and_test(l) (local_inc_return(l) == 0) -static __inline__ long local_dec_return(local_t *l) +#define local_dec_return(l) local_sub_return(1LL, l) +#define local_dec(l) local_dec_return(l) +#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) +#define local_dec_and_test(l) (local_dec_return((l)) == 0) + +static __inline__ long local_cmpxchg(local_t *l, long o, long n) { long t; + unsigned long flags; - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1\n\ - bne- 1b" - : "=&r" (t) - : "r" (&(l->a.counter)) - : "cc", "xer", "memory"); + powerpc_local_irq_pmu_save(flags); + t = l->v; + if (t == o) + l->v = n; + powerpc_local_irq_pmu_restore(flags); return t; } -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) +static __inline__ long local_xchg(local_t *l, long n) +{ + long t; + unsigned long flags; + + powerpc_local_irq_pmu_save(flags); + t = l->v; + l->v = n; + powerpc_local_irq_pmu_restore(flags); + + return t; +} /** * local_add_unless - add unless the number is a given value @@ -115,62 +114,35 @@ static __inline__ long local_dec_return(local_t *l) */ static __inline__ int local_add_unless(local_t *l, long a, long u) { - long t; - - __asm__ __volatile__ ( -"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\ - cmpw 0,%0,%3 \n\ - beq- 2f \n\ - add %0,%2,%0 \n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%1 \n\ - bne- 1b \n" -" subf %0,%2,%0 \n\ -2:" - : "=&r" (t) - : "r" (&(l->a.counter)), "r" (a), "r" (u) - : "cc", "memory"); - - return t != u; -} - -#define local_inc_not_zero(l) local_add_unless((l), 1, 0) - -#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) -#define local_dec_and_test(l) (local_dec_return((l)) == 0) - -/* - * Atomically test *l and decrement if it is greater than 0. - * The function returns the old value of *l minus 1. - */ -static __inline__ long local_dec_if_positive(local_t *l) -{ - long t; + unsigned long flags; + int ret = 0; - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\ - cmpwi %0,1\n\ - addi %0,%0,-1\n\ - blt- 2f\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1\n\ - bne- 1b" - "\n\ -2:" : "=&b" (t) - : "r" (&(l->a.counter)) - : "cc", "memory"); + powerpc_local_irq_pmu_save(flags); + if (l->v != u) { + l->v += a; + ret = 1; + } + powerpc_local_irq_pmu_restore(flags); - return t; + return ret; } +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + /* Use these for per-cpu local_t variables: on some archs they are * much more efficient than these naive implementations. Note they take * a variable, not an address. */ -#define __local_inc(l) ((l)->a.counter++) -#define __local_dec(l) ((l)->a.counter++) -#define __local_add(i,l) ((l)->a.counter+=(i)) -#define __local_sub(i,l) ((l)->a.counter-=(i)) +#define __local_inc(l) ((l)->v++) +#define __local_dec(l) ((l)->v++) +#define __local_add(i,l) ((l)->v+=(i)) +#define __local_sub(i,l) ((l)->v-=(i)) + +#else /* CONFIG_PPC64 */ + +#include <asm-generic/local.h> + +#endif /* CONFIG_PPC64 */ #endif /* _ARCH_POWERPC_LOCAL_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cd2fc1cc1cc7..ffe7c71e1132 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -173,11 +173,19 @@ struct machdep_calls { /* Called after scan and before resource survey */ void (*pcibios_fixup_phb)(struct pci_controller *hose); + /* + * Called after device has been added to bus and + * before sysfs has been created. + */ + void (*pcibios_bus_add_device)(struct pci_dev *pdev); + resource_size_t (*pcibios_default_alignment)(void); #ifdef CONFIG_PCI_IOV void (*pcibios_fixup_sriov)(struct pci_dev *pdev); resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno); + int (*pcibios_sriov_enable)(struct pci_dev *pdev, u16 num_vfs); + int (*pcibios_sriov_disable)(struct pci_dev *pdev); #endif /* CONFIG_PCI_IOV */ /* Called to shutdown machine specific hardware not already controlled diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h new file mode 100644 index 000000000000..6e20bb5c74ea --- /dev/null +++ b/arch/powerpc/include/asm/membarrier.h @@ -0,0 +1,27 @@ +#ifndef _ASM_POWERPC_MEMBARRIER_H +#define _ASM_POWERPC_MEMBARRIER_H + +static inline void membarrier_arch_switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ + /* + * Only need the full barrier when switching between processes. + * Barrier when switching from kernel to userspace is not + * required here, given that it is implied by mmdrop(). Barrier + * when switching from userspace to kernel is not needed after + * store to rq->curr. + */ + if (likely(!(atomic_read(&next->membarrier_state) & + (MEMBARRIER_STATE_PRIVATE_EXPEDITED | + MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev)) + return; + + /* + * The membarrier system call requires a full memory barrier + * after storing to rq->curr, before going back to user-space. + */ + smp_mb(); +} + +#endif /* _ASM_POWERPC_MEMBARRIER_H */ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 30922f699341..07e3f54de9e3 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,6 +13,7 @@ #include <asm/cputable.h> #include <linux/mm.h> +#include <linux/pkeys.h> #include <asm/cpu_has_feature.h> /* @@ -22,13 +23,23 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { - return (prot & PROT_SAO) ? VM_SAO : 0; +#ifdef CONFIG_PPC_MEM_KEYS + return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey)); +#else + return ((prot & PROT_SAO) ? VM_SAO : 0); +#endif } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { +#ifdef CONFIG_PPC_MEM_KEYS + return (vm_flags & VM_SAO) ? + __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : + __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); +#else return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); +#endif } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 5bb3dbede41a..2f806e329648 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -29,17 +29,25 @@ #define MI_Kp 0x40000000 /* Should always be set */ /* - * All pages' PP exec bits are set to 000, which means Execute for Supervisor - * and no Execute for User. - * Then we use the APG to say whether accesses are according to Page rules, - * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) - * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER - * 0 (00) => Not User, no exec => 11 (all accesses performed as user) - * 1 (01) => User but no exec => 11 (all accesses performed as user) - * 2 (10) => Not User, exec => 01 (rights according to page definition) - * 3 (11) => User, exec => 00 (all accesses performed as supervisor) - */ -#define MI_APG_INIT 0xf4ffffff + * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC + * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means + * respectively NA for All or X for Supervisor and no access for User. + * Then we use the APG to say whether accesses are according to Page rules or + * "all Supervisor" rules (Access to all) + * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: + * When that bit is not set access is done iaw "all user" + * which means no access iaw page rules. + * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED + * 0x => No access => 11 (all accesses performed as user iaw page definition) + * 10 => No user => 01 (all accesses performed according to page definition) + * 11 => User => 00 (all accesses performed as supervisor iaw page definition) + * We define all 16 groups so that all other bits of APG can take any value + */ +#ifdef CONFIG_SWAP +#define MI_APG_INIT 0xf4f4f4f4 +#else +#define MI_APG_INIT 0x44444444 +#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -102,17 +110,25 @@ #define MD_Kp 0x40000000 /* Should always be set */ /* - * All pages' PP data bits are set to either 000 or 011, which means + * All pages' PP data bits are set to either 000 or 011 or 001, which means * respectively RW for Supervisor and no access for User, or RO for - * Supervisor and no access for user. + * Supervisor and no access for user and NA for ALL. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PAGE_USER - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor - * according to page definition) - */ -#define MD_APG_INIT 0x4fffffff + * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: + * When that bit is not set access is done iaw "all user" + * which means no access iaw page rules. + * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED + * 0x => No access => 11 (all accesses performed as user iaw page definition) + * 10 => No user => 01 (all accesses performed according to page definition) + * 11 => User => 00 (all accesses performed as supervisor iaw page definition) + * We define all 16 groups so that all other bits of APG can take any value + */ +#ifdef CONFIG_SWAP +#define MD_APG_INIT 0xf4f4f4f4 +#else +#define MD_APG_INIT 0x44444444 +#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -164,6 +180,12 @@ */ #define SPRN_M_TW 799 +/* APGs */ +#define M_APG0 0x00000000 +#define M_APG1 0x00000020 +#define M_APG2 0x00000040 +#define M_APG3 0x00000060 + #ifndef __ASSEMBLY__ typedef struct { unsigned int id; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 6364f5c2cc3e..bb38312cff28 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void) } #endif +#ifdef CONFIG_PPC_MEM_KEYS +extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address); +#else +static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) +{ + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index e2a2b8400490..051b3d63afe3 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -187,11 +187,33 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +#ifdef CONFIG_PPC_MEM_KEYS +bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, + bool execute, bool foreign); +#else /* CONFIG_PPC_MEM_KEYS */ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; } + +#define pkey_mm_init(mm) +#define thread_pkey_regs_save(thread) +#define thread_pkey_regs_restore(new_thread, old_thread) +#define thread_pkey_regs_init(thread) + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + return 0; +} + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return 0x0UL; +} + +#endif /* CONFIG_PPC_MEM_KEYS */ + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_MMU_CONTEXT_H */ diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h index 0e23cd4ac8aa..13e6702ec458 100644 --- a/arch/powerpc/include/asm/mpic_timer.h +++ b/arch/powerpc/include/asm/mpic_timer.h @@ -29,17 +29,17 @@ struct mpic_timer { #ifdef CONFIG_MPIC_TIMER struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time); + time64_t time); void mpic_start_timer(struct mpic_timer *handle); void mpic_stop_timer(struct mpic_timer *handle); -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time); +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time); void mpic_free_timer(struct mpic_timer *handle); #else struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time) { return NULL; } + time64_t time) { return NULL; } void mpic_start_timer(struct mpic_timer *handle) { } void mpic_stop_timer(struct mpic_timer *handle) { } -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { } +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { } void mpic_free_timer(struct mpic_timer *handle) { } #endif diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index e97f58689ca7..9c80939b4d14 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -4,10 +4,6 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); -extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self); -#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace - #else static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index d072139ff2e5..29d37bd1f3b3 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -61,7 +61,8 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pte_page) { - *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT); + *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_USER | + _PMD_PRESENT); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index cc2bfec3aa3b..504a3c36ce5c 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -282,7 +282,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - unsigned long clr = ~pte_val(entry) & _PAGE_RO; + unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA); pte_update(ptep, clr, set); } diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 6dc0180fd5c7..f04cb46ae8a1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -31,37 +31,34 @@ /* Definitions for 8xx embedded chips. */ #define _PAGE_PRESENT 0x0001 /* Page is valid */ #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ -#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ -#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ +#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */ +#define _PAGE_HUGE 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/ #define _PAGE_DIRTY 0x0100 /* C: page changed */ /* These 4 software bits must be masked out when the L2 entry is loaded * into the TLB. */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ -#define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ -#define _PAGE_EXEC 0x0040 /* Copied to L1 APG */ -#define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */ -#define _PAGE_ACCESSED 0x0800 /* software: page referenced */ +#define _PAGE_SPECIAL 0x0020 /* SW entry */ +#define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */ +#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ +#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ #define _PMD_PRESENT 0x0001 -#define _PMD_BAD 0x0ff0 +#define _PMD_BAD 0x0fd0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c #define _PMD_PAGE_512K 0x0004 +#define _PMD_USER 0x0020 /* APG 1 */ /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 -/* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO) -#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC) -#define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE) -#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE | _PAGE_EXEC) +#ifdef CONFIG_PPC_16K_PAGES +#define _PAGE_PSIZE _PAGE_HUGE +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */ diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 5c68f4a59f75..c56de1e8026f 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -45,6 +45,29 @@ static inline int pte_present(pte_t pte) return pte_val(pte) & _PAGE_PRESENT; } +/* + * We only find page table entry in the last level + * Hence no need for other accessors + */ +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + unsigned long pteval = pte_val(pte); + /* + * A read-only access is controlled by _PAGE_USER bit. + * We have _PAGE_READ set for WRITE and EXECUTE + */ + unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER; + + if (write) + need_pte_bits |= _PAGE_WRITE; + + if ((pteval & need_pte_bits) != need_pte_bits) + return false; + + return true; +} + /* Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * @@ -103,7 +126,7 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkhuge(pte_t pte) { - return pte; + return __pte(pte_val(pte) | _PAGE_HUGE); } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -212,8 +235,10 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre #define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ _PAGE_COHERENT)) +#if _PAGE_WRITETHRU != 0 #define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ _PAGE_COHERENT | _PAGE_WRITETHRU)) +#endif #define pgprot_cached_noncoherent(prot) \ (__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL)) diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index 2da4532ca377..ccee8eb509bb 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -55,6 +55,7 @@ #define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ +#define _PAGE_PRIVILEGED (_PAGE_BAP_SR) #define _PAGE_HASHPTE 0 #define _PAGE_BUSY 0 diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 233c7504b1f2..24c73f5575ee 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -201,7 +201,10 @@ #define OPAL_SET_POWER_SHIFT_RATIO 155 #define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_PCI_SET_P2P 157 -#define OPAL_LAST 157 +#define OPAL_NPU_SPA_SETUP 159 +#define OPAL_NPU_SPA_CLEAR_CACHE 160 +#define OPAL_NPU_TL_SET 161 +#define OPAL_LAST 161 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 0c545f7fc77b..12e70fb58700 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -34,6 +34,12 @@ int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr, uint64_t bdf); int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid, uint64_t lpcr); +int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn, + uint64_t addr, uint64_t PE_mask); +int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn, + uint64_t PE_handle); +int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap, + uint64_t rate_phys, uint32_t size); int64_t opal_console_write(int64_t term_number, __be64 *length, const uint8_t *buffer); int64_t opal_console_read(int64_t term_number, __be64 *length, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 23ac7fc0af23..b62c31037cad 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -159,7 +159,7 @@ struct paca_struct { u64 saved_r1; /* r1 save for RTAS calls or PM */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ - u8 soft_enabled; /* irq soft-enable flag */ + u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ @@ -239,8 +239,7 @@ struct paca_struct { */ u64 exrfi[EX_SIZE] __aligned(0x80); void *rfi_flush_fallback_area; - u64 l1d_flush_congruence; - u64 l1d_flush_sets; + u64 l1d_flush_size; #endif }; diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 62ed83db04ae..94d449031b18 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -197,25 +197,22 @@ struct pci_dn { struct iommu_table_group *table_group; /* for phb's or bridges */ int pci_ext_config_space; /* for pci devices */ - - struct pci_dev *pcidev; /* back-pointer to the pci device */ #ifdef CONFIG_EEH struct eeh_dev *edev; /* eeh device */ #endif #define IODA_INVALID_PE 0xFFFFFFFF -#ifdef CONFIG_PPC_POWERNV unsigned int pe_number; - int vf_index; /* VF index in the PF */ #ifdef CONFIG_PCI_IOV + int vf_index; /* VF index in the PF */ u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ unsigned int *pe_num_map; /* PE# for the first VF PE or array */ bool m64_single_mode; /* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64 (-1) - int (*m64_map)[PCI_SRIOV_NUM_BARS]; + int (*m64_map)[PCI_SRIOV_NUM_BARS]; /* Only used on powernv */ + int last_allow_rc; /* Only used on pseries */ #endif /* CONFIG_PCI_IOV */ int mps; /* Maximum Payload Size */ -#endif struct list_head child_list; struct list_head list; struct resource holes[PCI_SRIOV_NUM_BARS]; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 8dc32eacc97c..d82802ff5088 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -121,6 +121,8 @@ extern int remove_phb_dynamic(struct pci_controller *phb); extern struct pci_dev *of_create_pci_dev(struct device_node *node, struct pci_bus *bus, int devfn); +extern unsigned int pci_parse_of_flags(u32 addr0, int bridge); + extern void of_scan_pci_bridge(struct pci_dev *dev); extern void of_scan_bus(struct device_node *node, struct pci_bus *bus); diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h new file mode 100644 index 000000000000..0409c80c32c0 --- /dev/null +++ b/arch/powerpc/include/asm/pkeys.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * PowerPC Memory Protection Keys management + * + * Copyright 2017, Ram Pai, IBM Corporation. + */ + +#ifndef _ASM_POWERPC_KEYS_H +#define _ASM_POWERPC_KEYS_H + +#include <linux/jump_label.h> +#include <asm/firmware.h> + +DECLARE_STATIC_KEY_TRUE(pkey_disabled); +extern int pkeys_total; /* total pkeys as per device tree */ +extern u32 initial_allocation_mask; /* bits set for reserved keys */ + +/* + * Define these here temporarily so we're not dependent on patching linux/mm.h. + * Once it's updated we can drop these. + */ +#ifndef VM_PKEY_BIT0 +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +# define VM_PKEY_BIT4 VM_HIGH_ARCH_4 +#endif + +#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ + VM_PKEY_BIT3 | VM_PKEY_BIT4) + +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | \ + PKEY_DISABLE_WRITE | \ + PKEY_DISABLE_EXECUTE) + +static inline u64 pkey_to_vmflag_bits(u16 pkey) +{ + return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); +} + +static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + if (static_branch_likely(&pkey_disabled)) + return 0x0UL; + + return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL)); +} + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + if (static_branch_likely(&pkey_disabled)) + return 0; + return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; +} + +#define arch_max_pkey() pkeys_total + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); +} + +static inline u16 pte_to_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL)); +} + +#define pkey_alloc_mask(pkey) (0x1 << pkey) + +#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) + +#define __mm_pkey_allocated(mm, pkey) { \ + mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \ +} + +#define __mm_pkey_free(mm, pkey) { \ + mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey); \ +} + +#define __mm_pkey_is_allocated(mm, pkey) \ + (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey)) + +#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \ + pkey_alloc_mask(pkey)) + +static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) +{ + /* A reserved key is never considered as 'explicitly allocated' */ + return ((pkey < arch_max_pkey()) && + !__mm_pkey_is_reserved(pkey) && + __mm_pkey_is_allocated(mm, pkey)); +} + +extern void __arch_activate_pkey(int pkey); +extern void __arch_deactivate_pkey(int pkey); +/* + * Returns a positive, 5-bit key on success, or -1 on failure. + * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and + * mm_pkey_free(). + */ +static inline int mm_pkey_alloc(struct mm_struct *mm) +{ + /* + * Note: this is the one and only place we make sure that the pkey is + * valid as far as the hardware is concerned. The rest of the kernel + * trusts that only good, valid pkeys come out of here. + */ + u32 all_pkeys_mask = (u32)(~(0x0)); + int ret; + + if (static_branch_likely(&pkey_disabled)) + return -1; + + /* + * Are we out of pkeys? We must handle this specially because ffz() + * behavior is undefined if there are no zeros. + */ + if (mm_pkey_allocation_map(mm) == all_pkeys_mask) + return -1; + + ret = ffz((u32)mm_pkey_allocation_map(mm)); + __mm_pkey_allocated(mm, ret); + + /* + * Enable the key in the hardware + */ + if (ret > 0) + __arch_activate_pkey(ret); + return ret; +} + +static inline int mm_pkey_free(struct mm_struct *mm, int pkey) +{ + if (static_branch_likely(&pkey_disabled)) + return -1; + + if (!mm_pkey_is_allocated(mm, pkey)) + return -EINVAL; + + /* + * Disable the key in the hardware + */ + __arch_deactivate_pkey(pkey); + __mm_pkey_free(mm, pkey); + + return 0; +} + +/* + * Try to dedicate one of the protection keys to be used as an + * execute-only protection key. + */ +extern int __execute_only_pkey(struct mm_struct *mm); +static inline int execute_only_pkey(struct mm_struct *mm) +{ + if (static_branch_likely(&pkey_disabled)) + return -1; + + return __execute_only_pkey(mm); +} + +extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey); +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + if (static_branch_likely(&pkey_disabled)) + return 0; + + /* + * Is this an mprotect_pkey() call? If so, never override the value that + * came from the user. + */ + if (pkey != -1) + return pkey; + + return __arch_override_mprotect_pkey(vma, prot, pkey); +} + +extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); +static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + if (static_branch_likely(&pkey_disabled)) + return -EINVAL; + return __arch_set_user_pkey_access(tsk, pkey, init_val); +} + +static inline bool arch_pkeys_enabled(void) +{ + return !static_branch_likely(&pkey_disabled); +} + +extern void pkey_mm_init(struct mm_struct *mm); +extern bool arch_supports_pkeys(int cap); +extern unsigned int arch_usable_pkeys(void); +extern void thread_pkey_regs_save(struct thread_struct *thread); +extern void thread_pkey_regs_restore(struct thread_struct *new_thread, + struct thread_struct *old_thread); +extern void thread_pkey_regs_init(struct thread_struct *thread); +#endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h new file mode 100644 index 000000000000..f6945d3bc971 --- /dev/null +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _ASM_PNV_OCXL_H +#define _ASM_PNV_OCXL_H + +#include <linux/pci.h> + +#define PNV_OCXL_TL_MAX_TEMPLATE 63 +#define PNV_OCXL_TL_BITS_PER_RATE 4 +#define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) + +extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported); +extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); + +extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, + char *rate_buf, int rate_buf_size); +extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size); + +extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); +extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle); +extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle); + +extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, + void **platform_data); +extern void pnv_ocxl_spa_release(void *platform_data); +extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle); + +extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); +extern void pnv_ocxl_free_xive_irq(u32 irq); + +#endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ce0930d68857..ab5c1588b487 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -236,6 +236,7 @@ #define PPC_INST_RFCI 0x4c000066 #define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFMCI 0x4c00004c +#define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 @@ -383,6 +384,7 @@ #define __PPC_ME64(s) __PPC_MB64(s) #define __PPC_BI(s) (((s) & 0x1f) << 16) #define __PPC_CT(t) (((t) & 0x0f) << 21) +#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11)) /* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index bdab3b74eb98..01299cdc9806 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -309,6 +309,11 @@ struct thread_struct { struct thread_vr_state ckvr_state; /* Checkpointed VR state */ unsigned long ckvrsave; /* Checkpointed VRSAVE */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC_MEM_KEYS + unsigned long amr; + unsigned long iamr; + unsigned long uamor; +#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 825bd5998701..b04c5ce8191b 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -80,21 +80,20 @@ extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); -/* The of_drconf_cell struct defines the layout of the LMB array - * specified in the device tree property - * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory - */ -struct of_drconf_cell { - u64 base_addr; - u32 drc_index; - u32 reserved; - u32 aa_index; - u32 flags; +struct of_drc_info { + char *drc_type; + char *drc_name_prefix; + u32 drc_index_start; + u32 drc_name_suffix_start; + u32 num_sequential_elems; + u32 sequential_inc; + u32 drc_power_domain; + u32 last_drc_index; }; -#define DRCONF_MEM_ASSIGNED 0x00000008 -#define DRCONF_MEM_AI_INVALID 0x00000040 -#define DRCONF_MEM_RESERVED 0x00000080 +extern int of_read_drc_info_cell(struct property **prop, + const __be32 **curval, struct of_drc_info *data); + /* * There are two methods for telling firmware what our capabilities are. @@ -159,6 +158,7 @@ struct of_drconf_cell { #define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */ #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ +#define OV5_DRMEM_V2 0x1680 /* ibm,dynamic-reconfiguration-v2 */ #define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */ #define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */ #define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */ @@ -175,6 +175,7 @@ struct of_drconf_cell { #define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ /* Radix Table Extensions */ #define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ +#define OV5_DRC_INFO 0x1640 /* Redef Prop Structures: drc-info */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index ce142ef99ba7..c4a72c7a8c83 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -8,9 +8,6 @@ #ifndef _PAGE_HASHPTE #define _PAGE_HASHPTE 0 #endif -#ifndef _PAGE_SHARED -#define _PAGE_SHARED 0 -#endif #ifndef _PAGE_HWWRITE #define _PAGE_HWWRITE 0 #endif @@ -45,6 +42,20 @@ #ifndef _PAGE_PTE #define _PAGE_PTE 0 #endif +/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */ +#ifndef _PAGE_PRIVILEGED +#define _PAGE_PRIVILEGED 0 +#else +#ifndef _PAGE_USER +#define _PAGE_USER 0 +#endif +#endif +#ifndef _PAGE_NA +#define _PAGE_NA 0 +#endif +#ifndef _PAGE_HUGE +#define _PAGE_HUGE 0 +#endif #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -53,17 +64,22 @@ #define _PMD_SIZE 0 #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE() #endif +#ifndef _PMD_USER +#define _PMD_USER 0 +#endif #ifndef _PAGE_KERNEL_RO -#define _PAGE_KERNEL_RO (_PAGE_RO) +#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO) #endif #ifndef _PAGE_KERNEL_ROX -#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO) +#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC) #endif #ifndef _PAGE_KERNEL_RW -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) +#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE) #endif #ifndef _PAGE_KERNEL_RWX -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC) +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE | _PAGE_EXEC) #endif #ifndef _PAGE_HPTEFLAGS #define _PAGE_HPTEFLAGS _PAGE_HASHPTE @@ -85,7 +101,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); */ static inline bool pte_user(pte_t pte) { - return (pte_val(pte) & _PAGE_USER) == _PAGE_USER; + return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER; } #endif /* __ASSEMBLY__ */ @@ -115,7 +131,8 @@ static inline bool pte_user(pte_t pte) /* Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \ - _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ + _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \ + _PAGE_PRIVILEGED | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) /* @@ -142,7 +159,7 @@ static inline bool pte_user(pte_t pte) * * Note due to the way vm flags are laid out, the bits are XWR */ -#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) #define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ _PAGE_EXEC) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b779f3ccd412..e6c7eadf6bce 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -312,7 +312,6 @@ DSISR_BAD_EXT_CTRL) #define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \ DSISR_ATTR_CONFLICT | \ - DSISR_KEYFAULT | \ DSISR_UNSUPP_MMU | \ DSISR_PRTABLE_FAULT | \ DSISR_ICSWX_NO_CT | \ @@ -432,8 +431,9 @@ #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ -#define SPRN_HMER 0x150 /* Hardware m? error recovery */ -#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ +#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ +#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ +#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ #define SPRN_PCR 0x152 /* Processor compatibility register */ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index 53a7e2955d3e..7192eece6c3e 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -66,86 +66,4 @@ #define DC_DFWT 0x40000000 /* Data cache is forced write through */ #define DC_LES 0x20000000 /* Caches are little endian mode */ -#ifdef CONFIG_8xx_CPU6 -#define do_mtspr_cpu6(rn, rn_addr, v) \ - do { \ - int _reg_cpu6 = rn_addr, _tmp_cpu6; \ - asm volatile("stw %0, %1;" \ - "lwz %0, %1;" \ - "mtspr " __stringify(rn) ",%2" : \ - : "r" (_reg_cpu6), "m"(_tmp_cpu6), \ - "r" ((unsigned long)(v)) \ - : "memory"); \ - } while (0) - -#define do_mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ - : "r" ((unsigned long)(v)) \ - : "memory") -#define mtspr(rn, v) \ - do { \ - if (rn == SPRN_IMMR) \ - do_mtspr_cpu6(rn, 0x3d30, v); \ - else if (rn == SPRN_IC_CST) \ - do_mtspr_cpu6(rn, 0x2110, v); \ - else if (rn == SPRN_IC_ADR) \ - do_mtspr_cpu6(rn, 0x2310, v); \ - else if (rn == SPRN_IC_DAT) \ - do_mtspr_cpu6(rn, 0x2510, v); \ - else if (rn == SPRN_DC_CST) \ - do_mtspr_cpu6(rn, 0x3110, v); \ - else if (rn == SPRN_DC_ADR) \ - do_mtspr_cpu6(rn, 0x3310, v); \ - else if (rn == SPRN_DC_DAT) \ - do_mtspr_cpu6(rn, 0x3510, v); \ - else if (rn == SPRN_MI_CTR) \ - do_mtspr_cpu6(rn, 0x2180, v); \ - else if (rn == SPRN_MI_AP) \ - do_mtspr_cpu6(rn, 0x2580, v); \ - else if (rn == SPRN_MI_EPN) \ - do_mtspr_cpu6(rn, 0x2780, v); \ - else if (rn == SPRN_MI_TWC) \ - do_mtspr_cpu6(rn, 0x2b80, v); \ - else if (rn == SPRN_MI_RPN) \ - do_mtspr_cpu6(rn, 0x2d80, v); \ - else if (rn == SPRN_MI_CAM) \ - do_mtspr_cpu6(rn, 0x2190, v); \ - else if (rn == SPRN_MI_RAM0) \ - do_mtspr_cpu6(rn, 0x2390, v); \ - else if (rn == SPRN_MI_RAM1) \ - do_mtspr_cpu6(rn, 0x2590, v); \ - else if (rn == SPRN_MD_CTR) \ - do_mtspr_cpu6(rn, 0x3180, v); \ - else if (rn == SPRN_M_CASID) \ - do_mtspr_cpu6(rn, 0x3380, v); \ - else if (rn == SPRN_MD_AP) \ - do_mtspr_cpu6(rn, 0x3580, v); \ - else if (rn == SPRN_MD_EPN) \ - do_mtspr_cpu6(rn, 0x3780, v); \ - else if (rn == SPRN_M_TWB) \ - do_mtspr_cpu6(rn, 0x3980, v); \ - else if (rn == SPRN_MD_TWC) \ - do_mtspr_cpu6(rn, 0x3b80, v); \ - else if (rn == SPRN_MD_RPN) \ - do_mtspr_cpu6(rn, 0x3d80, v); \ - else if (rn == SPRN_M_TW) \ - do_mtspr_cpu6(rn, 0x3f80, v); \ - else if (rn == SPRN_MD_CAM) \ - do_mtspr_cpu6(rn, 0x3190, v); \ - else if (rn == SPRN_MD_RAM0) \ - do_mtspr_cpu6(rn, 0x3390, v); \ - else if (rn == SPRN_MD_RAM1) \ - do_mtspr_cpu6(rn, 0x3590, v); \ - else if (rn == SPRN_DEC) \ - do_mtspr_cpu6(rn, 0x2c00, v); \ - else if (rn == SPRN_TBWL) \ - do_mtspr_cpu6(rn, 0x3880, v); \ - else if (rn == SPRN_TBWU) \ - do_mtspr_cpu6(rn, 0x3a80, v); \ - else if (rn == SPRN_DPDR) \ - do_mtspr_cpu6(rn, 0x2d30, v); \ - else \ - do_mtspr(rn, v); \ - } while (0) -#endif - #endif /* _ASM_POWERPC_REG_8xx_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 449912f057f6..d61f9c96d916 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -389,3 +389,6 @@ COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) SYSCALL(statx) +SYSCALL(pkey_alloc) +SYSCALL(pkey_free) +SYSCALL(pkey_mprotect) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 9ba11dbcaca9..daf1ba97a00c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,14 +12,10 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 384 +#define NR_syscalls 387 #define __NR__exit __NR_exit -#define __IGNORE_pkey_mprotect -#define __IGNORE_pkey_alloc -#define __IGNORE_pkey_free - #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 1d3f2be5ae39..fa4288822b68 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -10,6 +10,41 @@ #define _ASM_POWERPC_XIVE_REGS_H /* + * "magic" Event State Buffer (ESB) MMIO offsets. + * + * Each interrupt source has a 2-bit state machine called ESB + * which can be controlled by MMIO. It's made of 2 bits, P and + * Q. P indicates that an interrupt is pending (has been sent + * to a queue and is waiting for an EOI). Q indicates that the + * interrupt has been triggered while pending. + * + * This acts as a coalescing mechanism in order to guarantee + * that a given interrupt only occurs at most once in a queue. + * + * When doing an EOI, the Q bit will indicate if the interrupt + * needs to be re-triggered. + * + * The following offsets into the ESB MMIO allow to read or + * manipulate the PQ bits. They must be used with an 8-bytes + * load instruction. They all return the previous state of the + * interrupt (atomically). + * + * Additionally, some ESB pages support doing an EOI via a + * store at 0 and some ESBs support doing a trigger via a + * separate trigger page. + */ +#define XIVE_ESB_STORE_EOI 0x400 /* Store */ +#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ +#define XIVE_ESB_GET 0x800 /* Load */ +#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ +#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ +#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ +#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ + +#define XIVE_ESB_VAL_P 0x2 +#define XIVE_ESB_VAL_Q 0x1 + +/* * Thread Management (aka "TM") registers */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 371fbebf1ec9..7624e22f5045 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -58,6 +58,9 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_EOI_FW 0x10 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 +/* Special flag set by KVM for excalation interrupts */ +#define XIVE_IRQ_NO_EOI 0x80 + #define XIVE_INVALID_CHIP_ID -1 /* A queue tracking structure in a CPU */ @@ -72,41 +75,6 @@ struct xive_q { atomic_t pending_count; }; -/* - * "magic" Event State Buffer (ESB) MMIO offsets. - * - * Each interrupt source has a 2-bit state machine called ESB - * which can be controlled by MMIO. It's made of 2 bits, P and - * Q. P indicates that an interrupt is pending (has been sent - * to a queue and is waiting for an EOI). Q indicates that the - * interrupt has been triggered while pending. - * - * This acts as a coalescing mechanism in order to guarantee - * that a given interrupt only occurs at most once in a queue. - * - * When doing an EOI, the Q bit will indicate if the interrupt - * needs to be re-triggered. - * - * The following offsets into the ESB MMIO allow to read or - * manipulate the PQ bits. They must be used with an 8-bytes - * load instruction. They all return the previous state of the - * interrupt (atomically). - * - * Additionally, some ESB pages support doing an EOI via a - * store at 0 and some ESBs support doing a trigger via a - * separate trigger page. - */ -#define XIVE_ESB_STORE_EOI 0x400 /* Store */ -#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ -#define XIVE_ESB_GET 0x800 /* Load */ -#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ -#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ -#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ -#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ - -#define XIVE_ESB_VAL_P 0x2 -#define XIVE_ESB_VAL_Q 0x1 - /* Global enable flags for the XIVE support */ extern bool __xive_enabled; @@ -154,7 +122,7 @@ static inline bool xive_enabled(void) { return false; } static inline bool xive_spapr_init(void) { return false; } static inline bool xive_native_init(void) { return false; } static inline void xive_smp_probe(void) { } -extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } +static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } static inline void xive_smp_setup_cpu(void) { } static inline void xive_smp_disable_cpu(void) { } static inline void xive_kexec_teardown_cpu(int secondary) { } diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 5f201d40bcca..860c59291bfc 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -97,6 +97,7 @@ #define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */ #define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */ #define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */ +#define ELF_NPKEY 3 /* includes amr, iamr, uamor */ typedef unsigned long elf_greg_t64; typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG]; diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index e63bc37e33af..65065ce32814 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -30,4 +30,10 @@ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_ACCESS_MASK +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE |\ + PKEY_DISABLE_EXECUTE) #endif /* _UAPI_ASM_POWERPC_MMAN_H */ diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index df8684f31919..389c36fd8299 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -395,5 +395,8 @@ #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 #define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f390d57cf2e1..88b84ac76b53 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -178,7 +178,7 @@ int main(void) OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); - OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled); + OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); #ifdef CONFIG_PPC_BOOK3S OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); @@ -239,8 +239,7 @@ int main(void) OFFSET(PACA_IN_NMI, paca_struct, in_nmi); OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); OFFSET(PACA_EXRFI, paca_struct, exrfi); - OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); - OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size); #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); @@ -401,6 +400,8 @@ int main(void) /* Other bits used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); + DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 679bbe714e85..3f30c994e931 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -31,7 +31,6 @@ _GLOBAL(__setup_cpu_power7) mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 - bl __init_tlb_power7 mtlr r11 blr @@ -45,7 +44,6 @@ _GLOBAL(__restore_cpu_power7) mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 - bl __init_tlb_power7 mtlr r11 blr @@ -64,7 +62,6 @@ _GLOBAL(__setup_cpu_power8) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA206 bl __init_HFSCR - bl __init_tlb_power8 bl __init_PMU_HV bl __init_PMU_HV_ISA207 mtlr r11 @@ -86,7 +83,6 @@ _GLOBAL(__restore_cpu_power8) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA206 bl __init_HFSCR - bl __init_tlb_power8 bl __init_PMU_HV bl __init_PMU_HV_ISA207 mtlr r11 @@ -111,7 +107,6 @@ _GLOBAL(__setup_cpu_power9) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA300 bl __init_HFSCR - bl __init_tlb_power9 bl __init_PMU_HV mtlr r11 blr @@ -136,7 +131,6 @@ _GLOBAL(__restore_cpu_power9) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA300 bl __init_HFSCR - bl __init_tlb_power9 bl __init_PMU_HV mtlr r11 blr @@ -194,50 +188,6 @@ __init_HFSCR: mtspr SPRN_HFSCR,r3 blr -/* - * Clear the TLB using the specified IS form of tlbiel instruction - * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. - */ -__init_tlb_power7: - li r6,POWER7_TLB_SETS - mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ - ptesync -2: tlbiel r7 - addi r7,r7,0x1000 - bdnz 2b - ptesync -1: blr - -__init_tlb_power8: - li r6,POWER8_TLB_SETS - mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ - ptesync -2: tlbiel r7 - addi r7,r7,0x1000 - bdnz 2b - ptesync -1: blr - -/* - * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process - * and one for partition scope to clear process and partition table entries. - */ -__init_tlb_power9: - li r6,POWER9_TLB_SETS_HASH - 1 - mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ - li r8,0 - ptesync - PPC_TLBIEL(7, 8, 2, 1, 0) - PPC_TLBIEL(7, 8, 2, 0, 0) -2: addi r7,r7,0x1000 - PPC_TLBIEL(7, 8, 0, 0, 0) - bdnz 2b - ptesync -1: blr - __init_PMU_HV: li r5,0 mtspr SPRN_MMCRC,r5 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1350f49d81a8..c40a9fc1e5d1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -74,9 +74,6 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); -extern void __flush_tlb_power7(unsigned int action); -extern void __flush_tlb_power8(unsigned int action); -extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); @@ -368,7 +365,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, - .flush_tlb = __flush_tlb_power7, .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, @@ -386,7 +382,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -404,7 +399,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .platform = "power9", }, { /* Power7 */ @@ -423,7 +417,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, - .flush_tlb = __flush_tlb_power7, .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, @@ -443,7 +436,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, - .flush_tlb = __flush_tlb_power7, .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7+", }, @@ -463,7 +455,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -483,7 +474,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -503,7 +493,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -523,7 +512,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -543,7 +531,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, @@ -563,7 +550,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, @@ -583,7 +569,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index cbabb5adccd9..00b215125d3e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -44,6 +44,14 @@ #define REAL_MODE_TIMEOUT 10000 static int time_to_dump; +/* + * crash_wake_offline should be set to 1 by platforms that intend to wake + * up offline cpus prior to jumping to a kdump kernel. Currently powernv + * sets it to 1, since we want to avoid things from happening when an + * offline CPU wakes up due to something like an HMI (malfunction error), + * which propagates to all threads. + */ +int crash_wake_offline; #define CRASH_HANDLER_MAX 3 /* List of shutdown handles */ @@ -63,15 +71,12 @@ static int handle_fault(struct pt_regs *regs) #ifdef CONFIG_SMP static atomic_t cpus_in_crash; -static void crash_ipi_callback(struct pt_regs *regs) +void crash_ipi_callback(struct pt_regs *regs) { static cpumask_t cpus_state_saved = CPU_MASK_NONE; int cpu = smp_processor_id(); - if (!cpu_online(cpu)) - return; - hard_irq_disable(); if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); @@ -109,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu) printk(KERN_EMERG "Sending IPI to other CPUs\n"); + if (crash_wake_offline) + ncpus = num_present_cpus() - 1; + crash_send_ipi(crash_ipi_callback); smp_wmb(); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8bdc2f96c5d6..945e2c29ad2d 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -77,8 +77,6 @@ struct dt_cpu_feature { * Set up the base CPU */ -extern void __flush_tlb_power8(unsigned int action); -extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); @@ -92,27 +90,6 @@ static struct { static void (*init_pmu_registers)(void); -static void cpufeatures_flush_tlb(void) -{ - /* - * This is a temporary measure to keep equivalent TLB flush as the - * cputable based setup code. - */ - switch (PVR_VER(mfspr(SPRN_PVR))) { - case PVR_POWER8: - case PVR_POWER8E: - case PVR_POWER8NVL: - __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL); - break; - case PVR_POWER9: - __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL); - break; - default: - pr_err("unknown CPU version for boot TLB flush\n"); - break; - } -} - static void __restore_cpu_cpufeatures(void) { /* @@ -137,8 +114,6 @@ static void __restore_cpu_cpufeatures(void) if (init_pmu_registers) init_pmu_registers(); - - cpufeatures_flush_tlb(); } static char dt_cpu_name[64]; @@ -157,7 +132,6 @@ static struct cpu_spec __initdata base_cpu_spec = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = NULL, .cpu_restore = __restore_cpu_cpufeatures, - .flush_tlb = NULL, .machine_check_early = NULL, .platform = NULL, }; @@ -412,7 +386,6 @@ static void init_pmu_power8(void) static int __init feat_enable_mce_power8(struct dt_cpu_feature *f) { cur_cpu_spec->platform = "power8"; - cur_cpu_spec->flush_tlb = __flush_tlb_power8; cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8; return 1; @@ -451,7 +424,6 @@ static void init_pmu_power9(void) static int __init feat_enable_mce_power9(struct dt_cpu_feature *f) { cur_cpu_spec->platform = "power9"; - cur_cpu_spec->flush_tlb = __flush_tlb_power9; cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9; return 1; @@ -752,8 +724,6 @@ static void __init cpufeatures_setup_finished(void) system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); - cpufeatures_flush_tlb(); - pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n", cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features); } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index cbca0a667682..2b9df0040d6b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -740,6 +740,65 @@ static void *eeh_restore_dev_state(void *data, void *userdata) return NULL; } +int eeh_restore_vf_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 devctl, cmd, cap2, aer_capctl; + int old_mps; + + if (edev->pcie_cap) { + /* Restore MPS */ + old_mps = (ffs(pdn->mps) - 8) << 5; + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + devctl |= old_mps; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + + /* Disable Completion Timeout if possible */ + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, + 4, &cap2); + if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { + eeh_ops->read_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, &cap2); + cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; + eeh_ops->write_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, cap2); + } + } + + /* Enable SERR and parity checking */ + eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); + cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); + + /* Enable report various errors */ + if (edev->pcie_cap) { + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_CERE; + devctl |= (PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + } + + /* Enable ECRC generation and check */ + if (edev->pcie_cap && edev->aer_cap) { + eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, &aer_capctl); + aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, aer_capctl); + } + + return 0; +} + /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 4f71e4c9beb7..beea2182d754 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata) edev->in_error = true; eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); return NULL; } @@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata) driver->err_handler->resume(dev); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +#ifdef CONFIG_PCI_IOV + eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); +#endif return NULL; } @@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata) driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); return NULL; } @@ -440,7 +446,7 @@ static void *eeh_add_virt_device(void *data, void *userdata) return NULL; } -#ifdef CONFIG_PPC_POWERNV +#ifdef CONFIG_PCI_IOV pci_iov_add_virtfn(edev->physfn, pdn->vf_index); #endif return NULL; @@ -496,7 +502,7 @@ static void *eeh_rmv_device(void *data, void *userdata) (*removed)++; if (edev->physfn) { -#ifdef CONFIG_PPC_POWERNV +#ifdef CONFIG_PCI_IOV struct pci_dn *pdn = eeh_dev_to_pdn(edev); pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 797549289798..deed906dd8f1 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -48,7 +48,7 @@ static ssize_t eeh_show_##_name(struct device *dev, \ \ return sprintf(buf, _format "\n", edev->_memb); \ } \ -static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); +static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL); EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); @@ -90,6 +90,65 @@ static ssize_t eeh_pe_state_store(struct device *dev, static DEVICE_ATTR_RW(eeh_pe_state); +#ifdef CONFIG_PCI_IOV +static ssize_t eeh_notify_resume_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!edev || !edev->pe) + return -ENODEV; + + pdn = pci_get_pdn(pdev); + return sprintf(buf, "%d\n", pdn->last_allow_rc); +} + +static ssize_t eeh_notify_resume_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + if (!edev || !edev->pe || !eeh_ops->notify_resume) + return -ENODEV; + + if (eeh_ops->notify_resume(pci_get_pdn(pdev))) + return -EIO; + + return count; +} +static DEVICE_ATTR_RW(eeh_notify_resume); + +static int eeh_notify_resume_add(struct pci_dev *pdev) +{ + struct device_node *np; + int rc = 0; + + np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn); + + if (of_property_read_bool(np, "ibm,is-open-sriov-pf")) + rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume); + + return rc; +} + +static void eeh_notify_resume_remove(struct pci_dev *pdev) +{ + struct device_node *np; + + np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn); + + if (of_property_read_bool(np, "ibm,is-open-sriov-pf")) + device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume); +} +#else +static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; } +static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { } +#endif /* CONFIG_PCI_IOV */ + void eeh_sysfs_add_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); @@ -104,6 +163,7 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); + rc += eeh_notify_resume_add(pdev); if (rc) pr_warn("EEH: Unable to create sysfs entries\n"); @@ -129,6 +189,8 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); + eeh_notify_resume_remove(pdev); + if (edev) edev->mode &= ~EEH_DEV_SYSFS; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e780e1fbf6c2..eb8d01bae8c6 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -211,7 +211,7 @@ transfer_to_handler_cont: mflr r9 lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif #ifdef CONFIG_TRACE_IRQFLAGS @@ -301,7 +301,7 @@ stack_ovf: lis r9,StackOverflow@ha addi r9,r9,StackOverflow@l LOAD_MSR_KERNEL(r10,MSR_KERNEL) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR0,r9 @@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r7,_NIP(r1) lwz r2,GPR2(r1) lwz r1,GPR1(r1) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR0,r7 @@ -727,7 +727,7 @@ fast_exception_return: lwz r10,_LINK(r11) mtlr r10 REST_GPR(10, r11) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR1,r9 @@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) .globl exc_exit_restart exc_exit_restart: lwz r12,_NIP(r1) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR0,r12 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2748584b767d..2cb5109a7ea3 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -133,10 +133,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) * of irq tracing is used, we additionally check that condition * is correct */ -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) - lbz r10,PACASOFTIRQEN(r13) - xori r10,r10,1 -1: tdnei r10,0 +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) + lbz r10,PACAIRQSOFTMASK(r13) +1: tdnei r10,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif @@ -152,7 +151,7 @@ system_call: /* label this so stack traces look sane */ /* We do need to set SOFTE in the stack frame or the return * from interrupt will be painful */ - li r10,1 + li r10,IRQS_ENABLED std r10,SOFTE(r1) CURRENT_THREAD_INFO(r11, r1) @@ -755,10 +754,10 @@ resume_kernel: beq+ restore /* Check that preempt_count() == 0 and interrupts are enabled */ lwz r8,TI_PREEMPT(r9) - cmpwi cr1,r8,0 + cmpwi cr0,r8,0 + bne restore ld r0,SOFTE(r1) - cmpdi r0,0 - crandc eq,cr1*4+eq,eq + andi. r0,r0,IRQS_DISABLED bne restore /* @@ -796,12 +795,12 @@ restore: * are about to re-enable interrupts */ ld r5,SOFTE(r1) - lbz r6,PACASOFTIRQEN(r13) - cmpwi cr0,r5,0 - beq .Lrestore_irq_off + lbz r6,PACAIRQSOFTMASK(r13) + andi. r5,r5,IRQS_DISABLED + bne .Lrestore_irq_off /* We are enabling, were we already enabled ? Yes, just return */ - cmpwi cr0,r6,1 + andi. r6,r6,IRQS_DISABLED beq cr0,.Ldo_restore /* @@ -820,8 +819,8 @@ restore: */ .Lrestore_no_replay: TRACE_ENABLE_INTS - li r0,1 - stb r0,PACASOFTIRQEN(r13); + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13); /* * Final return path. BookE is handled in a different file @@ -939,9 +938,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) beq 1f rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS stb r7,PACAIRQHAPPENED(r13) -1: li r0,0 - stb r0,PACASOFTIRQEN(r13); - TRACE_DISABLE_INTS +1: +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) + /* The interrupt should not have soft enabled. */ + lbz r7,PACAIRQSOFTMASK(r13) +1: tdeqi r7,IRQS_ENABLED + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING +#endif b .Ldo_restore /* @@ -979,6 +982,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) addi r3,r1,STACK_FRAME_OVERHEAD; bl do_IRQ b ret_from_except +1: cmpwi cr0,r3,0xf00 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl performance_monitor_exception + b ret_from_except 1: cmpwi cr0,r3,0xe60 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; @@ -1055,15 +1063,15 @@ _GLOBAL(enter_rtas) li r0,0 mtcr r0 -#ifdef CONFIG_BUG +#ifdef CONFIG_BUG /* There is no way it is acceptable to get here with interrupts enabled, * check it with the asm equivalent of WARN_ON */ - lbz r0,PACASOFTIRQEN(r13) -1: tdnei r0,0 + lbz r0,PACAIRQSOFTMASK(r13) +1: tdeqi r0,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif - + /* Hard-disable interrupts */ mfmsr r6 rldicl r7,r6,48,1 @@ -1107,6 +1115,17 @@ __enter_rtas: rtas_return_loc: FIXUP_ENDIAN + /* + * Clear RI and set SF before anything. + */ + mfmsr r6 + li r0,MSR_RI + andc r6,r6,r0 + sldi r0,r0,(MSR_SF_LG - MSR_RI_LG) + or r6,r6,r0 + sync + mtmsrd r6 + /* relocation is off at this point */ GET_PACA(r4) clrldi r4,r4,2 /* convert to realmode address */ @@ -1115,12 +1134,6 @@ rtas_return_loc: 0: mflr r3 ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */ - mfmsr r6 - li r0,MSR_RI - andc r6,r6,r0 - sync - mtmsrd r6 - ld r1,PACAR1(r4) /* Restore our SP */ ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index acd8ca76233e..ee832d344a5a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10,SPRN_ESR SPECIAL_EXC_STORE(r10,ESR) - lbz r10,PACASOFTIRQEN(r13) + lbz r10,PACAIRQSOFTMASK(r13) SPECIAL_EXC_STORE(r10,SOFTE) ld r10,_NIP(r1) SPECIAL_EXC_STORE(r10,CSRR0) @@ -206,17 +206,17 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - lbz r6,PACASOFTIRQEN(r13) + lbz r6,PACAIRQSOFTMASK(r13) ld r5,SOFTE(r1) /* Interrupts had better not already be enabled... */ - twnei r6,0 + tweqi r6,IRQS_ENABLED - cmpwi cr0,r5,0 - beq 1f + andi. r6,r5,IRQS_DISABLED + bne 1f TRACE_ENABLE_INTS - stb r5,PACASOFTIRQEN(r13) + stb r5,PACAIRQSOFTMASK(r13) 1: /* * Restore PACAIRQHAPPENED rather than setting it based on @@ -351,9 +351,9 @@ ret_from_mc_except: #define PROLOG_ADDITION_NONE_MC(n) #define PROLOG_ADDITION_MASKABLE_GEN(n) \ - lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r10,0; /* yes -> go out of line */ \ - beq masked_interrupt_book3e_##n + lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \ + andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ + bne masked_interrupt_book3e_##n #define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ @@ -397,7 +397,7 @@ exc_##n##_common: \ mfspr r8,SPRN_XER; /* save XER in stackframe */ \ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \ - lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \ + lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \ ld r12,exception_marker@toc(r2); \ li r0,0; \ std r3,GPR10(r1); /* save r10 to stackframe */ \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 2dc10bf646b8..243d072a225a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -718,10 +718,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) hardware_interrupt_hv: BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, + IRQS_DISABLED) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_PR, + IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x100) @@ -729,9 +731,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_HV, SOFTEN_TEST_HV, + IRQS_DISABLED) FTR_SECTION_ELSE - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_STD, SOFTEN_TEST_PR, + IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) @@ -827,8 +833,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80) -EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900) +EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) +EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -839,8 +845,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980) EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) -EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100) -EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00) +EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED) +EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0xa00) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) @@ -1052,7 +1058,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) * mode. */ __EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early) -__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) +__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED) EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) @@ -1110,8 +1116,8 @@ EXC_COMMON_BEGIN(hmi_exception_common) EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception, ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON) -EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20) -EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80) +EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED) TRAMP_KVM_HV(PACA_EXGEN, 0xe80) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) @@ -1120,8 +1126,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) #endif -EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20) -EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0) +EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED) TRAMP_KVM_HV(PACA_EXGEN, 0xea0) EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) @@ -1132,8 +1138,8 @@ EXC_REAL_NONE(0xee0, 0x20) EXC_VIRT_NONE(0x4ee0, 0x20) -EXC_REAL_OOL(performance_monitor, 0xf00, 0x20) -EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00) +EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED) +EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED) TRAMP_KVM(PACA_EXGEN, 0xf00) EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) @@ -1345,7 +1351,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) b . #endif -EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception) +EXC_COMMON(denorm_common, 0x1500, unknown_exception) #ifdef CONFIG_CBE_RAS @@ -1455,39 +1461,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); rfid @@ -1497,39 +1501,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); hrfid @@ -1632,7 +1634,7 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h + lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r4,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ @@ -1828,6 +1830,8 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE beq hardware_interrupt_common ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300) + cmpwi r3,0xf00 + beq performance_monitor_common BEGIN_FTR_SECTION cmpwi r3,0xa00 beq h_doorbell_common_msgclr diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index aa71a90f5222..a61151a6ea5e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -765,8 +765,8 @@ _GLOBAL(pmac_secondary_start) /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) + li r0,IRQS_DISABLED + stb r0,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -822,7 +822,8 @@ __secondary_start: /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - stb r7,PACASOFTIRQEN(r13) + li r7,IRQS_DISABLED + stb r7,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -988,8 +989,8 @@ start_here_common: /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) + li r0,IRQS_DISABLED + stb r0,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4fee00d414e8..d8670a37d70c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,23 +33,6 @@ #include <asm/fixmap.h> #include <asm/export.h> -/* Macro to make the code more readable. */ -#ifdef CONFIG_8xx_CPU6 -#define SPRN_MI_TWC_ADDR 0x2b80 -#define SPRN_MI_RPN_ADDR 0x2d80 -#define SPRN_MD_TWC_ADDR 0x3b80 -#define SPRN_MD_RPN_ADDR 0x3d80 - -#define MTSPR_CPU6(spr, reg, treg) \ - li treg, spr##_ADDR; \ - stw treg, 12(r0); \ - lwz treg, 12(r0); \ - mtspr spr, reg -#else -#define MTSPR_CPU6(spr, reg, treg) \ - mtspr spr, reg -#endif - #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ #define SIMPLE_KERNEL_ADDRESS 1 @@ -69,11 +52,7 @@ * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ -#ifdef CONFIG_PPC_16K_PAGES -#define RPN_PATTERN (0x00f0 | MD_SPS16K) -#else #define RPN_PATTERN 0x00f0 -#endif #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -134,15 +113,12 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - EXCEPTION_PROLOG_0; \ + mtspr SPRN_SPRG_SCRATCH0, r10; \ + mtspr SPRN_SPRG_SCRATCH1, r11; \ mfcr r10; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 -#define EXCEPTION_PROLOG_0 \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11 - #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ andi. r11,r11,MSR_PR; \ @@ -177,13 +153,6 @@ turn_on_mmu: SAVE_2GPRS(7, r11) /* - * Exception exit code. - */ -#define EXCEPTION_EPILOG_0 \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - mfspr r11,SPRN_SPRG_SCRATCH1 - -/* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). * @@ -326,15 +295,10 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mtspr SPRN_SPRG_SCRATCH2, r3 -#endif - EXCEPTION_PROLOG_0 -#ifdef CONFIG_PPC_8xx_PERF_EVENT - lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha - lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) - addi r11, r11, 1 - stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) + mtspr SPRN_SPRG_SCRATCH2, r12 #endif /* If we are faulting a kernel address, we have to use the @@ -345,7 +309,7 @@ InstructionTLBMiss: /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mfcr r3 + mfcr r12 #endif #ifdef ITLB_MISS_KERNEL #if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) @@ -388,40 +352,46 @@ _ENTRY(ITLBMiss_cmp) lwz r10, 0(r10) /* Get the pte */ 4: #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mtcr r3 + mtcr r12 #endif - /* Insert the APG into the TWC from the Linux PTE. */ - rlwimi r11, r10, 0, 25, 26 - /* Load the MI_TWC with the attributes for this "segment." */ - MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) - rlwimi r10, r11, 1, MI_SPS16K -#endif #ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT + rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 #endif - li r11, RPN_PATTERN + /* Load the MI_TWC with the attributes for this "segment." */ + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + + li r11, RPN_PATTERN | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 20-23 and 28 must be clear. - * Software indicator bits 24, 25, 26, and 27 must be + * Software indicator bits 20 and 23 must be clear. + * Software indicator bits 22, 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) - rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */ -#else - rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ -#endif - MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ + rlwimi r11, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */ + rlwimi r10, r11, 0, 0x0ff0 /* Set 22, 24-27, clear 20,23 */ + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mfspr r3, SPRN_SPRG_SCRATCH2 +_ENTRY(itlb_miss_exit_1) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) + mfspr r12, SPRN_SPRG_SCRATCH2 +#endif + rfi +#ifdef CONFIG_PERF_EVENTS +_ENTRY(itlb_miss_perf) + lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) + mfspr r12, SPRN_SPRG_SCRATCH2 #endif - EXCEPTION_EPILOG_0 rfi #ifdef CONFIG_HUGETLB_PAGE @@ -436,7 +406,6 @@ _ENTRY(ITLBMiss_cmp) rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK #endif lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b 20: /* 512k pages */ @@ -445,21 +414,15 @@ _ENTRY(ITLBMiss_cmp) /* Add level 2 base */ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b #endif . = 0x1200 DataStoreTLBMiss: - mtspr SPRN_SPRG_SCRATCH2, r3 - EXCEPTION_PROLOG_0 -#ifdef CONFIG_PPC_8xx_PERF_EVENT - lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha - lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) - addi r11, r11, 1 - stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) -#endif - mfcr r3 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH2, r12 + mfcr r12 /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -499,59 +462,49 @@ _ENTRY(DTLBMiss_jmp) rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ 4: - mtcr r3 + mtcr r12 - /* Insert the Guarded flag and APG into the TWC from the Linux PTE. - * It is bit 26-27 of both the Linux PTE and the TWC (at least + /* Insert the Guarded flag into the TWC from the Linux PTE. + * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put * this into the Linux pgd/pmd and load it in the operation * above. */ - rlwimi r11, r10, 0, 26, 27 - /* Insert the WriteThru flag into the TWC from the Linux PTE. - * It is bit 25 in the Linux PTE and bit 30 in the TWC - */ - rlwimi r11, r10, 32-5, 30, 30 - MTSPR_CPU6(SPRN_MD_TWC, r11, r3) - - /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) - * In 16k pages mode, SPS is always 1 */ -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) - rlwimi r10, r11, 1, MD_SPS16K -#endif - /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. - * We also need to know if the insn is a load/store, so: - * Clear _PAGE_PRESENT and load that which will - * trap into DTLB Error with store bit set accordinly. - */ - /* PRESENT=0x1, ACCESSED=0x20 - * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); - * r10 = (r10 & ~PRESENT) | r11; - */ + rlwimi r11, r10, 0, _PAGE_GUARDED #ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT + /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0 + * on that bit will represent a Non Access group + */ + rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 #endif + mtspr SPRN_MD_TWC, r11 + /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 22 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ li r11, RPN_PATTERN -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ -#else - rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ -#endif - rlwimi r10, r11, 0, 20, 20 /* clear 20 */ - MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ - mfspr r3, SPRN_SPRG_SCRATCH2 mtspr SPRN_DAR, r11 /* Tag DAR */ - EXCEPTION_EPILOG_0 +_ENTRY(dtlb_miss_exit_1) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 + rfi +#ifdef CONFIG_PERF_EVENTS +_ENTRY(dtlb_miss_perf) + lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi #ifdef CONFIG_HUGETLB_PAGE @@ -566,7 +519,6 @@ _ENTRY(DTLBMiss_jmp) rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK #endif lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b 20: /* 512k pages */ @@ -575,7 +527,6 @@ _ENTRY(DTLBMiss_jmp) /* Add level 2 base */ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b #endif @@ -601,7 +552,8 @@ itlbie: */ . = 0x1400 DataTLBError: - EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 mfcr r10 mfspr r11, SPRN_DAR @@ -636,7 +588,8 @@ dtlbie: */ . = 0x1c00 DataBreakpoint: - EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 mfcr r10 mfspr r11, SPRN_SRR0 cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l @@ -652,13 +605,15 @@ DataBreakpoint: EXC_XFER_EE(0x1c00, do_break) 11: mtcr r10 - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 rfi -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#ifdef CONFIG_PERF_EVENTS . = 0x1d00 InstructionBreakpoint: - EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 lis r10, (instruction_counter - PAGE_OFFSET)@ha lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10) addi r11, r11, -1 @@ -666,7 +621,8 @@ InstructionBreakpoint: lis r10, 0xffff ori r10, r10, 0x01 mtspr SPRN_COUNTA, r10 - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 rfi #else EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) @@ -681,51 +637,57 @@ InstructionBreakpoint: * not enough space in the DataStoreTLBMiss area. */ DTLBMissIMMR: - mtcr r3 - /* Set 512k byte guarded page and mark it valid */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r10, r11) + mtcr r12 + /* Set 512k byte guarded page and mark it valid and accessed */ + li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2 + mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT | _PAGE_NO_CACHE - MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r3, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 +_ENTRY(dtlb_miss_exit_2) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi DTLBMissLinear: - mtcr r3 - /* Set 8M byte page and mark it valid */ - li r11, MD_PS8MEG | MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + mtcr r12 + /* Set 8M byte page and mark it valid and accessed */ + li r11, MD_PS8MEG | MD_SVALID | M_APG2 + mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT - MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r3, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 +_ENTRY(dtlb_miss_exit_3) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi #ifndef CONFIG_PIN_TLB_TEXT ITLBMissLinear: - mtcr r3 - /* Set 8M byte page and mark it valid */ - li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC - MTSPR_CPU6(SPRN_MI_TWC, r11, r3) + mtcr r12 + /* Set 8M byte page and mark it valid,accessed */ + li r11, MI_PS8MEG | MI_SVALID | M_APG2 + mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT - MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */ + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - mfspr r3, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 +_ENTRY(itlb_miss_exit_2) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi #endif @@ -933,13 +895,6 @@ start_here: */ lis r6, swapper_pg_dir@ha tophys(r6,r6) -#ifdef CONFIG_8xx_CPU6 - lis r4, cpu6_errata_word@h - ori r4, r4, cpu6_errata_word@l - li r3, 0x3f80 - stw r3, 12(r4) - lwz r3, 12(r4) -#endif mtspr SPRN_M_TW, r6 lis r4,2f@h ori r4,r4,2f@l @@ -1004,8 +959,8 @@ initial_mmu: lis r8, KERNELBASE@h /* Create vaddr for TLB */ ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 - li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */ - ori r8, r8, MI_SVALID /* Make it valid */ + li r8, MI_PS8MEG /* Set 8M byte page */ + ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */ mtspr SPRN_MI_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ @@ -1032,7 +987,7 @@ initial_mmu: ori r8, r8, MD_EVALID /* Mark it valid */ mtspr SPRN_MD_EPN, r8 li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID /* Make it valid */ + ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */ mtspr SPRN_MD_TWC, r8 mr r8, r9 /* Create paddr for TLB */ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ @@ -1061,7 +1016,7 @@ initial_mmu: #endif /* Disable debug mode entry on breakpoints */ mfspr r8, SPRN_DER -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#ifdef CONFIG_PERF_EVENTS rlwinm r8, r8, 0, ~0xc #else rlwinm r8, r8, 0, ~0x8 @@ -1094,13 +1049,7 @@ swapper_pg_dir: abatron_pteptrs: .space 8 -#ifdef CONFIG_8xx_CPU6 - .globl cpu6_errata_word -cpu6_errata_word: - .space 16 -#endif - -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#ifdef CONFIG_PERF_EVENTS .globl itlb_miss_counter itlb_miss_counter: .space 4 diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 48c21acef915..2b269315d377 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -17,6 +17,7 @@ #include <asm/processor.h> #include <asm/thread_info.h> #include <asm/epapr_hcalls.h> +#include <asm/hw_irq.h> /* 64-bit version only for now */ #ifdef CONFIG_PPC64 @@ -46,8 +47,8 @@ _GLOBAL(\name) bl trace_hardirqs_on addi r1,r1,128 #endif - li r0,1 - stb r0,PACASOFTIRQEN(r13) + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13) /* Interrupts will make use return to LR, so get something we want * in there diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index f57a19348bdd..08faa93755f9 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -15,6 +15,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/irqflags.h> +#include <asm/hw_irq.h> #undef DEBUG @@ -53,8 +54,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) mfmsr r7 #endif /* CONFIG_TRACE_IRQFLAGS */ - li r0,1 - stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */ BEGIN_FTR_SECTION DSSALL sync diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index b7a84522e652..f88038847790 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -67,6 +67,7 @@ #include <asm/smp.h> #include <asm/livepatch.h> #include <asm/asm-prototypes.h> +#include <asm/hw_irq.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -106,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace void set_soft_enabled(unsigned long enable) -{ - __asm__ __volatile__("stb %0,%1(13)" - : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); -} - static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); @@ -191,6 +186,11 @@ notrace unsigned int __check_irq_replay(void) return 0x900; } + if (happened & PACA_IRQ_PMI) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + return 0xf00; + } + if (happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; return 0x500; @@ -224,15 +224,16 @@ notrace unsigned int __check_irq_replay(void) return 0; } -notrace void arch_local_irq_restore(unsigned long en) +notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; unsigned int replay; /* Write the new soft-enabled value */ - set_soft_enabled(en); - if (!en) + irq_soft_mask_set(mask); + if (mask) return; + /* * From this point onward, we can take interrupts, preempt, * etc... unless we got hard-disabled. We check if an event @@ -263,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long en) */ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) __hard_irq_disable(); -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG else { /* * We should already be hard disabled here. We had bugs @@ -274,9 +275,9 @@ notrace void arch_local_irq_restore(unsigned long en) if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); } -#endif /* CONFIG_TRACE_IRQFLAGS */ +#endif - set_soft_enabled(0); + irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); /* @@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en) /* We can soft-enable now */ trace_hardirqs_on(); - set_soft_enabled(1); + irq_soft_mask_set(IRQS_ENABLED); /* * And replay if we have to. This will return with interrupts @@ -363,7 +364,7 @@ bool prep_irq_for_idle(void) * of entering the low power state. */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - local_paca->soft_enabled = 1; + irq_soft_mask_set(IRQS_ENABLED); /* Tell the caller to enter the low power state */ return true; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 71e8a1b8c86e..efdd16a79075 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs) return handled; } -long hmi_exception_realmode(struct pt_regs *regs) +/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ +static enum { + DTRIG_UNKNOWN, + DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ + DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ +} hmer_debug_trig_function; + +static int init_debug_trig_function(void) { - __this_cpu_inc(irq_stat.hmi_exceptions); - -#ifdef CONFIG_PPC_BOOK3S_64 - /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ - if (pvr_version_is(PVR_POWER9)) { - unsigned long hmer = mfspr(SPRN_HMER); - - /* Do we have the debug bit set */ - if (hmer & PPC_BIT(17)) { - hmer &= ~PPC_BIT(17); - mtspr(SPRN_HMER, hmer); - - /* - * Now to avoid problems with soft-disable we - * only do the emulation if we are coming from - * user space - */ - if (user_mode(regs)) - local_paca->hmi_p9_special_emu = 1; - - /* - * Don't bother going to OPAL if that's the - * only relevant bit. - */ - if (!(hmer & mfspr(SPRN_HMEER))) - return local_paca->hmi_p9_special_emu; + int pvr; + struct device_node *cpun; + struct property *prop = NULL; + const char *str; + + /* First look in the device tree */ + preempt_disable(); + cpun = of_get_cpu_node(smp_processor_id(), NULL); + if (cpun) { + of_property_for_each_string(cpun, "ibm,hmi-special-triggers", + prop, str) { + if (strcmp(str, "bit17-vector-ci-load") == 0) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + else if (strcmp(str, "bit17-tm-suspend-escape") == 0) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; } + of_node_put(cpun); + } + preempt_enable(); + + /* If we found the property, don't look at PVR */ + if (prop) + goto out; + + pvr = mfspr(SPRN_PVR); + /* Check for POWER9 Nimbus (scale-out) */ + if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { + /* DD2.2 and later */ + if ((pvr & 0xfff) >= 0x202) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; + /* DD2.0 and DD2.1 - used for vector CI load emulation */ + else if ((pvr & 0xfff) >= 0x200) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + } + + out: + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + pr_debug("HMI debug trigger used for vector CI load\n"); + break; + case DTRIG_SUSPEND_ESCAPE: + pr_debug("HMI debug trigger used for TM suspend escape\n"); + break; + default: + break; } -#endif /* CONFIG_PPC_BOOK3S_64 */ + return 0; +} +__initcall(init_debug_trig_function); + +/* + * Handle HMIs that occur as a result of a debug trigger. + * Return values: + * -1 means this is not a HMI cause that we know about + * 0 means no further handling is required + * 1 means further handling is required + */ +long hmi_handle_debugtrig(struct pt_regs *regs) +{ + unsigned long hmer = mfspr(SPRN_HMER); + long ret = 0; + + /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ + if (!((hmer & HMER_DEBUG_TRIG) + && hmer_debug_trig_function != DTRIG_UNKNOWN)) + return -1; + + hmer &= ~HMER_DEBUG_TRIG; + /* HMER is a write-AND register */ + mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); + + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + /* + * Now to avoid problems with soft-disable we + * only do the emulation if we are coming from + * host user space + */ + if (regs && user_mode(regs)) + ret = local_paca->hmi_p9_special_emu = 1; + + break; + + default: + break; + } + + /* + * See if any other HMI causes remain to be handled + */ + if (hmer & mfspr(SPRN_HMEER)) + return -1; + + return ret; +} + +/* + * Return values: + */ +long hmi_exception_realmode(struct pt_regs *regs) +{ + int ret; + + __this_cpu_inc(irq_stat.hmi_exceptions); + + ret = hmi_handle_debugtrig(regs); + if (ret >= 0) + return ret; wait_for_subcore_guest_exit(); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 644f7040b91c..fe6fc63251fe 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -58,115 +58,6 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) return pte_pfn(*ptep); } -static void flush_tlb_206(unsigned int num_sets, unsigned int action) -{ - unsigned long rb; - unsigned int i; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - rb = TLBIEL_INVAL_SET; - break; - case TLB_INVAL_SCOPE_LPID: - rb = TLBIEL_INVAL_SET_LPID; - break; - default: - BUG(); - break; - } - - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < num_sets; i++) { - asm volatile("tlbiel %0" : : "r" (rb)); - rb += 1 << TLBIEL_INVAL_SET_SHIFT; - } - asm volatile("ptesync" : : : "memory"); -} - -static void flush_tlb_300(unsigned int num_sets, unsigned int action) -{ - unsigned long rb; - unsigned int i; - unsigned int r; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - rb = TLBIEL_INVAL_SET; - break; - case TLB_INVAL_SCOPE_LPID: - rb = TLBIEL_INVAL_SET_LPID; - break; - default: - BUG(); - break; - } - - asm volatile("ptesync" : : : "memory"); - - if (early_radix_enabled()) - r = 1; - else - r = 0; - - /* - * First flush table/PWC caches with set 0, then flush the - * rest of the sets, partition scope. Radix must then do it - * all again with process scope. Hash just has to flush - * process table. - */ - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r)); - for (i = 1; i < num_sets; i++) { - unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT); - - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r)); - } - - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb), "r"(0), "i"(2), "i"(1), "r"(r)); - if (early_radix_enabled()) { - for (i = 1; i < num_sets; i++) { - unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT); - - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r)); - } - } - - asm volatile("ptesync" : : : "memory"); -} - -/* - * Generic routines to flush TLB on POWER processors. These routines - * are used as flush_tlb hook in the cpu_spec. - * - * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. - * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. - */ -void __flush_tlb_power7(unsigned int action) -{ - flush_tlb_206(POWER7_TLB_SETS, action); -} - -void __flush_tlb_power8(unsigned int action) -{ - flush_tlb_206(POWER8_TLB_SETS, action); -} - -void __flush_tlb_power9(unsigned int action) -{ - unsigned int num_sets; - - if (early_radix_enabled()) - num_sets = POWER9_TLB_SETS_RADIX; - else - num_sets = POWER9_TLB_SETS_HASH; - - flush_tlb_300(num_sets, action); -} - - /* flush SLBs and reload */ #ifdef CONFIG_PPC_BOOK3S_64 static void flush_and_reload_slb(void) @@ -226,10 +117,8 @@ static int mce_flush(int what) return 1; } if (what == MCE_FLUSH_TLB) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - return 1; - } + tlbiel_all(); + return 1; } return 0; diff --git a/arch/powerpc/kernel/module.lds b/arch/powerpc/kernel/module.lds new file mode 100644 index 000000000000..cea5dc124be4 --- /dev/null +++ b/arch/powerpc/kernel/module.lds @@ -0,0 +1,8 @@ +/* Force alignment of .toc section. */ +SECTIONS +{ + .toc 0 : ALIGN(256) + { + *(.got .toc) + } +} diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 218971ac7e04..a2636c250b7b 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -348,8 +348,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, char *p; if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; - else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) + else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { me->arch.toc_section = i; + if (sechdrs[i].sh_addralign < 8) + sechdrs[i].sh_addralign = 8; + } else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); @@ -387,12 +390,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return 0; } -/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this - gives the value maximum span in an instruction which uses a signed - offset) */ +/* + * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the + * value maximum span in an instruction which uses a signed offset). Round down + * to a 256 byte boundary for the odd case where we are setting up r2 without a + * .toc section. + */ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { - return sechdrs[me->arch.toc_section].sh_addr + 0x8000; + return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000; } /* Both low and high 16 bits are added as SIGNED additions, so if low @@ -501,12 +507,22 @@ static bool is_early_mcount_callsite(u32 *instruction) restore r2. */ static int restore_r2(u32 *instruction, struct module *me) { - if (is_early_mcount_callsite(instruction - 1)) + u32 *prev_insn = instruction - 1; + + if (is_early_mcount_callsite(prev_insn)) + return 1; + + /* + * Make sure the branch isn't a sibling call. Sibling calls aren't + * "link" branches and they don't return, so they don't need the r2 + * restore afterwards. + */ + if (!instr_is_relative_link_branch(*prev_insn)) return 1; if (*instruction != PPC_INST_NOP) { - pr_err("%s: Expect noop after relocate, got %08x\n", - me->name, *instruction); + pr_err("%s: Expected nop after call, got %08x at %pS\n", + me->name, *instruction, instruction); return 0; } /* ld r2,R2_STACK_OFFSET(r1) */ @@ -628,7 +644,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_PPC_REL24: /* FIXME: Handle weak symbols here --RR */ - if (sym->st_shndx == SHN_UNDEF) { + if (sym->st_shndx == SHN_UNDEF || + sym->st_shndx == SHN_LIVEPATCH) { /* External: go via stub */ value = stub_for_addr(sechdrs, value, me); if (!value) diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 52fc864cdec4..98a3aeeb3c8c 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -58,7 +58,7 @@ optprobe_template_entry: std r5,_XER(r1) mfcr r5 std r5,_CCR(r1) - lbz r5,PACASOFTIRQEN(r13) + lbz r5,PACAIRQSOFTMASK(r13) std r5,SOFTE(r1) /* diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d6597038931d..95ffedf14885 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -18,6 +18,8 @@ #include <asm/pgtable.h> #include <asm/kexec.h> +#include "setup.h" + #ifdef CONFIG_PPC_BOOK3S /* @@ -208,15 +210,14 @@ void __init allocate_pacas(void) u64 limit; int cpu; - limit = ppc64_rma_size; - #ifdef CONFIG_PPC_BOOK3S_64 /* - * We can't take SLB misses on the paca, and we want to access them - * in real mode, so allocate them within the RMA and also within - * the first segment. + * We access pacas in real mode, and cannot take SLB faults + * on them when in virtual mode, so allocate them accordingly. */ - limit = min(0x10000000ULL, limit); + limit = min(ppc64_bolted_size(), ppc64_rma_size); +#else + limit = ppc64_rma_size; #endif paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 590f4d0a6cb1..ae2ede4de6be 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -249,8 +249,31 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno) return pci_iov_resource_size(pdev, resno); } + +int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + if (ppc_md.pcibios_sriov_enable) + return ppc_md.pcibios_sriov_enable(pdev, num_vfs); + + return 0; +} + +int pcibios_sriov_disable(struct pci_dev *pdev) +{ + if (ppc_md.pcibios_sriov_disable) + return ppc_md.pcibios_sriov_disable(pdev); + + return 0; +} + #endif /* CONFIG_PCI_IOV */ +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + if (ppc_md.pcibios_bus_add_device) + ppc_md.pcibios_bus_add_device(pdev); +} + static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 @@ -339,8 +362,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) */ static int pci_read_irq_line(struct pci_dev *pci_dev) { - struct of_phandle_args oirq; - unsigned int virq; + unsigned int virq = 0; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -348,7 +370,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) memset(&oirq, 0xff, sizeof(oirq)); #endif /* Try to get a mapping from the device-tree */ - if (of_irq_parse_pci(pci_dev, &oirq)) { + if (!of_irq_parse_and_map_pci(pci_dev, 0, 0)) { u8 line, pin; /* If that fails, lets fallback to what is in the config @@ -372,11 +394,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) virq = irq_create_mapping(NULL, line); if (virq) irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); - } else { - pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n", - oirq.args_count, oirq.args[0], oirq.args[1], oirq.np); - - virq = irq_create_of_mapping(&oirq); } if (!virq) { @@ -1276,8 +1293,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus) i + PCI_BRIDGE_RESOURCES) == 0) continue; } - pr_warning("PCI: Cannot allocate resource region " - "%d of PCI bridge %d, will remap\n", i, bus->number); + pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n", + i, bus->number); clear_resource: /* The resource might be figured out when doing * reassignment based on the resources required diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 2d71269e7dc1..cf47b1aec4c2 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -104,7 +104,7 @@ EXPORT_SYMBOL_GPL(pci_hp_remove_devices); */ void pci_hp_add_devices(struct pci_bus *bus) { - int slotno, mode, pass, max; + int slotno, mode, max; struct pci_dev *dev; struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); @@ -133,13 +133,17 @@ void pci_hp_add_devices(struct pci_bus *bus) pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); pcibios_setup_bus_devices(bus); max = bus->busn_res.start; - for (pass = 0; pass < 2; pass++) { - list_for_each_entry(dev, &bus->devices, bus_list) { - if (pci_is_bridge(dev)) - max = pci_scan_bridge(bus, dev, - max, pass); - } - } + /* + * Scan bridges that are already configured. We don't touch + * them unless they are misconfigured (which will be done in + * the second scan below). + */ + for_each_pci_bridge(dev, bus) + max = pci_scan_bridge(bus, dev, max, 0); + + /* Scan bridges that need to be reconfigured */ + for_each_pci_bridge(dev, bus) + max = pci_scan_bridge(bus, dev, max, 1); } pcibios_finish_adding_to_bus(bus); } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 1d817f4d97d9..85ad2f78b889 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -96,7 +96,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus) reg = of_get_property(node, "reg", NULL); if (!reg) continue; - dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff)); + dev = pci_get_domain_bus_and_slot(0, pci_bus, + ((reg[0] >> 8) & 0xff)); if (!dev || !dev->subordinate) { pci_dev_put(dev); continue; diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0e395afbf0f4..ab147a1909c8 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -156,10 +156,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, pdn->parent = parent; pdn->busno = busno; pdn->devfn = devfn; -#ifdef CONFIG_PPC_POWERNV pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; -#endif INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); list_add_tail(&pdn->list, &parent->child_list); @@ -226,9 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev) */ if (pdev->is_virtfn) { pdn = pci_get_pdn(pdev); -#ifdef CONFIG_PPC_POWERNV pdn->pe_number = IODA_INVALID_PE; -#endif return; } @@ -294,9 +290,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, return NULL; dn->data = pdn; pdn->phb = hose; -#ifdef CONFIG_PPC_POWERNV pdn->pe_number = IODA_INVALID_PE; -#endif regs = of_get_property(dn, "reg", NULL); if (regs) { u32 addr = of_read_number(regs, 1); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 0d790f8432d2..98f04725def7 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) * @addr0: value of 1st cell of a device tree PCI address. * @bridge: Set this flag if the address is from a bridge 'ranges' property */ -static unsigned int pci_parse_of_flags(u32 addr0, int bridge) +unsigned int pci_parse_of_flags(u32 addr0, int bridge) { unsigned int flags = 0; @@ -369,11 +369,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, pcibios_setup_bus_devices(bus); /* Now scan child busses */ - list_for_each_entry(dev, &bus->devices, bus_list) { - if (pci_is_bridge(dev)) { - of_scan_pci_bridge(dev); - } - } + for_each_pci_bridge(dev, bus) + of_scan_pci_bridge(dev); } /** diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 56548bf6231f..9bfbd800d32f 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -63,7 +63,7 @@ static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; - pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL, + pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, &page_map_fops, vdso_data); if (!pde) return 1; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4208cbe2fb7f..1738c4127b32 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -42,6 +42,7 @@ #include <linux/hw_breakpoint.h> #include <linux/uaccess.h> #include <linux/elf-randomize.h> +#include <linux/pkeys.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -57,6 +58,7 @@ #include <asm/debug.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> +#include <asm/hw_irq.h> #endif #include <asm/code-patching.h> #include <asm/exec.h> @@ -1097,6 +1099,8 @@ static inline void save_sprs(struct thread_struct *t) t->tar = mfspr(SPRN_TAR); } #endif + + thread_pkey_regs_save(t); } static inline void restore_sprs(struct thread_struct *old_thread, @@ -1136,6 +1140,8 @@ static inline void restore_sprs(struct thread_struct *old_thread, old_thread->tidr != new_thread->tidr) mtspr(SPRN_TIDR, new_thread->tidr); #endif + + thread_pkey_regs_restore(new_thread, old_thread); } #ifdef CONFIG_PPC_BOOK3S_64 @@ -1404,7 +1410,7 @@ void show_regs(struct pt_regs * regs) print_msr_bits(regs->msr); pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); - if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) + if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -1504,14 +1510,15 @@ static int assign_thread_tidr(void) { int index; int err; + unsigned long flags; again: if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL)) return -ENOMEM; - spin_lock(&vas_thread_id_lock); + spin_lock_irqsave(&vas_thread_id_lock, flags); err = ida_get_new_above(&vas_thread_ida, 1, &index); - spin_unlock(&vas_thread_id_lock); + spin_unlock_irqrestore(&vas_thread_id_lock, flags); if (err == -EAGAIN) goto again; @@ -1519,9 +1526,9 @@ again: return err; if (index > MAX_THREAD_CONTEXT) { - spin_lock(&vas_thread_id_lock); + spin_lock_irqsave(&vas_thread_id_lock, flags); ida_remove(&vas_thread_ida, index); - spin_unlock(&vas_thread_id_lock); + spin_unlock_irqrestore(&vas_thread_id_lock, flags); return -ENOMEM; } @@ -1530,9 +1537,11 @@ again: static void free_thread_tidr(int id) { - spin_lock(&vas_thread_id_lock); + unsigned long flags; + + spin_lock_irqsave(&vas_thread_id_lock, flags); ida_remove(&vas_thread_ida, id); - spin_unlock(&vas_thread_id_lock); + spin_unlock_irqrestore(&vas_thread_id_lock, flags); } /* @@ -1584,6 +1593,7 @@ int set_thread_tidr(struct task_struct *t) return 0; } +EXPORT_SYMBOL_GPL(set_thread_tidr); #endif /* CONFIG_PPC64 */ @@ -1669,7 +1679,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->gpr[14] = ppc_function_entry((void *)usp); #ifdef CONFIG_PPC64 clear_tsk_thread_flag(p, TIF_32BIT); - childregs->softe = 1; + childregs->softe = IRQS_ENABLED; #endif childregs->gpr[15] = kthread_arg; p->thread.regs = NULL; /* no user register state */ @@ -1860,6 +1870,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + + thread_pkey_regs_init(¤t->thread); } EXPORT_SYMBOL(start_thread); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b15bae265c90..4dffef947b8a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -59,6 +59,7 @@ #include <asm/epapr_hcalls.h> #include <asm/firmware.h> #include <asm/dt_cpu_ftrs.h> +#include <asm/drmem.h> #include <mm/mmu_decl.h> @@ -455,92 +456,74 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, #ifdef CONFIG_PPC_PSERIES /* - * Interpret the ibm,dynamic-memory property in the - * /ibm,dynamic-reconfiguration-memory node. + * Interpret the ibm dynamic reconfiguration memory LMBs. * This contains a list of memory blocks along with NUMA affinity * information. */ -static int __init early_init_dt_scan_drconf_memory(unsigned long node) +static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm) { - const __be32 *dm, *ls, *usm; - int l; - unsigned long n, flags; - u64 base, size, memblock_size; - unsigned int is_kexec_kdump = 0, rngs; - - ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); - if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) - return 0; - memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); + u64 base, size; + int is_kexec_kdump = 0, rngs; - dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); - if (dm == NULL || l < sizeof(__be32)) - return 0; + base = lmb->base_addr; + size = drmem_lmb_size(); + rngs = 1; - n = of_read_number(dm++, 1); /* number of entries */ - if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32)) - return 0; + /* + * Skip this block if the reserved bit is set in flags + * or if the block is not assigned to this partition. + */ + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) + return; - /* check if this is a kexec/kdump kernel. */ - usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", - &l); - if (usm != NULL) + if (*usm) is_kexec_kdump = 1; - for (; n != 0; --n) { - base = dt_mem_next_cell(dt_root_addr_cells, &dm); - flags = of_read_number(&dm[3], 1); - /* skip DRC index, pad, assoc. list index, flags */ - dm += 4; - /* skip this block if the reserved bit is set in flags - or if the block is not assigned to this partition */ - if ((flags & DRCONF_MEM_RESERVED) || - !(flags & DRCONF_MEM_ASSIGNED)) - continue; - size = memblock_size; - rngs = 1; + if (is_kexec_kdump) { + /* + * For each memblock in ibm,dynamic-memory, a + * corresponding entry in linux,drconf-usable-memory + * property contains a counter 'p' followed by 'p' + * (base, size) duple. Now read the counter from + * linux,drconf-usable-memory property + */ + rngs = dt_mem_next_cell(dt_root_size_cells, usm); + if (!rngs) /* there are no (base, size) duple */ + return; + } + + do { if (is_kexec_kdump) { - /* - * For each memblock in ibm,dynamic-memory, a corresponding - * entry in linux,drconf-usable-memory property contains - * a counter 'p' followed by 'p' (base, size) duple. - * Now read the counter from - * linux,drconf-usable-memory property - */ - rngs = dt_mem_next_cell(dt_root_size_cells, &usm); - if (!rngs) /* there are no (base, size) duple */ + base = dt_mem_next_cell(dt_root_addr_cells, usm); + size = dt_mem_next_cell(dt_root_size_cells, usm); + } + + if (iommu_is_off) { + if (base >= 0x80000000ul) continue; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; } - do { - if (is_kexec_kdump) { - base = dt_mem_next_cell(dt_root_addr_cells, - &usm); - size = dt_mem_next_cell(dt_root_size_cells, - &usm); - } - if (iommu_is_off) { - if (base >= 0x80000000ul) - continue; - if ((base + size) > 0x80000000ul) - size = 0x80000000ul - base; - } - memblock_add(base, size); - } while (--rngs); - } - memblock_dump_all(); - return 0; + + DBG("Adding: %llx -> %llx\n", base, size); + memblock_add(base, size); + } while (--rngs); } -#else -#define early_init_dt_scan_drconf_memory(node) 0 #endif /* CONFIG_PPC_PSERIES */ static int __init early_init_dt_scan_memory_ppc(unsigned long node, const char *uname, int depth, void *data) { +#ifdef CONFIG_PPC_PSERIES if (depth == 1 && - strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) - return early_init_dt_scan_drconf_memory(node); + strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { + walk_drmem_lmbs_early(node, early_init_drmem_lmb); + return 0; + } +#endif return early_init_dt_scan_memory(node, uname, depth, data); } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 02190e90c7ae..adf044daafd7 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -869,10 +869,12 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved2 = 0, .reserved3 = 0, .subprocessors = 1, + .byte22 = OV5_FEAT(OV5_DRMEM_V2), .intarch = 0, .mmu = 0, .hash_ext = 0, .radix_ext = 0, + .byte22 = OV5_FEAT(OV5_DRC_INFO), }, /* option vector 6: IBM PAPR hints */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index f52ad5bb7109..ca72d7391d40 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -35,6 +35,7 @@ #include <linux/context_tracking.h> #include <linux/uaccess.h> +#include <linux/pkeys.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> @@ -283,6 +284,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) if (regno == PT_DSCR) return get_user_dscr(task, data); +#ifdef CONFIG_PPC64 + /* + * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is + * no more used as a flag, lets force usr to alway see the softe value as 1 + * which means interrupts are not soft disabled. + */ + if (regno == PT_SOFTE) { + *data = 1; + return 0; + } +#endif + if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { *data = ((unsigned long *)task->thread.regs)[regno]; return 0; @@ -1775,6 +1788,61 @@ static int pmu_set(struct task_struct *target, return ret; } #endif + +#ifdef CONFIG_PPC_MEM_KEYS +static int pkey_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!arch_pkeys_enabled()) + return -ENODEV; + + return regset->n; +} + +static int pkey_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); + BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); + + if (!arch_pkeys_enabled()) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.amr, 0, + ELF_NPKEY * sizeof(unsigned long)); +} + +static int pkey_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 new_amr; + int ret; + + if (!arch_pkeys_enabled()) + return -ENODEV; + + /* Only the AMR can be set from userspace */ + if (pos != 0 || count != sizeof(new_amr)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_amr, 0, sizeof(new_amr)); + if (ret) + return ret; + + /* UAMOR determines which bits of the AMR can be set from userspace. */ + target->thread.amr = (new_amr & target->thread.uamor) | + (target->thread.amr & ~target->thread.uamor); + + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * These are our native regset flavors. */ @@ -1809,6 +1877,9 @@ enum powerpc_regset { REGSET_EBB, /* EBB registers */ REGSET_PMR, /* Performance Monitor Registers */ #endif +#ifdef CONFIG_PPC_MEM_KEYS + REGSET_PKEY, /* AMR register */ +#endif }; static const struct user_regset native_regsets[] = { @@ -1914,6 +1985,13 @@ static const struct user_regset native_regsets[] = { .active = pmu_active, .get = pmu_get, .set = pmu_set }, #endif +#ifdef CONFIG_PPC_MEM_KEYS + [REGSET_PKEY] = { + .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + .size = sizeof(u64), .align = sizeof(u64), + .active = pkey_active, .get = pkey_get, .set = pkey_set + }, +#endif }; static const struct user_regset_view user_ppc_native_view = { diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index c8c5f3a550c2..fb070d8cad07 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -261,19 +261,19 @@ static int __init proc_rtas_init(void) if (rtas_node == NULL) return -ENODEV; - proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/progress", 0644, NULL, &ppc_rtas_progress_operations); - proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/clock", 0644, NULL, &ppc_rtas_clock_operations); - proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/poweron", 0644, NULL, &ppc_rtas_poweron_operations); - proc_create("powerpc/rtas/sensors", S_IRUGO, NULL, + proc_create("powerpc/rtas/sensors", 0444, NULL, &ppc_rtas_sensors_operations); - proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/frequency", 0644, NULL, &ppc_rtas_tone_freq_operations); - proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/volume", 0644, NULL, &ppc_rtas_tone_volume_operations); - proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL, + proc_create("powerpc/rtas/rmo_buffer", 0400, NULL, &ppc_rtas_rmo_buf_ops); return 0; } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index f6f6a8a5103a..10fabae2574d 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -727,7 +727,7 @@ static int __init rtas_flash_init(void) const struct rtas_flash_file *f = &rtas_flash_files[i]; int token; - if (!proc_create(f->filename, S_IRUSR | S_IWUSR, NULL, &f->fops)) + if (!proc_create(f->filename, 0600, NULL, &f->fops)) goto enomem; /* diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 1da8b7d8c6ca..fc600a8b1e77 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -581,7 +581,7 @@ static int __init rtas_init(void) if (!rtas_log_buf) return -ENODEV; - entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, + entry = proc_create("powerpc/rtas/error_log", 0400, NULL, &proc_rtas_log_operations); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 3f33869c6486..d73ec518ef80 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -346,10 +346,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) loops_per_jiffy / (500000/HZ), (loops_per_jiffy / (5000/HZ)) % 100); #endif - -#ifdef CONFIG_SMP seq_printf(m, "\n"); -#endif + /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); @@ -379,10 +377,10 @@ static void c_stop(struct seq_file *m, void *v) } const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, }; void __init check_for_initrd(void) @@ -459,13 +457,13 @@ static void __init cpu_init_thread_core_maps(int tpc) */ void __init smp_setup_cpu_maps(void) { - struct device_node *dn = NULL; + struct device_node *dn; int cpu = 0; int nthreads = 1; DBG("smp_setup_cpu_maps()\n"); - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { + for_each_node_by_type(dn, "cpu") { const __be32 *intserv; __be32 cpu_be; int j, len; @@ -505,6 +503,11 @@ void __init smp_setup_cpu_maps(void) set_cpu_possible(cpu, true); cpu++; } + + if (cpu >= nr_cpu_ids) { + of_node_put(dn); + break; + } } /* If no SMT supported, nthreads is forced to 1 */ diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 21c18071d9d5..3fc11e30308f 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -51,6 +51,10 @@ void record_spr_defaults(void); static inline void record_spr_defaults(void) { }; #endif +#ifdef CONFIG_PPC64 +u64 ppc64_bolted_size(void); +#endif + /* * Having this in kvm_ppc.h makes include dependencies too * tricky to solve for setup-common.c so have it here. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e67413f4a8f0..c388cc3357fa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -10,8 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#define DEBUG - #include <linux/export.h> #include <linux/string.h> #include <linux/sched.h> @@ -69,6 +67,7 @@ #include <asm/livepatch.h> #include <asm/opal.h> #include <asm/cputhreads.h> +#include <asm/hw_irq.h> #include "setup.h" @@ -190,6 +189,8 @@ static void __init fixup_boot_paca(void) get_paca()->cpu_start = 1; /* Allow percpu accesses to work until we setup percpu data */ get_paca()->data_offset = 0; + /* Mark interrupts disabled in PACA */ + irq_soft_mask_set(IRQS_DISABLED); } static void __init configure_exceptions(void) @@ -352,7 +353,7 @@ void __init early_setup(unsigned long dt_ptr) void early_setup_secondary(void) { /* Mark interrupts disabled in PACA */ - get_paca()->soft_enabled = 0; + irq_soft_mask_set(IRQS_DISABLED); /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); @@ -568,25 +569,31 @@ void __init initialize_cache_info(void) DBG(" <- initialize_cache_info()\n"); } -/* This returns the limit below which memory accesses to the linear - * mapping are guarnateed not to cause a TLB or SLB miss. This is - * used to allocate interrupt or emergency stacks for which our - * exception entry path doesn't deal with being interrupted. +/* + * This returns the limit below which memory accesses to the linear + * mapping are guarnateed not to cause an architectural exception (e.g., + * TLB or SLB miss fault). + * + * This is used to allocate PACAs and various interrupt stacks that + * that are accessed early in interrupt handlers that must not cause + * re-entrant interrupts. */ -static __init u64 safe_stack_limit(void) +__init u64 ppc64_bolted_size(void) { #ifdef CONFIG_PPC_BOOK3E /* Freescale BookE bolts the entire linear mapping */ - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + /* XXX: BookE ppc64_rma_limit setup seems to disagree? */ + if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) return linear_map_top; /* Other BookE, we assume the first GB is bolted */ return 1ul << 30; #else + /* BookS radix, does not take faults on linear mapping */ if (early_radix_enabled()) return ULONG_MAX; - /* BookS, the first segment is bolted */ - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + /* BookS hash, the first segment is bolted */ + if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT)) return 1UL << SID_SHIFT_1T; return 1UL << SID_SHIFT; #endif @@ -594,7 +601,7 @@ static __init u64 safe_stack_limit(void) void __init irqstack_early_init(void) { - u64 limit = safe_stack_limit(); + u64 limit = ppc64_bolted_size(); unsigned int i; /* @@ -679,7 +686,7 @@ void __init emergency_stack_init(void) * initialized in kernel/irq.c. These are initialized here in order * to have emergency stacks available as early as possible. */ - limit = min(safe_stack_limit(), ppc64_rma_size); + limit = min(ppc64_bolted_size(), ppc64_rma_size); for_each_possible_cpu(i) { struct thread_info *ti; @@ -857,7 +864,7 @@ static void init_fallback_flush(void) int cpu; l1d_size = ppc64_caches.l1d.size; - limit = min(safe_stack_limit(), ppc64_rma_size); + limit = min(ppc64_bolted_size(), ppc64_rma_size); /* * Align to L1d size, and size it at 2x L1d size, to catch possible @@ -868,19 +875,8 @@ static void init_fallback_flush(void) memset(l1d_flush_fallback_area, 0, l1d_size * 2); for_each_possible_cpu(cpu) { - /* - * The fallback flush is currently coded for 8-way - * associativity. Different associativity is possible, but it - * will be treated as 8-way and may not evict the lines as - * effectively. - * - * 128 byte lines are mandatory. - */ - u64 c = l1d_size / 8; - paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; - paca[cpu].l1d_flush_congruence = c; - paca[cpu].l1d_flush_sets = c / 128; + paca[cpu].l1d_flush_size = l1d_size; } } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index aded81169648..a46de0035214 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs, { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; + /* Force usr to alway see softe as 1 (interrupts enabled) */ + elf_greg_t64 softe = 0x1; WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { if (i == 14 && !FULL_REGS(regs)) i = 32; + if ( i == PT_SOFTE) { + if(__put_user((unsigned int)softe, &frame->mc_gregs[i])) + return -EFAULT; + else + continue; + } if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 4b9ca3570344..720117690822 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; long err = 0; + /* Force usr to alway see softe as 1 (interrupts enabled) */ + unsigned long softe = 0x1; BUG_ON(tsk != current); @@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); err |= __put_user(msr, &sc->gp_regs[PT_MSR]); + err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]); err |= __put_user(signr, &sc->signal); err |= __put_user(handler, &sc->handler); if (set != NULL) @@ -207,7 +210,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif struct pt_regs *regs = tsk->thread.regs; - unsigned long msr = tsk->thread.ckpt_regs.msr; + unsigned long msr = tsk->thread.regs->msr; long err = 0; BUG_ON(tsk != current); @@ -216,6 +219,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, WARN_ON(tm_suspend_disabled); + /* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as + * it contains the correct FP, VEC, VSX state after we treclaimed + * the transaction and giveup_all() was called on reclaiming. + */ + msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX); + /* Remove TM bits from thread's MSR. The MSR in the sigcontext * just indicates to userland that we were doing a transaction, but we * don't want to return in transactional state. This also ensures diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e0a4c1f82e25..bbe7634b3a43 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -543,7 +543,25 @@ void smp_send_debugger_break(void) #ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { + int cpu; + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); + if (kdump_in_progress() && crash_wake_offline) { + for_each_present_cpu(cpu) { + if (cpu_online(cpu)) + continue; + /* + * crash_ipi_callback will wait for + * all cpus, including offline CPUs. + * We don't care about nmi_ipi_function. + * Offline cpus will jump straight into + * crash_ipi_callback, we can skip the + * entire NMI dance and waiting for + * cpus to clear pending mask, etc. + */ + do_smp_send_nmi_ipi(cpu); + } + } } #endif diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index b8d4a1dac39f..5a8bfee6e187 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_SPRSETUP(purr, SPRN_PURR); SYSFS_SPRSETUP(spurr, SPRN_SPURR); SYSFS_SPRSETUP(pir, SPRN_PIR); +SYSFS_SPRSETUP(tscr, SPRN_TSCR); /* Lets only enable read for phyp resources and @@ -495,6 +496,7 @@ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); +static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr); /* * This is the system wide DSCR register default value. Any @@ -785,6 +787,9 @@ static int register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_create_file(s, &dev_attr_pir); + + if (cpu_has_feature(CPU_FTR_ARCH_206)) + device_create_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_FSL_BOOK3E @@ -867,6 +872,9 @@ static int unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_remove_file(s, &dev_attr_pir); + + if (cpu_has_feature(CPU_FTR_ARCH_206)) + device_remove_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_FSL_BOOK3E diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fe6f3a285455..a32823dcd9a4 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb) void accumulate_stolen_time(void) { u64 sst, ust; - u8 save_soft_enabled = local_paca->soft_enabled; + unsigned long save_irq_soft_mask = irq_soft_mask_return(); struct cpu_accounting_data *acct = &local_paca->accounting; /* We are called early in the exception entry, before @@ -253,7 +253,7 @@ void accumulate_stolen_time(void) * needs to reflect that so various debug stuff doesn't * complain */ - local_paca->soft_enabled = 0; + irq_soft_mask_set(IRQS_DISABLED); sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); @@ -261,7 +261,7 @@ void accumulate_stolen_time(void) acct->utime -= ust; acct->steal_time += ust + sst; - local_paca->soft_enabled = save_soft_enabled; + irq_soft_mask_set(save_irq_soft_mask); } static inline u64 calculate_stolen_time(u64 stop_tb) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c93f1e6a9fff..1e48d157196a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -20,6 +20,7 @@ #include <linux/sched/debug.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/pkeys.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> @@ -38,6 +39,8 @@ #include <linux/ratelimit.h> #include <linux/context_tracking.h> #include <linux/smp.h> +#include <linux/console.h> +#include <linux/kmsg_dump.h> #include <asm/emulated_ops.h> #include <asm/pgtable.h> @@ -142,6 +145,28 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; +extern void panic_flush_kmsg_start(void) +{ + /* + * These are mostly taken from kernel/panic.c, but tries to do + * relatively minimal work. Don't use delay functions (TB may + * be broken), don't crash dump (need to set a firmware log), + * don't run notifiers. We do want to get some information to + * Linux console. + */ + console_verbose(); + bust_spinlocks(1); +} + +extern void panic_flush_kmsg_end(void) +{ + printk_safe_flush_on_panic(); + kmsg_dump(KMSG_DUMP_PANIC); + bust_spinlocks(0); + debug_locks_off(); + console_flush_on_panic(); +} + static unsigned long oops_begin(struct pt_regs *regs) { int cpu; @@ -266,7 +291,9 @@ void user_single_step_siginfo(struct task_struct *tsk, info->si_addr = (void __user *)regs->nip; } -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) + +void _exception_pkey(int signr, struct pt_regs *regs, int code, + unsigned long addr, int key) { siginfo_t info; const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \ @@ -289,13 +316,27 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) local_irq_enable(); current->thread.trap_nr = code; + + /* + * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need + * to capture the content, if the task gets killed. + */ + thread_pkey_regs_save(¤t->thread); + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; info.si_addr = (void __user *) addr; + info.si_pkey = key; + force_sig_info(signr, &info, current); } +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + _exception_pkey(signr, regs, code, addr, 0); +} + void system_reset_exception(struct pt_regs *regs) { /* @@ -337,7 +378,7 @@ void system_reset_exception(struct pt_regs *regs) * No debugger or crash dump registered, print logs then * panic. */ - __die("System Reset", regs, SIGABRT); + die("System Reset", regs, SIGABRT); mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); @@ -1564,7 +1605,7 @@ void facility_unavailable_exception(struct pt_regs *regs) u8 status; bool hv; - hv = (regs->trap == 0xf80); + hv = (TRAP(regs) == 0xf80); if (hv) value = mfspr(SPRN_HFSCR); else @@ -2113,13 +2154,13 @@ static int __init ppc_warn_emulated_init(void) if (!dir) return -ENOMEM; - d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir, + d = debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated); if (!d) goto fail; for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) { - d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir, + d = debugfs_create_u32(entries[i].name, 0644, dir, (u32 *)&entries[i].val.counter); if (!d) goto fail; diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 382021324883..c002adcc694c 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -64,6 +64,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) cmpwi cr0,r3,CLOCK_REALTIME cmpwi cr1,r3,CLOCK_MONOTONIC cror cr0*4+eq,cr0*4+eq,cr1*4+eq + + cmpwi cr5,r3,CLOCK_REALTIME_COARSE + cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE + cror cr5*4+eq,cr5*4+eq,cr6*4+eq + + cror cr0*4+eq,cr0*4+eq,cr5*4+eq bne cr0,99f mflr r12 /* r12 saves lr */ @@ -72,6 +78,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) bl V_LOCAL_FUNC(__get_datapage) /* get data page */ lis r7,NSEC_PER_SEC@h /* want nanoseconds */ ori r7,r7,NSEC_PER_SEC@l + beq cr5,70f 50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ bne cr1,80f /* if not monotonic, all done */ @@ -97,19 +104,57 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) ld r0,CFG_TB_UPDATE_COUNT(r3) cmpld cr0,r0,r8 /* check if updated */ bne- 50b + b 78f - /* Add wall->monotonic offset and check for overflow or underflow. + /* + * For coarse clocks we get data directly from the vdso data page, so + * we don't need to call __do_get_tspec, but we still need to do the + * counter trick. */ - add r4,r4,r6 - add r5,r5,r9 - cmpd cr0,r5,r7 - cmpdi cr1,r5,0 - blt 1f - subf r5,r7,r5 - addi r4,r4,1 -1: bge cr1,80f - addi r4,r4,-1 - add r5,r5,r7 +70: ld r8,CFG_TB_UPDATE_COUNT(r3) + andi. r0,r8,1 /* pending update ? loop */ + bne- 70b + add r3,r3,r0 /* r0 is already 0 */ + + /* + * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE + * too + */ + ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) + ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3) + bne cr6,75f + + /* CLOCK_MONOTONIC_COARSE */ + lwa r6,WTOM_CLOCK_SEC(r3) + lwa r9,WTOM_CLOCK_NSEC(r3) + + /* check if counter has updated */ + or r0,r6,r9 +75: or r0,r0,r4 + or r0,r0,r5 + xor r0,r0,r0 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r8 /* check if updated */ + bne- 70b + + /* Counter has not updated, so continue calculating proper values for + * sec and nsec if monotonic coarse, or just return with the proper + * values for realtime. + */ + bne cr6,80f + + /* Add wall->monotonic offset and check for overflow or underflow */ +78: add r4,r4,r6 + add r5,r5,r9 + cmpd cr0,r5,r7 + cmpdi cr1,r5,0 + blt 79f + subf r5,r7,r5 + addi r4,r4,1 +79: bge cr1,80f + addi r4,r4,-1 + add r5,r5,r7 80: std r4,TSPC64_TV_SEC(r11) std r5,TSPC64_TV_NSEC(r11) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 74901a87bf7a..c8af90ff49f0 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -273,6 +273,7 @@ SECTIONS #ifdef CONFIG_PPC32 .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA + *(.data.rel*) *(.sdata) *(.sdata2) *(.got.plt) *(.got) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 87da80ccced1..6256dc3b0087 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -6,6 +6,9 @@ * * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c */ + +#define pr_fmt(fmt) "watchdog: " fmt + #include <linux/kernel.h> #include <linux/param.h> #include <linux/init.h> @@ -26,15 +29,45 @@ #include <asm/paca.h> /* - * The watchdog has a simple timer that runs on each CPU, once per timer - * period. This is the heartbeat. + * The powerpc watchdog ensures that each CPU is able to service timers. + * The watchdog sets up a simple timer on each CPU to run once per timer + * period, and updates a per-cpu timestamp and a "pending" cpumask. This is + * the heartbeat. + * + * Then there are two systems to check that the heartbeat is still running. + * The local soft-NMI, and the SMP checker. + * + * The soft-NMI checker can detect lockups on the local CPU. When interrupts + * are disabled with local_irq_disable(), platforms that use soft-masking + * can leave hardware interrupts enabled and handle them with a masked + * interrupt handler. The masked handler can send the timer interrupt to the + * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI + * interrupt, and can be used to detect CPUs stuck with IRQs disabled. + * + * The soft-NMI checker will compare the heartbeat timestamp for this CPU + * with the current time, and take action if the difference exceeds the + * watchdog threshold. + * + * The limitation of the soft-NMI watchdog is that it does not work when + * interrupts are hard disabled or otherwise not being serviced. This is + * solved by also having a SMP watchdog where all CPUs check all other + * CPUs heartbeat. * - * Then there are checks to see if the heartbeat has not triggered on a CPU - * for the panic timeout period. Currently the watchdog only supports an - * SMP check, so the heartbeat only turns on when we have 2 or more CPUs. + * The SMP checker can detect lockups on other CPUs. A gobal "pending" + * cpumask is kept, containing all CPUs which enable the watchdog. Each + * CPU clears their pending bit in their heartbeat timer. When the bitmask + * becomes empty, the last CPU to clear its pending bit updates a global + * timestamp and refills the pending bitmask. * - * This is not an NMI watchdog, but Linux uses that name for a generic - * watchdog in some cases, so NMI gets used in some places. + * In the heartbeat timer, if any CPU notices that the global timestamp has + * not been updated for a period exceeding the watchdog threshold, then it + * means the CPU(s) with their bit still set in the pending mask have had + * their heartbeat stop, and action is taken. + * + * Some platforms implement true NMI IPIs, which can by used by the SMP + * watchdog to detect an unresponsive CPU and pull it out of its stuck + * state with the NMI IPI, to get crash/debug data from it. This way the + * SMP watchdog can detect hardware interrupts off lockups. */ static cpumask_t wd_cpus_enabled __read_mostly; @@ -47,19 +80,7 @@ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ static DEFINE_PER_CPU(struct timer_list, wd_timer); static DEFINE_PER_CPU(u64, wd_timer_tb); -/* - * These are for the SMP checker. CPUs clear their pending bit in their - * heartbeat. If the bitmask becomes empty, the time is noted and the - * bitmask is refilled. - * - * All CPUs clear their bit in the pending mask every timer period. - * Once all have cleared, the time is noted and the bits are reset. - * If the time since all clear was greater than the panic timeout, - * we can panic with the list of stuck CPUs. - * - * This will work best with NMI IPIs for crash code so the stuck CPUs - * can be pulled out to get their backtraces. - */ +/* SMP checker bits */ static unsigned long __wd_smp_lock; static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; @@ -90,7 +111,7 @@ static inline void wd_smp_unlock(unsigned long *flags) static void wd_lockup_ipi(struct pt_regs *regs) { - pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id()); + pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id()); print_modules(); print_irqtrace_events(current); if (regs) @@ -131,8 +152,8 @@ static void watchdog_smp_panic(int cpu, u64 tb) if (cpumask_weight(&wd_smp_cpus_pending) == 0) goto out; - pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n", - cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", + cpu, cpumask_pr_args(&wd_smp_cpus_pending)); if (!sysctl_hardlockup_all_cpu_backtrace) { /* @@ -175,7 +196,7 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { unsigned long flags; - pr_emerg("Watchdog CPU:%d became unstuck\n", cpu); + pr_emerg("CPU %d became unstuck\n", cpu); wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); @@ -233,13 +254,10 @@ void soft_nmi_interrupt(struct pt_regs *regs) } set_cpu_stuck(cpu, tb); - pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu); + pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip); print_modules(); print_irqtrace_events(current); - if (regs) - show_regs(regs); - else - dump_stack(); + show_regs(regs); wd_smp_unlock(&flags); @@ -388,30 +406,8 @@ int __init watchdog_nmi_probe(void) "powerpc/watchdog:online", start_wd_on_cpu, stop_wd_on_cpu); if (err < 0) { - pr_warn("Watchdog could not be initialized"); + pr_warn("could not be initialized"); return err; } return 0; } - -static void handle_backtrace_ipi(struct pt_regs *regs) -{ - nmi_cpu_backtrace(regs); -} - -static void raise_backtrace_ipi(cpumask_t *mask) -{ - unsigned int cpu; - - for_each_cpu(cpu, mask) { - if (cpu == smp_processor_id()) - handle_backtrace_ipi(NULL); - else - smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000); - } -} - -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) -{ - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); -} diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2d46037ce936..e4f70c33fbc7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -93,10 +93,10 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); static int dynamic_mt_modes = 6; -module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR); +module_param(dynamic_mt_modes, int, 0644); MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)"); static int target_smt_mode; -module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); +module_param(target_smt_mode, int, 0644); MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); static bool indep_threads_mode = true; @@ -109,12 +109,10 @@ static struct kernel_param_ops module_param_ops = { .get = param_get_int, }; -module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, - S_IRUGO | S_IWUSR); +module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644); MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization"); -module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, - S_IRUGO | S_IWUSR); +module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644); MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index c356f9a40b24..b11043b23c18 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -87,8 +87,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI); } if (dsisr & DSISR_MC_TLB_MULTI) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID); + tlbiel_all_lpid(vcpu->kvm->arch.radix); dsisr &= ~DSISR_MC_TLB_MULTI; } /* Any other errors we don't understand? */ @@ -105,8 +104,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) reload_slb(vcpu); break; case SRR1_MC_IFETCH_TLBMULTI: - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID); + tlbiel_all_lpid(vcpu->kvm->arch.radix); break; default: handled = 0; @@ -268,17 +266,19 @@ static void kvmppc_tb_resync_done(void) * secondary threads to proceed. * - All secondary threads will eventually call opal hmi handler on * their exit path. + * + * Returns 1 if the timebase offset should be applied, 0 if not. */ long kvmppc_realmode_hmi_handler(void) { - int ptid = local_paca->kvm_hstate.ptid; bool resync_req; - /* This is only called on primary thread. */ - BUG_ON(ptid != 0); __this_cpu_inc(irq_stat.hmi_exceptions); + if (hmi_handle_debugtrig(NULL) >= 0) + return 1; + /* * By now primary thread has already completed guest->host * partition switch but haven't signaled secondaries yet. diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 26c11f678fbf..8888e625a999 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x) } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ -static int global_invalidates(struct kvm *kvm, unsigned long flags) +static int global_invalidates(struct kvm *kvm) { int global; int cpu; @@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); rb = compute_tlbie_rb(v, pte_r, pte_index); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); /* * The reference (R) and change (C) bits in a HPT * entry can be set by hardware at any time up until @@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) if (kvm_is_radix(kvm)) return H_FUNCTION; - global = global_invalidates(kvm, 0); + global = global_invalidates(kvm); for (i = 0; i < 4 && ret == H_SUCCESS; ) { n = 0; for (; i < 4; ++i) { @@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rb = compute_tlbie_rb(v, r, pte_index); hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | HPTE_V_ABSENT); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), - true); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); /* Don't lose R/C bit updates done by hardware */ r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9c61f736c75b..7886b313d135 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1908,16 +1908,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) bne 27f bl kvmppc_realmode_hmi_handler nop + cmpdi r3, 0 li r12, BOOK3S_INTERRUPT_HMI /* - * At this point kvmppc_realmode_hmi_handler would have resync-ed - * the TB. Hence it is not required to subtract guest timebase - * offset from timebase. So, skip it. + * At this point kvmppc_realmode_hmi_handler may have resync-ed + * the TB, and if it has, we must not subtract the guest timebase + * offset from the timebase. So, skip it. * * Also, do not call kvmppc_subcore_exit_guest() because it has * been invoked as part of kvmppc_realmode_hmi_handler(). */ - b 30f + beq 30f 27: /* Subtract timebase offset from timebase */ @@ -3248,7 +3249,7 @@ kvmppc_bad_host_intr: mfctr r4 #endif mfxer r5 - lbz r6, PACASOFTIRQEN(r13) + lbz r6, PACAIRQSOFTMASK(r13) std r3, _LINK(r1) std r4, _CTR(r1) std r5, _XER(r1) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index d329b2add7e2..b8356cdc0c04 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1039,7 +1039,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics) return; } - xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, xics, &xics_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index d469224c4ada..e0d881ab304e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -23,19 +23,26 @@ #include <asm/code-patching.h> #include <asm/setup.h> -static int __patch_instruction(unsigned int *addr, unsigned int instr) +static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, + unsigned int *patch_addr) { int err; - __put_user_size(instr, addr, 4, err); + __put_user_size(instr, patch_addr, 4, err); if (err) return err; - asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr)); + asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), + "r" (exec_addr)); return 0; } +int raw_patch_instruction(unsigned int *addr, unsigned int instr) +{ + return __patch_instruction(addr, instr, addr); +} + #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); @@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr) int patch_instruction(unsigned int *addr, unsigned int instr) { int err; - unsigned int *dest = NULL; + unsigned int *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -148,8 +155,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr) * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!this_cpu_read(*PTRRELOC(&text_poke_area))) - return __patch_instruction(addr, instr); + if (!this_cpu_read(text_poke_area)) + return raw_patch_instruction(addr, instr); local_irq_save(flags); @@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr) goto out; } - dest = (unsigned int *)(text_poke_addr) + + patch_addr = (unsigned int *)(text_poke_addr) + ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); - /* - * We use __put_user_size so that we can handle faults while - * writing to dest and return err to handle faults gracefully - */ - __put_user_size(instr, dest, 4, err); - if (!err) - asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync" - ::"r" (dest), "r"(addr)); + __patch_instruction(addr, instr, patch_addr); err = unmap_patch_area(text_poke_addr); if (err) @@ -184,7 +184,7 @@ out: int patch_instruction(unsigned int *addr, unsigned int instr) { - return __patch_instruction(addr, instr); + return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ @@ -302,6 +302,11 @@ int instr_is_relative_branch(unsigned int instr) return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } +int instr_is_relative_link_branch(unsigned int instr) +{ + return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK); +} + static unsigned long branch_iform_target(const unsigned int *instr) { signed long imm; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index a95ea007d654..73697c4e3468 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -62,7 +62,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, } } - patch_instruction(dest, instr); + raw_patch_instruction(dest, instr); return 0; } @@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) } for (; dest < end; dest++) - patch_instruction(dest, PPC_INST_NOP); + raw_patch_instruction(dest, PPC_INST_NOP); return 0; } @@ -170,7 +170,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, PPC_INST_LWSYNC); } } @@ -188,7 +188,7 @@ static void do_final_fixups(void) length = (__end_interrupts - _stext) / sizeof(int); while (length--) { - patch_instruction(dest, *src); + raw_patch_instruction(dest, *src); src++; dest++; } diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index f29212e40f40..849f50cd62f2 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -67,7 +67,7 @@ void __init MMU_init_hw(void) /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ #ifdef CONFIG_PIN_TLB_DATA unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY; #ifdef CONFIG_PIN_TLB_IMMR int i = 29; #else @@ -79,7 +79,7 @@ void __init MMU_init_hw(void) for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { mtspr(SPRN_MD_CTR, ctr | (i << 8)); mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2); mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); addr += LARGE_PAGE_SIZE_8M; mem -= LARGE_PAGE_SIZE_8M; diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 76a6b057d454..f06f3577d8d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -9,7 +9,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ - init-common.o mmu_context.o + init-common.o mmu_context.o drmem.o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o @@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o +obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c new file mode 100644 index 000000000000..1604110c4238 --- /dev/null +++ b/arch/powerpc/mm/drmem.c @@ -0,0 +1,439 @@ +/* + * Dynamic reconfiguration memory support + * + * Copyright 2017 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "drmem: " fmt + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/memblock.h> +#include <asm/prom.h> +#include <asm/drmem.h> + +static struct drmem_lmb_info __drmem_info; +struct drmem_lmb_info *drmem_info = &__drmem_info; + +u64 drmem_lmb_memory_max(void) +{ + struct drmem_lmb *last_lmb; + + last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; + return last_lmb->base_addr + drmem_lmb_size(); +} + +static u32 drmem_lmb_flags(struct drmem_lmb *lmb) +{ + /* + * Return the value of the lmb flags field minus the reserved + * bit used internally for hotplug processing. + */ + return lmb->flags & ~DRMEM_LMB_RESERVED; +} + +static struct property *clone_property(struct property *prop, u32 prop_sz) +{ + struct property *new_prop; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return NULL; + + new_prop->name = kstrdup(prop->name, GFP_KERNEL); + new_prop->value = kzalloc(prop_sz, GFP_KERNEL); + if (!new_prop->name || !new_prop->value) { + kfree(new_prop->name); + kfree(new_prop->value); + kfree(new_prop); + return NULL; + } + + new_prop->length = prop_sz; +#if defined(CONFIG_OF_DYNAMIC) + of_property_set_flag(new_prop, OF_DYNAMIC); +#endif + return new_prop; +} + +static int drmem_update_dt_v1(struct device_node *memory, + struct property *prop) +{ + struct property *new_prop; + struct of_drconf_cell_v1 *dr_cell; + struct drmem_lmb *lmb; + u32 *p; + + new_prop = clone_property(prop, prop->length); + if (!new_prop) + return -1; + + p = new_prop->value; + *p++ = cpu_to_be32(drmem_info->n_lmbs); + + dr_cell = (struct of_drconf_cell_v1 *)p; + + for_each_drmem_lmb(lmb) { + dr_cell->base_addr = cpu_to_be64(lmb->base_addr); + dr_cell->drc_index = cpu_to_be32(lmb->drc_index); + dr_cell->aa_index = cpu_to_be32(lmb->aa_index); + dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb)); + + dr_cell++; + } + + of_update_property(memory, new_prop); + return 0; +} + +static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, + struct drmem_lmb *lmb) +{ + dr_cell->base_addr = cpu_to_be64(lmb->base_addr); + dr_cell->drc_index = cpu_to_be32(lmb->drc_index); + dr_cell->aa_index = cpu_to_be32(lmb->aa_index); + dr_cell->flags = cpu_to_be32(lmb->flags); +} + +static int drmem_update_dt_v2(struct device_node *memory, + struct property *prop) +{ + struct property *new_prop; + struct of_drconf_cell_v2 *dr_cell; + struct drmem_lmb *lmb, *prev_lmb; + u32 lmb_sets, prop_sz, seq_lmbs; + u32 *p; + + /* First pass, determine how many LMB sets are needed. */ + lmb_sets = 0; + prev_lmb = NULL; + for_each_drmem_lmb(lmb) { + if (!prev_lmb) { + prev_lmb = lmb; + lmb_sets++; + continue; + } + + if (prev_lmb->aa_index != lmb->aa_index || + prev_lmb->flags != lmb->flags) + lmb_sets++; + + prev_lmb = lmb; + } + + prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32); + new_prop = clone_property(prop, prop_sz); + if (!new_prop) + return -1; + + p = new_prop->value; + *p++ = cpu_to_be32(lmb_sets); + + dr_cell = (struct of_drconf_cell_v2 *)p; + + /* Second pass, populate the LMB set data */ + prev_lmb = NULL; + seq_lmbs = 0; + for_each_drmem_lmb(lmb) { + if (prev_lmb == NULL) { + /* Start of first LMB set */ + prev_lmb = lmb; + init_drconf_v2_cell(dr_cell, lmb); + seq_lmbs++; + continue; + } + + if (prev_lmb->aa_index != lmb->aa_index || + prev_lmb->flags != lmb->flags) { + /* end of one set, start of another */ + dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs); + dr_cell++; + + init_drconf_v2_cell(dr_cell, lmb); + seq_lmbs = 1; + } else { + seq_lmbs++; + } + + prev_lmb = lmb; + } + + /* close out last LMB set */ + dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs); + of_update_property(memory, new_prop); + return 0; +} + +int drmem_update_dt(void) +{ + struct device_node *memory; + struct property *prop; + int rc = -1; + + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!memory) + return -1; + + prop = of_find_property(memory, "ibm,dynamic-memory", NULL); + if (prop) { + rc = drmem_update_dt_v1(memory, prop); + } else { + prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL); + if (prop) + rc = drmem_update_dt_v2(memory, prop); + } + + of_node_put(memory); + return rc; +} + +static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, + const __be32 **prop) +{ + const __be32 *p = *prop; + + lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + lmb->drc_index = of_read_number(p++, 1); + + p++; /* skip reserved field */ + + lmb->aa_index = of_read_number(p++, 1); + lmb->flags = of_read_number(p++, 1); + + *prop = p; +} + +static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + struct drmem_lmb lmb; + u32 i, n_lmbs; + + n_lmbs = of_read_number(prop++, 1); + + for (i = 0; i < n_lmbs; i++) { + read_drconf_v1_cell(&lmb, &prop); + func(&lmb, &usm); + } +} + +static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, + const __be32 **prop) +{ + const __be32 *p = *prop; + + dr_cell->seq_lmbs = of_read_number(p++, 1); + dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + dr_cell->drc_index = of_read_number(p++, 1); + dr_cell->aa_index = of_read_number(p++, 1); + dr_cell->flags = of_read_number(p++, 1); + + *prop = p; +} + +static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + struct of_drconf_cell_v2 dr_cell; + struct drmem_lmb lmb; + u32 i, j, lmb_sets; + + lmb_sets = of_read_number(prop++, 1); + + for (i = 0; i < lmb_sets; i++) { + read_drconf_v2_cell(&dr_cell, &prop); + + for (j = 0; j < dr_cell.seq_lmbs; j++) { + lmb.base_addr = dr_cell.base_addr; + dr_cell.base_addr += drmem_lmb_size(); + + lmb.drc_index = dr_cell.drc_index; + dr_cell.drc_index++; + + lmb.aa_index = dr_cell.aa_index; + lmb.flags = dr_cell.flags; + + func(&lmb, &usm); + } + } +} + +#ifdef CONFIG_PPC_PSERIES +void __init walk_drmem_lmbs_early(unsigned long node, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + const __be32 *prop, *usm; + int len; + + prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); + if (!prop || len < dt_root_size_cells * sizeof(__be32)) + return; + + drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); + + usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len); + + prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len); + if (prop) { + __walk_drmem_v1_lmbs(prop, usm, func); + } else { + prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2", + &len); + if (prop) + __walk_drmem_v2_lmbs(prop, usm, func); + } + + memblock_dump_all(); +} + +#endif + +static int __init init_drmem_lmb_size(struct device_node *dn) +{ + const __be32 *prop; + int len; + + if (drmem_info->lmb_size) + return 0; + + prop = of_get_property(dn, "ibm,lmb-size", &len); + if (!prop || len < dt_root_size_cells * sizeof(__be32)) { + pr_info("Could not determine LMB size\n"); + return -1; + } + + drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); + return 0; +} + +/* + * Returns the property linux,drconf-usable-memory if + * it exists (the property exists only in kexec/kdump kernels, + * added by kexec-tools) + */ +static const __be32 *of_get_usable_memory(struct device_node *dn) +{ + const __be32 *prop; + u32 len; + + prop = of_get_property(dn, "linux,drconf-usable-memory", &len); + if (!prop || len < sizeof(unsigned int)) + return NULL; + + return prop; +} + +void __init walk_drmem_lmbs(struct device_node *dn, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + const __be32 *prop, *usm; + + if (init_drmem_lmb_size(dn)) + return; + + usm = of_get_usable_memory(dn); + + prop = of_get_property(dn, "ibm,dynamic-memory", NULL); + if (prop) { + __walk_drmem_v1_lmbs(prop, usm, func); + } else { + prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL); + if (prop) + __walk_drmem_v2_lmbs(prop, usm, func); + } +} + +static void __init init_drmem_v1_lmbs(const __be32 *prop) +{ + struct drmem_lmb *lmb; + + drmem_info->n_lmbs = of_read_number(prop++, 1); + + drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), + GFP_KERNEL); + if (!drmem_info->lmbs) + return; + + for_each_drmem_lmb(lmb) + read_drconf_v1_cell(lmb, &prop); +} + +static void __init init_drmem_v2_lmbs(const __be32 *prop) +{ + struct drmem_lmb *lmb; + struct of_drconf_cell_v2 dr_cell; + const __be32 *p; + u32 i, j, lmb_sets; + int lmb_index; + + lmb_sets = of_read_number(prop++, 1); + + /* first pass, calculate the number of LMBs */ + p = prop; + for (i = 0; i < lmb_sets; i++) { + read_drconf_v2_cell(&dr_cell, &p); + drmem_info->n_lmbs += dr_cell.seq_lmbs; + } + + drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), + GFP_KERNEL); + if (!drmem_info->lmbs) + return; + + /* second pass, read in the LMB information */ + lmb_index = 0; + p = prop; + + for (i = 0; i < lmb_sets; i++) { + read_drconf_v2_cell(&dr_cell, &p); + + for (j = 0; j < dr_cell.seq_lmbs; j++) { + lmb = &drmem_info->lmbs[lmb_index++]; + + lmb->base_addr = dr_cell.base_addr; + dr_cell.base_addr += drmem_info->lmb_size; + + lmb->drc_index = dr_cell.drc_index; + dr_cell.drc_index++; + + lmb->aa_index = dr_cell.aa_index; + lmb->flags = dr_cell.flags; + } + } +} + +static int __init drmem_init(void) +{ + struct device_node *dn; + const __be32 *prop; + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dn) { + pr_info("No dynamic reconfiguration memory found\n"); + return 0; + } + + if (init_drmem_lmb_size(dn)) { + of_node_put(dn); + return 0; + } + + prop = of_get_property(dn, "ibm,dynamic-memory", NULL); + if (prop) { + init_drmem_v1_lmbs(prop); + } else { + prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL); + if (prop) + init_drmem_v2_lmbs(prop); + } + + of_node_put(dn); + return 0; +} +late_initcall(drmem_init); diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index c2e7dea59490..876e2a3c79f2 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -112,26 +112,25 @@ struct flag_info { static const struct flag_info flag_array[] = { { -#ifdef CONFIG_PPC_BOOK3S_64 - .mask = _PAGE_PRIVILEGED, - .val = 0, -#else - .mask = _PAGE_USER, + .mask = _PAGE_USER | _PAGE_PRIVILEGED, .val = _PAGE_USER, -#endif .set = "user", .clear = " ", }, { -#if _PAGE_RO == 0 - .mask = _PAGE_RW, + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, .val = _PAGE_RW, -#else - .mask = _PAGE_RO, - .val = 0, -#endif .set = "rw", - .clear = "ro", }, { + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, + .val = _PAGE_RO, + .set = "ro", + }, { +#if _PAGE_NA != 0 + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, + .val = _PAGE_RO, + .set = "na", + }, { +#endif .mask = _PAGE_EXEC, .val = _PAGE_EXEC, .set = " X ", @@ -213,7 +212,7 @@ static const struct flag_info flag_array[] = { .val = H_PAGE_4K_PFN, .set = "4K_pfn", }, { -#endif +#else /* CONFIG_PPC_64K_PAGES */ .mask = H_PAGE_F_GIX, .val = H_PAGE_F_GIX, .set = "f_gix", @@ -224,14 +223,11 @@ static const struct flag_info flag_array[] = { .val = H_PAGE_F_SECOND, .set = "f_second", }, { +#endif /* CONFIG_PPC_64K_PAGES */ #endif .mask = _PAGE_SPECIAL, .val = _PAGE_SPECIAL, .set = "special", - }, { - .mask = _PAGE_SHARED, - .val = _PAGE_SHARED, - .set = "shared", } }; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6e1e39035380..866446cf2d9a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs) */ static int -__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code, + int pkey) { /* * If we are in kernel mode, bail out with a SEGV, this will @@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) if (!user_mode(regs)) return SIGSEGV; - _exception(SIGSEGV, regs, si_code, address); + _exception_pkey(SIGSEGV, regs, si_code, address, pkey); return 0; } static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address) { - return __bad_area_nosemaphore(regs, address, SEGV_MAPERR); + return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0); } -static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) +static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code, + int pkey) { struct mm_struct *mm = current->mm; @@ -137,17 +139,23 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) */ up_read(&mm->mmap_sem); - return __bad_area_nosemaphore(regs, address, si_code); + return __bad_area_nosemaphore(regs, address, si_code, pkey); } static noinline int bad_area(struct pt_regs *regs, unsigned long address) { - return __bad_area(regs, address, SEGV_MAPERR); + return __bad_area(regs, address, SEGV_MAPERR, 0); +} + +static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, + int pkey) +{ + return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey); } static noinline int bad_access(struct pt_regs *regs, unsigned long address) { - return __bad_area(regs, address, SEGV_ACCERR); + return __bad_area(regs, address, SEGV_ACCERR, 0); } static int do_sigbus(struct pt_regs *regs, unsigned long address, @@ -432,6 +440,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (error_code & DSISR_KEYFAULT) + return bad_key_fault_exception(regs, address, + get_mm_addr_key(mm, address)); + /* * We want to do this outside mmap_sem, because reading code around nip * can result in fault, which will cause a deadlock when called with @@ -503,6 +515,31 @@ good_area: * the fault. */ fault = handle_mm_fault(vma, address, flags); + +#ifdef CONFIG_PPC_MEM_KEYS + /* + * if the HPTE is not hashed, hardware will not detect + * a key fault. Lets check if we failed because of a + * software detected key fault. + */ + if (unlikely(fault & VM_FAULT_SIGSEGV) && + !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + is_exec, 0)) { + /* + * The PGD-PDT...PMD-PTE tree may not have been fully setup. + * Hence we cannot walk the tree to locate the PTE, to locate + * the key. Hence let's use vma_pkey() to get the key; instead + * of get_mm_addr_key(). + */ + int pkey = vma_pkey(vma); + + if (likely(pkey)) { + up_read(&mm->mmap_sem); + return bad_key_fault_exception(regs, address, pkey); + } + } +#endif /* CONFIG_PPC_MEM_KEYS */ + major |= fault & VM_FAULT_MAJOR; /* @@ -576,7 +613,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* kernel has accessed a bad area */ - switch (regs->trap) { + switch (TRAP(regs)) { case 0x300: case 0x380: printk(KERN_ALERT "Unable to handle kernel paging request for " diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 6fa450c12d6d..5a69b51d08a3 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpg_prot) { + real_pte_t rpte; unsigned long hpte_group; unsigned long rflags, pa; unsigned long old_pte, new_pte; @@ -54,6 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * need to add in 0x1 if it's a read-only user page */ rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -64,13 +66,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, /* * There MIGHT be an HPTE for this pte */ - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; + unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize, + rpte, 0); - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K, MMU_PAGE_4K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -118,8 +117,7 @@ repeat: return -1; } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 1a68cb19b0e3..2253bbc6a599 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -15,34 +15,22 @@ #include <linux/mm.h> #include <asm/machdep.h> #include <asm/mmu.h> + /* - * index from 0 - 15 + * Return true, if the entry has a slot value which + * the software considers as invalid. */ -bool __rpte_sub_valid(real_pte_t rpte, unsigned long index) +static inline bool hpte_soft_invalid(unsigned long hidx) { - unsigned long g_idx; - unsigned long ptev = pte_val(rpte.pte); - - g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT; - index = index >> 2; - if (g_idx & (0x1 << index)) - return true; - else - return false; + return ((hidx & 0xfUL) == 0xfUL); } + /* * index from 0 - 15 */ -static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index) +bool __rpte_sub_valid(real_pte_t rpte, unsigned long index) { - unsigned long g_idx; - - if (!(ptev & H_PAGE_COMBO)) - return ptev; - index = index >> 2; - g_idx = 0x1 << index; - - return ptev | (g_idx << H_PAGE_F_GIX_SHIFT); + return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index))); } int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, @@ -50,12 +38,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, int ssize, int subpg_prot) { real_pte_t rpte; - unsigned long *hidxp; unsigned long hpte_group; unsigned int subpg_index; - unsigned long rflags, pa, hidx; + unsigned long rflags, pa; unsigned long old_pte, new_pte, subpg_pte; - unsigned long vpn, hash, slot; + unsigned long vpn, hash, slot, gslot; unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift; /* @@ -116,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * On hash insert failure we use old pte value and we don't * want slot information there if we have a insert failure. */ - old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); - new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); + old_pte &= ~H_PAGE_HASHPTE; + new_pte &= ~H_PAGE_HASHPTE; goto htab_insert_hpte; } /* @@ -126,18 +113,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, if (__rpte_sub_valid(rpte, subpg_index)) { int ret; - hash = hpt_hash(vpn, shift, ssize); - hidx = __rpte_to_hidx(rpte, subpg_index); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - - ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, + subpg_index); + ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K, MMU_PAGE_4K, ssize, flags); + /* - *if we failed because typically the HPTE wasn't really here + * If we failed because typically the HPTE wasn't really here * we try an insertion. */ if (ret == -1) @@ -148,6 +131,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, } htab_insert_hpte: + + /* + * Initialize all hidx entries to invalid value, the first time + * the PTE is about to allocate a 4K HPTE. + */ + if (!(old_pte & H_PAGE_COMBO)) + rpte.hidx = INVALID_RPTE_HIDX; + /* * handle H_PAGE_4K_PFN case */ @@ -172,15 +163,39 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { + bool soft_invalid; + hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, MMU_PAGE_4K, MMU_PAGE_4K, ssize); - if (slot == -1) { - if (mftb() & 0x1) + + soft_invalid = hpte_soft_invalid(slot); + if (unlikely(soft_invalid)) { + /* + * We got a valid slot from a hardware point of view. + * but we cannot use it, because we use this special + * value; as defined by hpte_soft_invalid(), to track + * invalid slots. We cannot use it. So invalidate it. + */ + gslot = slot & _PTEIDX_GROUP_IX; + mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize, 0); + } + + if (unlikely(slot == -1 || soft_invalid)) { + /* + * For soft invalid slot, let's ensure that we release a + * slot from the primary, with the hope that we will + * acquire that slot next time we try. This will ensure + * that we do not get the same soft-invalid slot. + */ + if (soft_invalid || (mftb() & 0x1)) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? @@ -198,21 +213,10 @@ repeat: MMU_PAGE_4K, MMU_PAGE_4K, old_pte); return -1; } - /* - * Insert slot number & secondary bit in PTE second half, - * clear H_PAGE_BUSY and set appropriate HPTE slot bit - * Since we have H_PAGE_BUSY set on ptep, we can be sure - * nobody is undating hidx. - */ - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); - rpte.hidx &= ~(0xfUL << (subpg_index << 2)); - *hidxp = rpte.hidx | (slot << (subpg_index << 2)); - new_pte = mark_subptegroup_valid(new_pte, subpg_index); - new_pte |= H_PAGE_HASHPTE; - /* - * check __real_pte for details on matching smp_rmb() - */ - smp_wmb(); + + new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); + new_pte |= H_PAGE_HASHPTE; + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } @@ -221,6 +225,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize) { + real_pte_t rpte; unsigned long hpte_group; unsigned long rflags, pa; unsigned long old_pte, new_pte; @@ -257,6 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -264,16 +270,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access, vpn = hpt_vpn(ea, vsid, ssize); if (unlikely(old_pte & H_PAGE_HASHPTE)) { + unsigned long gslot; + /* * There MIGHT be an HPTE for this pte */ - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K, MMU_PAGE_64K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; @@ -322,9 +325,9 @@ repeat: MMU_PAGE_64K, MMU_PAGE_64K, old_pte); return -1; } + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 640cf566e986..a0675e91ad7d 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -47,6 +47,103 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); +static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) +{ + unsigned long rb; + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + + asm volatile("tlbiel %0" : : "r" (rb)); +} + +/* + * tlbiel instruction for hash, set invalidation + * i.e., r=1 and is=01 or is=10 or is=11 + */ +static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, + unsigned int pid, + unsigned int ric, unsigned int prs) +{ + unsigned long rb; + unsigned long rs; + unsigned int r = 0; /* hash format */ + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); + + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : "memory"); +} + + +static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa206(set, is); + + asm volatile("ptesync": : :"memory"); +} + +static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the first set of the TLB, and any caching of partition table + * entries. Then flush the remaining sets of the TLB. Hash mode uses + * partition scoped TLB translations. + */ + tlbiel_hash_set_isa300(0, is, 0, 2, 0); + for (set = 1; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + + /* + * Now invalidate the process table cache. + * + * From ISA v3.0B p. 1078: + * The following forms are invalid. + * * PRS=1, R=0, and RIC!=2 (The only process-scoped + * HPT caching is of the Process Table.) + */ + tlbiel_hash_set_isa300(0, is, 0, 2, 1); + + asm volatile("ptesync": : :"memory"); +} + +void hash__tlbiel_all(unsigned int action) +{ + unsigned int is; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + is = 3; + break; + case TLB_INVAL_SCOPE_LPID: + is = 2; + break; + default: + BUG(); + } + + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + tlbiel_all_isa206(POWER8_TLB_SETS, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) + tlbiel_all_isa206(POWER7_TLB_SETS, is); + else + WARN(1, "%s called on pre-POWER7 CPU\n", __func__); + + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); +} + static inline unsigned long ___tlbie(unsigned long vpn, int psize, int apsize, int ssize) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 655a5a9a183d..7d07c7e17db6 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -36,6 +36,7 @@ #include <linux/memblock.h> #include <linux/context_tracking.h> #include <linux/libfdt.h> +#include <linux/pkeys.h> #include <asm/debugfs.h> #include <asm/processor.h> @@ -232,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) */ rflags |= HPTE_R_M; + rflags |= pte_to_hpte_pkey_bits(pteflags); return rflags; } @@ -606,7 +608,7 @@ static void init_hpte_page_sizes(void) continue; /* not a supported page size */ for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) { penc = mmu_psize_defs[bp].penc[ap]; - if (penc == -1) + if (penc == -1 || !mmu_psize_defs[ap].shift) continue; shift = mmu_psize_defs[ap].shift - LP_SHIFT; if (shift <= 0) @@ -772,7 +774,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) int rc; rc = mmu_hash_ops.resize_hpt(target_hpt_shift); - if (rc) + if (rc && (rc != -ENODEV)) printk(KERN_WARNING "Unable to resize hash page table to target order %d: %d\n", target_hpt_shift, rc); @@ -979,8 +981,9 @@ void __init hash__early_init_devtree(void) void __init hash__early_init_mmu(void) { +#ifndef CONFIG_PPC_64K_PAGES /* - * We have code in __hash_page_64K() and elsewhere, which assumes it can + * We have code in __hash_page_4K() and elsewhere, which assumes it can * do the following: * new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX); * @@ -991,6 +994,7 @@ void __init hash__early_init_mmu(void) * with a BUILD_BUG_ON(). */ BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3))); +#endif /* CONFIG_PPC_64K_PAGES */ htab_init_page_sizes(); @@ -1049,6 +1053,10 @@ void __init hash__early_init_mmu(void) pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); + + if (cpu_has_feature(CPU_FTR_ARCH_206) + && cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } #ifdef CONFIG_SMP @@ -1068,6 +1076,10 @@ void hash__early_init_mmu_secondary(void) } /* Initialize SLB */ slb_initialize(); + + if (cpu_has_feature(CPU_FTR_ARCH_206) + && cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } #endif /* CONFIG_SMP */ @@ -1569,6 +1581,30 @@ out_exit: local_irq_restore(flags); } +#ifdef CONFIG_PPC_MEM_KEYS +/* + * Return the protection key associated with the given address and the + * mm_struct. + */ +u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) +{ + pte_t *ptep; + u16 pkey = 0; + unsigned long flags; + + if (!mm || !mm->pgd) + return 0; + + local_irq_save(flags); + ptep = find_linux_pte(mm->pgd, address, NULL, NULL); + if (ptep) + pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep))); + local_irq_restore(flags); + + return pkey; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline void tm_flush_hash_page(int local) { @@ -1592,29 +1628,42 @@ static inline void tm_flush_hash_page(int local) } #endif +/* + * Return the global hash slot, corresponding to the given PTE, which contains + * the HPTE. + */ +unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, + int ssize, real_pte_t rpte, unsigned int subpg_index) +{ + unsigned long hash, gslot, hidx; + + hash = hpt_hash(vpn, shift, ssize); + hidx = __rpte_to_hidx(rpte, subpg_index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + gslot += hidx & _PTEIDX_GROUP_IX; + return gslot; +} + /* WARNING: This is called from hash_low_64.S, if you change this prototype, * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, unsigned long flags) { - unsigned long hash, index, shift, hidx, slot; + unsigned long index, shift, gslot; int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { - hash = hpt_hash(vpn, shift, ssize); - hidx = __rpte_to_hidx(pte, index); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); + gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index); + DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot); /* * We use same base page size and actual psize, because we don't * use these functions for hugepage */ - mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize, + mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize, ssize, local); } pte_iterate_hashed_end(); @@ -1825,16 +1874,24 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); - /* On LPAR systems, the first entry is our RMA region, - * non-LPAR 64-bit hash MMU systems don't have a limitation - * on real mode access, but using the first entry works well - * enough. We also clamp it to 1G to avoid some funky things - * such as RTAS bugs etc... + /* + * On virtualized systems the first entry is our RMA region aka VRMA, + * non-virtualized 64-bit hash MMU systems don't have a limitation + * on real mode access. + * + * For guests on platforms before POWER9, we clamp the it limit to 1G + * to avoid some funky things such as RTAS bugs etc... */ - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { + ppc64_rma_size = first_memblock_size; + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) + ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000); - /* Finally limit subsequent allocations */ - memblock_set_current_limit(ppc64_rma_size); + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_rma_size); + } else { + ppc64_rma_size = ULONG_MAX; + } } #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 0c2a91df3210..12511f5a015f 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -23,6 +23,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) { + real_pte_t rpte; unsigned long vpn; unsigned long old_pte, new_pte; unsigned long rflags, pa, sz; @@ -62,6 +63,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -72,15 +74,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & H_PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ - unsigned long hash, slot; + unsigned long gslot; - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize, mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -107,8 +104,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot); } /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a9b9083c5e49..876da2bc1796 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -96,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, *hpdp = __hugepd(__pa(new) | (shift_to_mmu_psize(pshift) << 2)); #elif defined(CONFIG_PPC_8xx) - *hpdp = __hugepd(__pa(new) | + *hpdp = __hugepd(__pa(new) | _PMD_USER | (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K) | _PMD_PRESENT); #else @@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page) * So long as we atomically load page table pointers we are safe against teardown, * we can follow the address down to the the page and take a ref on it. * This function need to be called with interrupts disabled. We use this variant - * when we have MSR[EE] = 0 but the paca->soft_enabled = 1 + * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED */ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hpage_shift) @@ -855,9 +855,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, pte = READ_ONCE(*ptep); - if (!pte_present(pte) || !pte_read(pte)) - return 0; - if (write && !pte_write(pte)) + if (!pte_access_permitted(pte, write)) return 0; /* hugepages are never "special" */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a07722531b32..fdb424a29f03 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; @@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { - struct vmem_altmap *altmap; void *p; int rc; if (vmemmap_populated(start, page_size)) continue; - /* altmap lookups only work at section boundaries */ - altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); - - p = __vmemmap_alloc_block_buf(page_size, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(page_size, altmap); + else + p = vmemmap_alloc_block_buf(page_size, node); if (!p) return -ENOMEM; @@ -214,9 +214,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) rc = vmemmap_create_mapping(start, page_size, __pa(p)); if (rc < 0) { - pr_warning( - "vmemmap_populate: Unable to create vmemmap mapping: %d\n", - rc); + pr_warn("%s: Unable to create vmemmap mapping: %d\n", + __func__, rc); return -EFAULT; } } @@ -257,7 +256,8 @@ static unsigned long vmemmap_list_free(unsigned long start) return vmem_back->phys; } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_order = get_order(page_size); @@ -268,7 +268,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) for (; start < end; start += page_size) { unsigned long nr_pages, addr; - struct vmem_altmap *altmap; struct page *section_base; struct page *page; @@ -288,7 +287,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) section_base = pfn_to_page(vmemmap_section_start(start)); nr_pages = 1 << page_order; - altmap = to_vmem_altmap((unsigned long) section_base); if (altmap) { vmem_altmap_free(altmap, nr_pages); } else if (PageReserved(page)) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4362b86ef84c..fe8c61149fb8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -138,21 +139,19 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) start = (unsigned long)__va(start); rc = create_section_mapping(start, start + size); if (rc) { - pr_warning( - "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", + pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct vmem_altmap *altmap; struct page *page; int ret; @@ -161,11 +160,10 @@ int arch_remove_memory(u64 start, u64 size) * when querying the zone. */ page = pfn_to_page(start_pfn); - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); - ret = __remove_pages(page_zone(page), start_pfn, nr_pages); + ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); if (ret) return ret; diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index d60a62bf4fc7..0ab297c4cfad 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/cpu.h> +#include <linux/sched/mm.h> #include <asm/mmu_context.h> @@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * * On the read side the barrier is in pte_xchg(), which orders * the store to the PTE vs the load of mm_cpumask. + * + * This full barrier is needed by membarrier when switching + * between processes after store to rq->curr, before user-space + * memory accesses. */ smp_mb(); @@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (new_on_cpu) radix_kvm_prefetch_workaround(next); + else + membarrier_arch_switch_mm(prev, next, tsk); /* * The actual HW switching method differs between the various diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 59c0766ae4e0..929d9ef7083f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/mm.h> +#include <linux/pkeys.h> #include <linux/spinlock.h> #include <linux/idr.h> #include <linux/export.h> @@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) subpage_prot_init_new_context(mm); + pkey_mm_init(mm); return index; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index adb6364f4091..314d19ab9385 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -40,6 +40,7 @@ #include <asm/hvcall.h> #include <asm/setup.h> #include <asm/vdso.h> +#include <asm/drmem.h> static int numa_enabled = 1; @@ -179,21 +180,6 @@ static const __be32 *of_get_associativity(struct device_node *dev) return of_get_property(dev, "ibm,associativity", NULL); } -/* - * Returns the property linux,drconf-usable-memory if - * it exists (the property exists only in kexec/kdump kernels, - * added by kexec-tools) - */ -static const __be32 *of_get_usable_memory(struct device_node *memory) -{ - const __be32 *prop; - u32 len; - prop = of_get_property(memory, "linux,drconf-usable-memory", &len); - if (!prop || len < sizeof(unsigned int)) - return NULL; - return prop; -} - int __node_distance(int a, int b) { int i; @@ -387,69 +373,6 @@ static unsigned long read_n_cells(int n, const __be32 **buf) return result; } -/* - * Read the next memblock list entry from the ibm,dynamic-memory property - * and return the information in the provided of_drconf_cell structure. - */ -static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp) -{ - const __be32 *cp; - - drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp); - - cp = *cellp; - drmem->drc_index = of_read_number(cp, 1); - drmem->reserved = of_read_number(&cp[1], 1); - drmem->aa_index = of_read_number(&cp[2], 1); - drmem->flags = of_read_number(&cp[3], 1); - - *cellp = cp + 4; -} - -/* - * Retrieve and validate the ibm,dynamic-memory property of the device tree. - * - * The layout of the ibm,dynamic-memory property is a number N of memblock - * list entries followed by N memblock list entries. Each memblock list entry - * contains information as laid out in the of_drconf_cell struct above. - */ -static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm) -{ - const __be32 *prop; - u32 len, entries; - - prop = of_get_property(memory, "ibm,dynamic-memory", &len); - if (!prop || len < sizeof(unsigned int)) - return 0; - - entries = of_read_number(prop++, 1); - - /* Now that we know the number of entries, revalidate the size - * of the property read in to ensure we have everything - */ - if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int)) - return 0; - - *dm = prop; - return entries; -} - -/* - * Retrieve and validate the ibm,lmb-size property for drconf memory - * from the device tree. - */ -static u64 of_get_lmb_size(struct device_node *memory) -{ - const __be32 *prop; - u32 len; - - prop = of_get_property(memory, "ibm,lmb-size", &len); - if (!prop || len < sizeof(unsigned int)) - return 0; - - return read_n_cells(n_mem_size_cells, &prop); -} - struct assoc_arrays { u32 n_arrays; u32 array_sz; @@ -466,19 +389,27 @@ struct assoc_arrays { * indicating the size of each associativity array, followed by a list * of N associativity arrays. */ -static int of_get_assoc_arrays(struct device_node *memory, - struct assoc_arrays *aa) +static int of_get_assoc_arrays(struct assoc_arrays *aa) { + struct device_node *memory; const __be32 *prop; u32 len; + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!memory) + return -1; + prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); - if (!prop || len < 2 * sizeof(unsigned int)) + if (!prop || len < 2 * sizeof(unsigned int)) { + of_node_put(memory); return -1; + } aa->n_arrays = of_read_number(prop++, 1); aa->array_sz = of_read_number(prop++, 1); + of_node_put(memory); + /* Now that we know the number of arrays and size of each array, * revalidate the size of the property read in. */ @@ -493,26 +424,30 @@ static int of_get_assoc_arrays(struct device_node *memory, * This is like of_node_to_nid_single() for memory represented in the * ibm,dynamic-reconfiguration-memory node. */ -static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, - struct assoc_arrays *aa) +static int of_drconf_to_nid_single(struct drmem_lmb *lmb) { + struct assoc_arrays aa = { .arrays = NULL }; int default_nid = 0; int nid = default_nid; - int index; + int rc, index; + + rc = of_get_assoc_arrays(&aa); + if (rc) + return default_nid; - if (min_common_depth > 0 && min_common_depth <= aa->array_sz && - !(drmem->flags & DRCONF_MEM_AI_INVALID) && - drmem->aa_index < aa->n_arrays) { - index = drmem->aa_index * aa->array_sz + min_common_depth - 1; - nid = of_read_number(&aa->arrays[index], 1); + if (min_common_depth > 0 && min_common_depth <= aa.array_sz && + !(lmb->flags & DRCONF_MEM_AI_INVALID) && + lmb->aa_index < aa.n_arrays) { + index = lmb->aa_index * aa.array_sz + min_common_depth - 1; + nid = of_read_number(&aa.arrays[index], 1); if (nid == 0xffff || nid >= MAX_NUMNODES) nid = default_nid; if (nid > 0) { - index = drmem->aa_index * aa->array_sz; + index = lmb->aa_index * aa.array_sz; initialize_distance_lookup_table(nid, - &aa->arrays[index]); + &aa.arrays[index]); } } @@ -551,7 +486,7 @@ static int numa_setup_cpu(unsigned long lcpu) nid = of_node_to_nid_single(cpu); out_present: - if (nid < 0 || !node_online(nid)) + if (nid < 0 || !node_possible(nid)) nid = first_online_node; map_cpu_to_node(lcpu, nid); @@ -645,67 +580,48 @@ static inline int __init read_usm_ranges(const __be32 **usm) * Extract NUMA information from the ibm,dynamic-reconfiguration-memory * node. This assumes n_mem_{addr,size}_cells have been set. */ -static void __init parse_drconf_memory(struct device_node *memory) +static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm) { - const __be32 *uninitialized_var(dm), *usm; - unsigned int n, rc, ranges, is_kexec_kdump = 0; - unsigned long lmb_size, base, size, sz; + unsigned int ranges, is_kexec_kdump = 0; + unsigned long base, size, sz; int nid; - struct assoc_arrays aa = { .arrays = NULL }; - - n = of_get_drconf_memory(memory, &dm); - if (!n) - return; - - lmb_size = of_get_lmb_size(memory); - if (!lmb_size) - return; - rc = of_get_assoc_arrays(memory, &aa); - if (rc) + /* + * Skip this block if the reserved bit is set in flags (0x80) + * or if the block is not assigned to this partition (0x8) + */ + if ((lmb->flags & DRCONF_MEM_RESERVED) + || !(lmb->flags & DRCONF_MEM_ASSIGNED)) return; - /* check if this is a kexec/kdump kernel */ - usm = of_get_usable_memory(memory); - if (usm != NULL) + if (*usm) is_kexec_kdump = 1; - for (; n != 0; --n) { - struct of_drconf_cell drmem; - - read_drconf_cell(&drmem, &dm); + base = lmb->base_addr; + size = drmem_lmb_size(); + ranges = 1; - /* skip this block if the reserved bit is set in flags (0x80) - or if the block is not assigned to this partition (0x8) */ - if ((drmem.flags & DRCONF_MEM_RESERVED) - || !(drmem.flags & DRCONF_MEM_ASSIGNED)) - continue; - - base = drmem.base_addr; - size = lmb_size; - ranges = 1; + if (is_kexec_kdump) { + ranges = read_usm_ranges(usm); + if (!ranges) /* there are no (base, size) duple */ + return; + } + do { if (is_kexec_kdump) { - ranges = read_usm_ranges(&usm); - if (!ranges) /* there are no (base, size) duple */ - continue; + base = read_n_cells(n_mem_addr_cells, usm); + size = read_n_cells(n_mem_size_cells, usm); } - do { - if (is_kexec_kdump) { - base = read_n_cells(n_mem_addr_cells, &usm); - size = read_n_cells(n_mem_size_cells, &usm); - } - nid = of_drconf_to_nid_single(&drmem, &aa); - fake_numa_create_new_node( - ((base + size) >> PAGE_SHIFT), - &nid); - node_set_online(nid); - sz = numa_enforce_memory_limit(base, size); - if (sz) - memblock_set_node(base, sz, - &memblock.memory, nid); - } while (--ranges); - } + + nid = of_drconf_to_nid_single(lmb); + fake_numa_create_new_node(((base + size) >> PAGE_SHIFT), + &nid); + node_set_online(nid); + sz = numa_enforce_memory_limit(base, size); + if (sz) + memblock_set_node(base, sz, &memblock.memory, nid); + } while (--ranges); } static int __init parse_numa_properties(void) @@ -800,8 +716,10 @@ new_range: * ibm,dynamic-reconfiguration-memory node. */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (memory) - parse_drconf_memory(memory); + if (memory) { + walk_drmem_lmbs(memory, numa_setup_drmem_lmb); + of_node_put(memory); + } return 0; } @@ -892,6 +810,32 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) NODE_DATA(nid)->node_spanned_pages = spanned_pages; } +static void __init find_possible_nodes(void) +{ + struct device_node *rtas; + u32 numnodes, i; + + if (min_common_depth <= 0) + return; + + rtas = of_find_node_by_path("/rtas"); + if (!rtas) + return; + + if (of_property_read_u32_index(rtas, + "ibm,max-associativity-domains", + min_common_depth, &numnodes)) + goto out; + + for (i = 0; i < numnodes; i++) { + if (!node_possible(i)) + node_set(i, node_possible_map); + } + +out: + of_node_put(rtas); +} + void __init initmem_init(void) { int nid, cpu; @@ -905,12 +849,15 @@ void __init initmem_init(void) memblock_dump_all(); /* - * Reduce the possible NUMA nodes to the online NUMA nodes, - * since we do not support node hotplug. This ensures that we - * lower the maximum NUMA node ID to what is actually present. + * Modify the set of possible NUMA nodes to reflect information + * available about the set of online nodes, and the set of nodes + * that we expect to make use of for this platform's affinity + * calculations. */ nodes_and(node_possible_map, node_possible_map, node_online_map); + find_possible_nodes(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; @@ -979,43 +926,26 @@ early_param("topology_updates", early_topology_updates); * memory represented in the device tree by the property * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory. */ -static int hot_add_drconf_scn_to_nid(struct device_node *memory, - unsigned long scn_addr) +static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) { - const __be32 *dm; - unsigned int drconf_cell_cnt, rc; + struct drmem_lmb *lmb; unsigned long lmb_size; - struct assoc_arrays aa; int nid = -1; - drconf_cell_cnt = of_get_drconf_memory(memory, &dm); - if (!drconf_cell_cnt) - return -1; - - lmb_size = of_get_lmb_size(memory); - if (!lmb_size) - return -1; - - rc = of_get_assoc_arrays(memory, &aa); - if (rc) - return -1; - - for (; drconf_cell_cnt != 0; --drconf_cell_cnt) { - struct of_drconf_cell drmem; - - read_drconf_cell(&drmem, &dm); + lmb_size = drmem_lmb_size(); + for_each_drmem_lmb(lmb) { /* skip this block if it is reserved or not assigned to * this partition */ - if ((drmem.flags & DRCONF_MEM_RESERVED) - || !(drmem.flags & DRCONF_MEM_ASSIGNED)) + if ((lmb->flags & DRCONF_MEM_RESERVED) + || !(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; - if ((scn_addr < drmem.base_addr) - || (scn_addr >= (drmem.base_addr + lmb_size))) + if ((scn_addr < lmb->base_addr) + || (scn_addr >= (lmb->base_addr + lmb_size))) continue; - nid = of_drconf_to_nid_single(&drmem, &aa); + nid = of_drconf_to_nid_single(lmb); break; } @@ -1080,7 +1010,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { - nid = hot_add_drconf_scn_to_nid(memory, scn_addr); + nid = hot_add_drconf_scn_to_nid(scn_addr); of_node_put(memory); } else { nid = hot_add_node_scn_to_nid(scn_addr); @@ -1096,11 +1026,7 @@ static u64 hot_add_drconf_memory_max(void) { struct device_node *memory = NULL; struct device_node *dn = NULL; - unsigned int drconf_cell_cnt = 0; - u64 lmb_size = 0; - const __be32 *dm = NULL; const __be64 *lrdr = NULL; - struct of_drconf_cell drmem; dn = of_find_node_by_path("/rtas"); if (dn) { @@ -1112,14 +1038,8 @@ static u64 hot_add_drconf_memory_max(void) memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { - drconf_cell_cnt = of_get_drconf_memory(memory, &dm); - lmb_size = of_get_lmb_size(memory); - - /* Advance to the last cell, each cell has 6 32 bit integers */ - dm += (drconf_cell_cnt - 1) * 6; - read_drconf_cell(&drmem, &dm); of_node_put(memory); - return drmem.base_addr + lmb_size; + return drmem_lmb_memory_max(); } return 0; } @@ -1278,6 +1198,42 @@ static long vphn_get_associativity(unsigned long cpu, return rc; } +int find_and_online_cpu_nid(int cpu) +{ + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + int new_nid; + + /* Use associativity from first thread for all siblings */ + vphn_get_associativity(cpu, associativity); + new_nid = associativity_to_nid(associativity); + if (new_nid < 0 || !node_possible(new_nid)) + new_nid = first_online_node; + + if (NODE_DATA(new_nid) == NULL) { +#ifdef CONFIG_MEMORY_HOTPLUG + /* + * Need to ensure that NODE_DATA is initialized for a node from + * available memory (see memblock_alloc_try_nid). If unable to + * init the node, then default to nearest node that has memory + * installed. + */ + if (try_online_node(new_nid)) + new_nid = first_online_node; +#else + /* + * Default to using the nearest node that has memory installed. + * Otherwise, it would be necessary to patch the kernel MM code + * to deal with more memoryless-node error conditions. + */ + new_nid = first_online_node; +#endif + } + + pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__, + cpu, new_nid); + return new_nid; +} + /* * Update the CPU maps and sysfs entries for a single CPU when its NUMA * characteristics change. This function doesn't perform any locking and is @@ -1345,7 +1301,6 @@ int numa_update_cpu_topology(bool cpus_locked) { unsigned int cpu, sibling, changed = 0; struct topology_update_data *updates, *ud; - __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; cpumask_t updated_cpus; struct device *dev; int weight, new_nid, i = 0; @@ -1383,11 +1338,7 @@ int numa_update_cpu_topology(bool cpus_locked) continue; } - /* Use associativity from first thread for all siblings */ - vphn_get_associativity(cpu, associativity); - new_nid = associativity_to_nid(associativity); - if (new_nid < 0 || !node_online(new_nid)) - new_nid = first_online_node; + new_nid = find_and_online_cpu_nid(cpu); if (new_nid == numa_cpu_lookup_table[cpu]) { cpumask_andnot(&cpu_associativity_changes_mask, diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index cfbbee941a76..573a9a2ee455 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -579,6 +579,9 @@ void __init radix__early_init_mmu(void) radix_init_iamr(); radix_init_pgtable(); + + if (cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } void radix__early_init_mmu_secondary(void) @@ -600,6 +603,9 @@ void radix__early_init_mmu_secondary(void) radix_init_amor(); } radix_init_iamr(); + + if (cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } void radix__mmu_cleanup_all(void) @@ -622,22 +628,11 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, * physical on those processors */ BUG_ON(first_memblock_base != 0); + /* - * We limit the allocation that depend on ppc64_rma_size - * to first_memblock_size. We also clamp it to 1GB to - * avoid some funky things such as RTAS bugs. - * - * On radix config we really don't have a limitation - * on real mode access. But keeping it as above works - * well enough. - */ - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); - /* - * Finally limit subsequent allocations. We really don't want - * to limit the memblock allocations to rma_size. FIXME!! should - * we even limit at all ? + * Radix mode is not limited by RMA / VRMA addressing. */ - memblock_set_current_limit(first_memblock_base + first_memblock_size); + ppc64_rma_size = ULONG_MAX; } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index a03ff3d99e0c..9f361ae571e9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -54,7 +54,8 @@ static inline int pte_looks_normal(pte_t pte) return 0; #else return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER | + _PAGE_PRIVILEGED)) == (_PAGE_PRESENT | _PAGE_USER); #endif } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f6c7f54c0515..d35d9ad3c1cd 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -98,14 +98,7 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ flags &= ~(_PAGE_USER | _PAGE_EXEC); - -#ifdef _PAGE_BAP_SR - /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format - * which means that we just cleared supervisor access... oops ;-) This - * restores it - */ - flags |= _PAGE_BAP_SR; -#endif + flags |= _PAGE_PRIVILEGED; return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 813ea22c3e00..c9a623c2d8a2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -244,20 +244,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, /* * Force kernel mapping. */ -#if defined(CONFIG_PPC_BOOK3S_64) - flags |= _PAGE_PRIVILEGED; -#else flags &= ~_PAGE_USER; -#endif - - -#ifdef _PAGE_BAP_SR - /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format - * which means that we just cleared supervisor access... oops ;-) This - * restores it - */ - flags |= _PAGE_BAP_SR; -#endif + flags |= _PAGE_PRIVILEGED; if (ppc_md.ioremap) return ppc_md.ioremap(addr, size, flags, caller); diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c new file mode 100644 index 000000000000..ba71c5481f42 --- /dev/null +++ b/arch/powerpc/mm/pkeys.c @@ -0,0 +1,468 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PowerPC Memory Protection Keys management + * + * Copyright 2017, Ram Pai, IBM Corporation. + */ + +#include <asm/mman.h> +#include <asm/setup.h> +#include <linux/pkeys.h> +#include <linux/of_device.h> + +DEFINE_STATIC_KEY_TRUE(pkey_disabled); +bool pkey_execute_disable_supported; +int pkeys_total; /* Total pkeys as per device tree */ +bool pkeys_devtree_defined; /* pkey property exported by device tree */ +u32 initial_allocation_mask; /* Bits set for reserved keys */ +u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */ +u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ + +#define AMR_BITS_PER_PKEY 2 +#define AMR_RD_BIT 0x1UL +#define AMR_WR_BIT 0x2UL +#define IAMR_EX_BIT 0x1UL +#define PKEY_REG_BITS (sizeof(u64)*8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) + +static void scan_pkey_feature(void) +{ + u32 vals[2]; + struct device_node *cpu; + + cpu = of_find_node_by_type(NULL, "cpu"); + if (!cpu) + return; + + if (of_property_read_u32_array(cpu, + "ibm,processor-storage-keys", vals, 2)) + return; + + /* + * Since any pkey can be used for data or execute, we will just treat + * all keys as equal and track them as one entity. + */ + pkeys_total = be32_to_cpu(vals[0]); + pkeys_devtree_defined = true; +} + +static inline bool pkey_mmu_enabled(void) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return pkeys_total; + else + return cpu_has_feature(CPU_FTR_PKEY); +} + +int pkey_initialize(void) +{ + int os_reserved, i; + + /* + * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral + * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. + * Ensure that the bits a distinct. + */ + BUILD_BUG_ON(PKEY_DISABLE_EXECUTE & + (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + /* + * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous + * in the vmaflag. Make sure that is really the case. + */ + BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + != (sizeof(u64) * BITS_PER_BYTE)); + + /* scan the device tree for pkey feature */ + scan_pkey_feature(); + + /* + * Let's assume 32 pkeys on P8 bare metal, if its not defined by device + * tree. We make this exception since skiboot forgot to expose this + * property on power8. + */ + if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && + cpu_has_feature(CPU_FTRS_POWER8)) + pkeys_total = 32; + + /* + * Adjust the upper limit, based on the number of bits supported by + * arch-neutral code. + */ + pkeys_total = min_t(int, pkeys_total, + (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)); + + if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) + static_branch_enable(&pkey_disabled); + else + static_branch_disable(&pkey_disabled); + + if (static_branch_likely(&pkey_disabled)) + return 0; + + /* + * The device tree cannot be relied to indicate support for + * execute_disable support. Instead we use a PVR check. + */ + if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p)) + pkey_execute_disable_supported = false; + else + pkey_execute_disable_supported = true; + +#ifdef CONFIG_PPC_4K_PAGES + /* + * The OS can manage only 8 pkeys due to its inability to represent them + * in the Linux 4K PTE. + */ + os_reserved = pkeys_total - 8; +#else + os_reserved = 0; +#endif + /* + * Bits are in LE format. NOTE: 1, 0 are reserved. + * key 0 is the default key, which allows read/write/execute. + * key 1 is recommended not to be used. PowerISA(3.0) page 1015, + * programming note. + */ + initial_allocation_mask = ~0x0; + + /* register mask is in BE format */ + pkey_amr_uamor_mask = ~0x0ul; + pkey_iamr_mask = ~0x0ul; + + for (i = 2; i < (pkeys_total - os_reserved); i++) { + initial_allocation_mask &= ~(0x1 << i); + pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i)); + pkey_iamr_mask &= ~(0x1ul << pkeyshift(i)); + } + return 0; +} + +arch_initcall(pkey_initialize); + +void pkey_mm_init(struct mm_struct *mm) +{ + if (static_branch_likely(&pkey_disabled)) + return; + mm_pkey_allocation_map(mm) = initial_allocation_mask; + /* -1 means unallocated or invalid */ + mm->context.execute_only_pkey = -1; +} + +static inline u64 read_amr(void) +{ + return mfspr(SPRN_AMR); +} + +static inline void write_amr(u64 value) +{ + mtspr(SPRN_AMR, value); +} + +static inline u64 read_iamr(void) +{ + if (!likely(pkey_execute_disable_supported)) + return 0x0UL; + + return mfspr(SPRN_IAMR); +} + +static inline void write_iamr(u64 value) +{ + if (!likely(pkey_execute_disable_supported)) + return; + + mtspr(SPRN_IAMR, value); +} + +static inline u64 read_uamor(void) +{ + return mfspr(SPRN_UAMOR); +} + +static inline void write_uamor(u64 value) +{ + mtspr(SPRN_UAMOR, value); +} + +static bool is_pkey_enabled(int pkey) +{ + u64 uamor = read_uamor(); + u64 pkey_bits = 0x3ul << pkeyshift(pkey); + u64 uamor_pkey_bits = (uamor & pkey_bits); + + /* + * Both the bits in UAMOR corresponding to the key should be set or + * reset. + */ + WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits)); + return !!(uamor_pkey_bits); +} + +static inline void init_amr(int pkey, u8 init_bits) +{ + u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); + u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); + + write_amr(old_amr | new_amr_bits); +} + +static inline void init_iamr(int pkey, u8 init_bits) +{ + u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); + u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); + + write_iamr(old_iamr | new_iamr_bits); +} + +static void pkey_status_change(int pkey, bool enable) +{ + u64 old_uamor; + + /* Reset the AMR and IAMR bits for this key */ + init_amr(pkey, 0x0); + init_iamr(pkey, 0x0); + + /* Enable/disable key */ + old_uamor = read_uamor(); + if (enable) + old_uamor |= (0x3ul << pkeyshift(pkey)); + else + old_uamor &= ~(0x3ul << pkeyshift(pkey)); + write_uamor(old_uamor); +} + +void __arch_activate_pkey(int pkey) +{ + pkey_status_change(pkey, true); +} + +void __arch_deactivate_pkey(int pkey) +{ + pkey_status_change(pkey, false); +} + +/* + * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that + * specified in @init_val. + */ +int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + u64 new_amr_bits = 0x0ul; + u64 new_iamr_bits = 0x0ul; + + if (!is_pkey_enabled(pkey)) + return -EINVAL; + + if (init_val & PKEY_DISABLE_EXECUTE) { + if (!pkey_execute_disable_supported) + return -EINVAL; + new_iamr_bits |= IAMR_EX_BIT; + } + init_iamr(pkey, new_iamr_bits); + + /* Set the bits we need in AMR: */ + if (init_val & PKEY_DISABLE_ACCESS) + new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT; + else if (init_val & PKEY_DISABLE_WRITE) + new_amr_bits |= AMR_WR_BIT; + + init_amr(pkey, new_amr_bits); + return 0; +} + +void thread_pkey_regs_save(struct thread_struct *thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + /* + * TODO: Skip saving registers if @thread hasn't used any keys yet. + */ + thread->amr = read_amr(); + thread->iamr = read_iamr(); + thread->uamor = read_uamor(); +} + +void thread_pkey_regs_restore(struct thread_struct *new_thread, + struct thread_struct *old_thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + /* + * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet. + */ + if (old_thread->amr != new_thread->amr) + write_amr(new_thread->amr); + if (old_thread->iamr != new_thread->iamr) + write_iamr(new_thread->iamr); + if (old_thread->uamor != new_thread->uamor) + write_uamor(new_thread->uamor); +} + +void thread_pkey_regs_init(struct thread_struct *thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + write_amr(read_amr() & pkey_amr_uamor_mask); + write_iamr(read_iamr() & pkey_iamr_mask); + write_uamor(read_uamor() & pkey_amr_uamor_mask); +} + +static inline bool pkey_allows_readwrite(int pkey) +{ + int pkey_shift = pkeyshift(pkey); + + if (!is_pkey_enabled(pkey)) + return true; + + return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift)); +} + +int __execute_only_pkey(struct mm_struct *mm) +{ + bool need_to_set_mm_pkey = false; + int execute_only_pkey = mm->context.execute_only_pkey; + int ret; + + /* Do we need to assign a pkey for mm's execute-only maps? */ + if (execute_only_pkey == -1) { + /* Go allocate one to use, which might fail */ + execute_only_pkey = mm_pkey_alloc(mm); + if (execute_only_pkey < 0) + return -1; + need_to_set_mm_pkey = true; + } + + /* + * We do not want to go through the relatively costly dance to set AMR + * if we do not need to. Check it first and assume that if the + * execute-only pkey is readwrite-disabled than we do not have to set it + * ourselves. + */ + if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey)) + return execute_only_pkey; + + /* + * Set up AMR so that it denies access for everything other than + * execution. + */ + ret = __arch_set_user_pkey_access(current, execute_only_pkey, + PKEY_DISABLE_ACCESS | + PKEY_DISABLE_WRITE); + /* + * If the AMR-set operation failed somehow, just return 0 and + * effectively disable execute-only support. + */ + if (ret) { + mm_pkey_free(mm, execute_only_pkey); + return -1; + } + + /* We got one, store it and use it from here on out */ + if (need_to_set_mm_pkey) + mm->context.execute_only_pkey = execute_only_pkey; + return execute_only_pkey; +} + +static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) +{ + /* Do this check first since the vm_flags should be hot */ + if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + return false; + + return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); +} + +/* + * This should only be called for *plain* mprotect calls. + */ +int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, + int pkey) +{ + /* + * If the currently associated pkey is execute-only, but the requested + * protection requires read or write, move it back to the default pkey. + */ + if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE))) + return 0; + + /* + * The requested protection is execute-only. Hence let's use an + * execute-only pkey. + */ + if (prot == PROT_EXEC) { + pkey = execute_only_pkey(vma->vm_mm); + if (pkey > 0) + return pkey; + } + + /* Nothing to override. */ + return vma_pkey(vma); +} + +static bool pkey_access_permitted(int pkey, bool write, bool execute) +{ + int pkey_shift; + u64 amr; + + if (!pkey) + return true; + + if (!is_pkey_enabled(pkey)) + return true; + + pkey_shift = pkeyshift(pkey); + if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) + return true; + + amr = read_amr(); /* Delay reading amr until absolutely needed */ + return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || + (write && !(amr & (AMR_WR_BIT << pkey_shift)))); +} + +bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + if (static_branch_likely(&pkey_disabled)) + return true; + + return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); +} + +/* + * We only want to enforce protection keys on the current thread because we + * effectively have no access to AMR/IAMR for other threads or any way to tell + * which AMR/IAMR in a threaded process we could use. + * + * So do not enforce things if the VMA is not from the current mm, or if we are + * in a kernel thread. + */ +static inline bool vma_is_foreign(struct vm_area_struct *vma) +{ + if (!current->mm) + return true; + + /* if it is not our ->mm, it has to be foreign */ + if (current->mm != vma->vm_mm) + return true; + + return false; +} + +bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, + bool execute, bool foreign) +{ + if (static_branch_likely(&pkey_disabled)) + return true; + /* + * Do not enforce our key-permissions on a foreign vma. + */ + if (foreign || vma_is_foreign(vma)) + return true; + + return pkey_access_permitted(vma_pkey(vma), write, execute); +} diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 781532d7bc4d..f14a07c2fb90 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) unsigned long next, limit; int err; + if (radix_enabled()) + return -ENOENT; + /* Check parameters */ if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) || addr >= mm->task_size || len >= mm->task_size || diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 884f4b705b57..71d1b19ad1c0 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -23,6 +23,72 @@ #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 +/* + * tlbiel instruction for radix, set invalidation + * i.e., r=1 and is=01 or is=10 or is=11 + */ +static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, + unsigned int pid, + unsigned int ric, unsigned int prs) +{ + unsigned long rb; + unsigned long rs; + unsigned int r = 1; /* radix format */ + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); + + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : "memory"); +} + +static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the first set of the TLB, and the entire Page Walk Cache + * and partition table entries. Then flush the remaining sets of the + * TLB. + */ + tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); + + /* Do the same for process scoped entries. */ + tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + + asm volatile("ptesync": : :"memory"); +} + +void radix__tlbiel_all(unsigned int action) +{ + unsigned int is; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + is = 3; + break; + case TLB_INVAL_SCOPE_LPID: + is = 2; + break; + default: + BUG(); + } + + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is); + else + WARN(1, "%s called on pre-POWER9 CPU\n", __func__); + + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); +} + static inline void __tlbiel_pid(unsigned long pid, int set, unsigned long ric) { @@ -600,14 +666,12 @@ void radix__flush_tlb_all(void) */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); /* * now flush host entires by passing PRS = 0 and LPID == 0 */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(0, 0, rb, 0, ric, prs, r); } void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index bfc4a0869609..15fe5f0c8665 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -388,7 +388,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - flush_tlb_mm(vma->vm_mm); + if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK)) + flush_tlb_page(vma, start); + else + flush_tlb_mm(vma->vm_mm); } EXPORT_SYMBOL(flush_tlb_range); diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 3c39f05f0af3..6c0020d1c561 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -18,6 +18,7 @@ #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/ptrace.h> +#include <asm/code-patching.h> #define PERF_8xx_ID_CPU_CYCLES 1 #define PERF_8xx_ID_HW_INSTRUCTIONS 2 @@ -30,8 +31,13 @@ extern unsigned long itlb_miss_counter, dtlb_miss_counter; extern atomic_t instruction_counter; +extern unsigned int itlb_miss_perf, dtlb_miss_perf; +extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2; +extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3; static atomic_t insn_ctr_ref; +static atomic_t itlb_miss_ref; +static atomic_t dtlb_miss_ref; static s64 get_insn_ctr(void) { @@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) val = get_insn_ctr(); break; case PERF_8xx_ID_ITLB_LOAD_MISS: + if (atomic_inc_return(&itlb_miss_ref) == 1) { + unsigned long target = (unsigned long)&itlb_miss_perf; + + patch_branch(&itlb_miss_exit_1, target, 0); +#ifndef CONFIG_PIN_TLB_TEXT + patch_branch(&itlb_miss_exit_2, target, 0); +#endif + } val = itlb_miss_counter; break; case PERF_8xx_ID_DTLB_LOAD_MISS: + if (atomic_inc_return(&dtlb_miss_ref) == 1) { + unsigned long target = (unsigned long)&dtlb_miss_perf; + + patch_branch(&dtlb_miss_exit_1, target, 0); + patch_branch(&dtlb_miss_exit_2, target, 0); + patch_branch(&dtlb_miss_exit_3, target, 0); + } val = dtlb_miss_counter; break; } @@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { + /* mfspr r10, SPRN_SPRG_SCRATCH0 */ + unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_SPRG_SCRATCH0); + mpc8xx_pmu_read(event); - if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS) - return; /* If it was the last user, stop counting to avoid useles overhead */ - if (atomic_dec_return(&insn_ctr_ref) == 0) - mtspr(SPRN_ICTRL, 7); + switch (event_type(event)) { + case PERF_8xx_ID_CPU_CYCLES: + break; + case PERF_8xx_ID_HW_INSTRUCTIONS: + if (atomic_dec_return(&insn_ctr_ref) == 0) + mtspr(SPRN_ICTRL, 7); + break; + case PERF_8xx_ID_ITLB_LOAD_MISS: + if (atomic_dec_return(&itlb_miss_ref) == 0) { + patch_instruction(&itlb_miss_exit_1, insn); +#ifndef CONFIG_PIN_TLB_TEXT + patch_instruction(&itlb_miss_exit_2, insn); +#endif + } + break; + case PERF_8xx_ID_DTLB_LOAD_MISS: + if (atomic_dec_return(&dtlb_miss_ref) == 0) { + patch_instruction(&dtlb_miss_exit_1, insn); + patch_instruction(&dtlb_miss_exit_2, insn); + patch_instruction(&dtlb_miss_exit_3, insn); + } + break; + } } static struct pmu mpc8xx_pmu = { diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 225c9c86d7c0..57ebc655d2ac 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o -obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o +obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index fce545774d50..f89bbd54ecec 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs) */ static inline int perf_intr_is_nmi(struct pt_regs *regs) { - return !regs->softe; + return (regs->softe & IRQS_DISABLED); } /* diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index be4e7f84f70a..d7532e7b9ab5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -40,7 +40,6 @@ static struct imc_pmu *core_imc_pmu; /* Thread IMC data structures and variables */ static DEFINE_PER_CPU(u64 *, thread_imc_mem); -static struct imc_pmu *thread_imc_pmu; static int thread_imc_mem_size; struct imc_pmu *imc_event_to_pmu(struct perf_event *event) @@ -117,17 +116,13 @@ static struct attribute *device_str_attr_create(const char *name, const char *st return &attr->attr.attr; } -struct imc_events *imc_parse_event(struct device_node *np, const char *scale, - const char *unit, const char *prefix, u32 base) +static int imc_parse_event(struct device_node *np, const char *scale, + const char *unit, const char *prefix, + u32 base, struct imc_events *event) { - struct imc_events *event; const char *s; u32 reg; - event = kzalloc(sizeof(struct imc_events), GFP_KERNEL); - if (!event) - return NULL; - if (of_property_read_u32(np, "reg", ®)) goto error; /* Add the base_reg value to the "reg" */ @@ -158,14 +153,32 @@ struct imc_events *imc_parse_event(struct device_node *np, const char *scale, goto error; } - return event; + return 0; error: kfree(event->unit); kfree(event->scale); kfree(event->name); - kfree(event); + return -EINVAL; +} + +/* + * imc_free_events: Function to cleanup the events list, having + * "nr_entries". + */ +static void imc_free_events(struct imc_events *events, int nr_entries) +{ + int i; - return NULL; + /* Nothing to clean, return */ + if (!events) + return; + for (i = 0; i < nr_entries; i++) { + kfree(events[i].unit); + kfree(events[i].scale); + kfree(events[i].name); + } + + kfree(events); } /* @@ -177,9 +190,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) struct attribute_group *attr_group; struct attribute **attrs, *dev_str; struct device_node *np, *pmu_events; - struct imc_events *ev; u32 handle, base_reg; - int i=0, j=0, ct; + int i = 0, j = 0, ct, ret; const char *prefix, *g_scale, *g_unit; const char *ev_val_str, *ev_scale_str, *ev_unit_str; @@ -217,15 +229,17 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) ct = 0; /* Parse the events and update the struct */ for_each_child_of_node(pmu_events, np) { - ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg); - if (ev) - pmu->events[ct++] = ev; + ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]); + if (!ret) + ct++; } /* Allocate memory for attribute group */ attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL); - if (!attr_group) + if (!attr_group) { + imc_free_events(pmu->events, ct); return -ENOMEM; + } /* * Allocate memory for attributes. @@ -238,31 +252,31 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL); if (!attrs) { kfree(attr_group); - kfree(pmu->events); + imc_free_events(pmu->events, ct); return -ENOMEM; } attr_group->name = "events"; attr_group->attrs = attrs; do { - ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value); - dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str); + ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value); + dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str); if (!dev_str) continue; attrs[j++] = dev_str; - if (pmu->events[i]->scale) { - ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name); - dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale); + if (pmu->events[i].scale) { + ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name); + dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale); if (!dev_str) continue; attrs[j++] = dev_str; } - if (pmu->events[i]->unit) { - ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name); - dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit); + if (pmu->events[i].unit) { + ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name); + dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit); if (!dev_str) continue; @@ -273,7 +287,6 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) /* Save the event attribute */ pmu->attr_groups[IMC_EVENT_ATTR] = attr_group; - kfree(pmu->events); return 0; } @@ -611,7 +624,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu) static int ppc_core_imc_cpu_offline(unsigned int cpu) { - unsigned int ncpu, core_id; + unsigned int core_id; + int ncpu; struct imc_pmu_ref *ref; /* @@ -1171,6 +1185,15 @@ static void cleanup_all_thread_imc_memory(void) } } +/* Function to free the attr_groups which are dynamically allocated */ +static void imc_common_mem_free(struct imc_pmu *pmu_ptr) +{ + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); + kfree(pmu_ptr); +} + /* * Common function to unregister cpu hotplug callback and * free the memory. @@ -1203,13 +1226,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); cleanup_all_thread_imc_memory(); } - - /* Only free the attr_groups which are dynamically allocated */ - if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); - kfree(pmu_ptr); - return; } @@ -1258,8 +1274,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref), GFP_KERNEL); - if (!core_imc_refc) + if (!core_imc_refc) { + kfree(pmu_ptr->mem_info); return -ENOMEM; + } core_imc_pmu = pmu_ptr; break; @@ -1272,11 +1290,12 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, thread_imc_mem_size = pmu_ptr->counter_mem_size; for_each_online_cpu(cpu) { res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size); - if (res) + if (res) { + cleanup_all_thread_imc_memory(); return res; + } } - thread_imc_pmu = pmu_ptr; break; default: return -EINVAL; @@ -1300,8 +1319,10 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id int ret; ret = imc_mem_init(pmu_ptr, parent, pmu_idx); - if (ret) - goto err_free; + if (ret) { + imc_common_mem_free(pmu_ptr); + return ret; + } switch (pmu_ptr->domain) { case IMC_DOMAIN_NEST: @@ -1368,6 +1389,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id return 0; err_free: + imc_common_mem_free(pmu_ptr); imc_common_cpuhp_mem_free(pmu_ptr); return ret; } diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index 92e98048404f..04f0c73a9b4f 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -27,6 +27,17 @@ #include <asm/time.h> #include <asm/uic.h> #include <asm/ppc4xx.h> +#include <asm/dcr.h> +#include <linux/interrupt.h> +#include <linux/of_irq.h> +#include "fsp2.h" + +#define FSP2_BUS_ERR "ibm,bus-error-irq" +#define FSP2_CMU_ERR "ibm,cmu-error-irq" +#define FSP2_CONF_ERR "ibm,conf-error-irq" +#define FSP2_OPBD_ERR "ibm,opbd-error-irq" +#define FSP2_MCUE "ibm,mc-ue-irq" +#define FSP2_RST_WRN "ibm,reset-warning-irq" static __initdata struct of_device_id fsp2_of_bus[] = { { .compatible = "ibm,plb4", }, @@ -35,6 +46,194 @@ static __initdata struct of_device_id fsp2_of_bus[] = { {}, }; +static void l2regs(void) +{ + pr_err("L2 Controller:\n"); + pr_err("MCK: 0x%08x\n", mfl2(L2MCK)); + pr_err("INT: 0x%08x\n", mfl2(L2INT)); + pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0)); + pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1)); + pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0)); + pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1)); + pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2)); + pr_err("CPUSTAT: 0x%08x\n", mfl2(L2CPUSTAT)); + pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0)); + pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0)); + pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1)); + pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2)); + pr_err("WDFSTAT: 0x%08x\n", mfl2(L2WDFSTAT)); + pr_err("LOG0: 0x%08x\n", mfl2(L2LOG0)); + pr_err("LOG1: 0x%08x\n", mfl2(L2LOG1)); + pr_err("LOG2: 0x%08x\n", mfl2(L2LOG2)); + pr_err("LOG3: 0x%08x\n", mfl2(L2LOG3)); + pr_err("LOG4: 0x%08x\n", mfl2(L2LOG4)); + pr_err("LOG5: 0x%08x\n", mfl2(L2LOG5)); +} + +static void show_plbopb_regs(u32 base, int num) +{ + pr_err("\nPLBOPB Bridge %d:\n", num); + pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0)); + pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1)); + pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2)); + pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU)); + pr_err("GEAR: 0x%08x\n", mfdcr(base + PLB4OPB_GEAR)); +} + +static irqreturn_t bus_err_handler(int irq, void *data) +{ + pr_err("Bus Error\n"); + + l2regs(); + + pr_err("\nPLB6 Controller:\n"); + pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD)); + pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR)); + + pr_err("\nPLB6-to-PLB4 Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR)); + pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH)); + pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL)); + + pr_err("\nPLB4-to-PLB6 Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR)); + pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH)); + pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL)); + + pr_err("\nPLB6-to-MCIF Bridge:\n"); + pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0)); + pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1)); + pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH)); + pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL)); + + pr_err("\nPLB4 Arbiter:\n"); + pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH)); + pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL)); + pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH)); + pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH)); + pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH)); + pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL)); + pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH)); + pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH)); + + show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0); + show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1); + show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2); + show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3); + + pr_err("\nPLB4-to-AHB Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR)); + pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR)); + pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR)); + + pr_err("\nAHB-to-PLB4 Bridge:\n"); + pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR)); + pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR)); + panic("Bus Error\n"); +} + +static irqreturn_t cmu_err_handler(int irq, void *data) { + pr_err("CMU Error\n"); + pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0)); + panic("CMU Error\n"); +} + +static irqreturn_t conf_err_handler(int irq, void *data) { + pr_err("Configuration Logic Error\n"); + pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC)); + pr_err("RPERR0: 0x%08x\n", mfdcr(DCRN_CONF_RPERR0)); + pr_err("RPERR1: 0x%08x\n", mfdcr(DCRN_CONF_RPERR1)); + panic("Configuration Logic Error\n"); +} + +static irqreturn_t opbd_err_handler(int irq, void *data) { + panic("OPBD Error\n"); +} + +static irqreturn_t mcue_handler(int irq, void *data) { + pr_err("DDR: Uncorrectable Error\n"); + pr_err("MCSTAT: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT)); + pr_err("MCOPT1: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1)); + pr_err("MCOPT2: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2)); + pr_err("PHYSTAT: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT)); + pr_err("CFGR0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0)); + pr_err("CFGR1: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1)); + pr_err("CFGR2: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2)); + pr_err("CFGR3: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3)); + pr_err("SCRUB_CNTL: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL)); + pr_err("ECCERR_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0)); + pr_err("ECCERR_ADDR_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0)); + pr_err("ECCERR_CNT_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0)); + pr_err("ECC_CHECK_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0)); + pr_err("MCER0: 0x%08x\n", + mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0)); + pr_err("MCER1: 0x%08x\n", + mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1)); + pr_err("BESR: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BESR0)); + pr_err("BEARL: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BEARL)); + pr_err("BEARH: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BEARH)); + panic("DDR: Uncorrectable Error\n"); +} + +static irqreturn_t rst_wrn_handler(int irq, void *data) { + u32 crcs = mfcmu(CMUN_CRCS); + switch (crcs & CRCS_STAT_MASK) { + case CRCS_STAT_CHIP_RST_B: + panic("Received chassis-initiated reset request"); + default: + panic("Unknown external reset: CRCS=0x%x", crcs); + } +} + +static void node_irq_request(const char *compat, irq_handler_t errirq_handler) +{ + struct device_node *np; + unsigned int irq; + int32_t rc; + + for_each_compatible_node(np, NULL, compat) { + irq = irq_of_parse_and_map(np, 0); + if (irq == NO_IRQ) { + pr_err("device tree node %s is missing a interrupt", + np->name); + return; + } + + rc = request_irq(irq, errirq_handler, 0, np->name, np); + if (rc) { + pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d", + np->full_name, rc); + return; + } + } +} + +static void critical_irq_setup(void) +{ + node_irq_request(FSP2_CMU_ERR, cmu_err_handler); + node_irq_request(FSP2_BUS_ERR, bus_err_handler); + node_irq_request(FSP2_CONF_ERR, conf_err_handler); + node_irq_request(FSP2_OPBD_ERR, opbd_err_handler); + node_irq_request(FSP2_MCUE, mcue_handler); + node_irq_request(FSP2_RST_WRN, rst_wrn_handler); +} + static int __init fsp2_device_probe(void) { of_platform_bus_probe(NULL, fsp2_of_bus, NULL); @@ -44,18 +243,76 @@ machine_device_initcall(fsp2, fsp2_device_probe); static int __init fsp2_probe(void) { + u32 val; unsigned long root = of_get_flat_dt_root(); if (!of_flat_dt_is_compatible(root, "ibm,fsp2")) return 0; + + /* Clear BC_ERR and mask snoopable request plb errors. */ + val = mfdcr(DCRN_PLB6_CR0); + val |= 0x20000000; + mtdcr(DCRN_PLB6_BASE, val); + mtdcr(DCRN_PLB6_HD, 0xffff0000); + mtdcr(DCRN_PLB6_SHD, 0xffff0000); + + /* TVSENSE reset is blocked (clock gated) by the POR default of the TVS + * sleep config bit. As a consequence, TVSENSE will provide erratic + * sensor values, which may result in spurious (parity) errors + * recorded in the CMU FIR and leading to erroneous interrupt requests + * once the CMU interrupt is unmasked. + */ + + /* 1. set TVS1[UNDOZE] */ + val = mfcmu(CMUN_TVS1); + val |= 0x4; + mtcmu(CMUN_TVS1, val); + + /* 2. clear FIR[TVS] and FIR[TVSPAR] */ + val = mfcmu(CMUN_FIR0); + val |= 0x30000000; + mtcmu(CMUN_FIR0, val); + + /* L2 machine checks */ + mtl2(L2PLBMCKEN0, 0xffffffff); + mtl2(L2PLBMCKEN1, 0x0000ffff); + mtl2(L2ARRMCKEN0, 0xffffffff); + mtl2(L2ARRMCKEN1, 0xffffffff); + mtl2(L2ARRMCKEN2, 0xfffff000); + mtl2(L2CPUMCKEN, 0xffffffff); + mtl2(L2RACMCKEN0, 0xffffffff); + mtl2(L2WACMCKEN0, 0xffffffff); + mtl2(L2WACMCKEN1, 0xffffffff); + mtl2(L2WACMCKEN2, 0xffffffff); + mtl2(L2WDFMCKEN, 0xffffffff); + + /* L2 interrupts */ + mtl2(L2PLBINTEN1, 0xffff0000); + + /* + * At a global level, enable all L2 machine checks and interrupts + * reported by the L2 subsystems, except for the external machine check + * input (UIC0.1). + */ + mtl2(L2MCKEN, 0x000007ff); + mtl2(L2INTEN, 0x000004ff); + + /* Enable FSP-2 configuration logic parity errors */ + mtdcr(DCRN_CONF_EIR_RS, 0x80000000); return 1; } +static void __init fsp2_irq_init(void) +{ + uic_init_tree(); + critical_irq_setup(); +} + define_machine(fsp2) { .name = "FSP-2", .probe = fsp2_probe, .progress = udbg_progress, - .init_IRQ = uic_init_tree, + .init_IRQ = fsp2_irq_init, .get_irq = uic_get_irq, .restart = ppc4xx_reset_system, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h new file mode 100644 index 000000000000..9e1d52754c8b --- /dev/null +++ b/arch/powerpc/platforms/44x/fsp2.h @@ -0,0 +1,272 @@ +#ifndef _ASM_POWERPC_FSP_DCR_H_ +#define _ASM_POWERPC_FSP_DCR_H_ +#ifdef __KERNEL__ +#include <asm/dcr.h> + +#define DCRN_CMU_ADDR 0x00C /* Chip management unic addr */ +#define DCRN_CMU_DATA 0x00D /* Chip management unic data */ + +/* PLB4 Arbiter */ +#define DCRN_PLB4_PCBI 0x010 /* PLB Crossbar ID/Rev Register */ +#define DCRN_PLB4_P0ACR 0x011 /* PLB0 Arbiter Control Register */ +#define DCRN_PLB4_P0ESRL 0x012 /* PLB0 Error Status Register Low */ +#define DCRN_PLB4_P0ESRH 0x013 /* PLB0 Error Status Register High */ +#define DCRN_PLB4_P0EARL 0x014 /* PLB0 Error Address Register Low */ +#define DCRN_PLB4_P0EARH 0x015 /* PLB0 Error Address Register High */ +#define DCRN_PLB4_P0ESRLS 0x016 /* PLB0 Error Status Register Low Set*/ +#define DCRN_PLB4_P0ESRHS 0x017 /* PLB0 Error Status Register High */ +#define DCRN_PLB4_PCBC 0x018 /* PLB Crossbar Control Register */ +#define DCRN_PLB4_P1ACR 0x019 /* PLB1 Arbiter Control Register */ +#define DCRN_PLB4_P1ESRL 0x01A /* PLB1 Error Status Register Low */ +#define DCRN_PLB4_P1ESRH 0x01B /* PLB1 Error Status Register High */ +#define DCRN_PLB4_P1EARL 0x01C /* PLB1 Error Address Register Low */ +#define DCRN_PLB4_P1EARH 0x01D /* PLB1 Error Address Register High */ +#define DCRN_PLB4_P1ESRLS 0x01E /* PLB1 Error Status Register Low Set*/ +#define DCRN_PLB4_P1ESRHS 0x01F /*PLB1 Error Status Register High Set*/ + +/* PLB4/OPB bridge 0, 1, 2, 3 */ +#define DCRN_PLB4OPB0_BASE 0x020 +#define DCRN_PLB4OPB1_BASE 0x030 +#define DCRN_PLB4OPB2_BASE 0x040 +#define DCRN_PLB4OPB3_BASE 0x050 + +#define PLB4OPB_GESR0 0x0 /* Error status 0: Master Dev 0-3 */ +#define PLB4OPB_GEAR 0x2 /* Error Address Register */ +#define PLB4OPB_GEARU 0x3 /* Error Upper Address Register */ +#define PLB4OPB_GESR1 0x4 /* Error Status 1: Master Dev 4-7 */ +#define PLB4OPB_GESR2 0xC /* Error Status 2: Master Dev 8-11 */ + +/* PLB4-to-AHB Bridge */ +#define DCRN_PLB4AHB_BASE 0x400 +#define DCRN_PLB4AHB_SEUAR (DCRN_PLB4AHB_BASE + 1) +#define DCRN_PLB4AHB_SELAR (DCRN_PLB4AHB_BASE + 2) +#define DCRN_PLB4AHB_ESR (DCRN_PLB4AHB_BASE + 3) +#define DCRN_AHBPLB4_ESR (DCRN_PLB4AHB_BASE + 8) +#define DCRN_AHBPLB4_EAR (DCRN_PLB4AHB_BASE + 9) + +/* PLB6 Controller */ +#define DCRN_PLB6_BASE 0x11111300 +#define DCRN_PLB6_CR0 (DCRN_PLB6_BASE) +#define DCRN_PLB6_ERR (DCRN_PLB6_BASE + 0x0B) +#define DCRN_PLB6_HD (DCRN_PLB6_BASE + 0x0E) +#define DCRN_PLB6_SHD (DCRN_PLB6_BASE + 0x10) + +/* PLB4-to-PLB6 Bridge */ +#define DCRN_PLB4PLB6_BASE 0x11111320 +#define DCRN_PLB4PLB6_ESR (DCRN_PLB4PLB6_BASE + 1) +#define DCRN_PLB4PLB6_EARH (DCRN_PLB4PLB6_BASE + 3) +#define DCRN_PLB4PLB6_EARL (DCRN_PLB4PLB6_BASE + 4) + +/* PLB6-to-PLB4 Bridge */ +#define DCRN_PLB6PLB4_BASE 0x11111350 +#define DCRN_PLB6PLB4_ESR (DCRN_PLB6PLB4_BASE + 1) +#define DCRN_PLB6PLB4_EARH (DCRN_PLB6PLB4_BASE + 3) +#define DCRN_PLB6PLB4_EARL (DCRN_PLB6PLB4_BASE + 4) + +/* PLB6-to-MCIF Bridge */ +#define DCRN_PLB6MCIF_BASE 0x11111380 +#define DCRN_PLB6MCIF_BESR0 (DCRN_PLB6MCIF_BASE + 0) +#define DCRN_PLB6MCIF_BESR1 (DCRN_PLB6MCIF_BASE + 1) +#define DCRN_PLB6MCIF_BEARL (DCRN_PLB6MCIF_BASE + 2) +#define DCRN_PLB6MCIF_BEARH (DCRN_PLB6MCIF_BASE + 3) + +/* Configuration Logic Registers */ +#define DCRN_CONF_BASE 0x11111400 +#define DCRN_CONF_FIR_RWC (DCRN_CONF_BASE + 0x3A) +#define DCRN_CONF_EIR_RS (DCRN_CONF_BASE + 0x3E) +#define DCRN_CONF_RPERR0 (DCRN_CONF_BASE + 0x4D) +#define DCRN_CONF_RPERR1 (DCRN_CONF_BASE + 0x4E) + +#define DCRN_L2CDCRAI 0x11111100 +#define DCRN_L2CDCRDI 0x11111104 +/* L2 indirect addresses */ +#define L2MCK 0x120 +#define L2MCKEN 0x130 +#define L2INT 0x150 +#define L2INTEN 0x160 +#define L2LOG0 0x180 +#define L2LOG1 0x184 +#define L2LOG2 0x188 +#define L2LOG3 0x18C +#define L2LOG4 0x190 +#define L2LOG5 0x194 +#define L2PLBSTAT0 0x300 +#define L2PLBSTAT1 0x304 +#define L2PLBMCKEN0 0x330 +#define L2PLBMCKEN1 0x334 +#define L2PLBINTEN0 0x360 +#define L2PLBINTEN1 0x364 +#define L2ARRSTAT0 0x500 +#define L2ARRSTAT1 0x504 +#define L2ARRSTAT2 0x508 +#define L2ARRMCKEN0 0x530 +#define L2ARRMCKEN1 0x534 +#define L2ARRMCKEN2 0x538 +#define L2ARRINTEN0 0x560 +#define L2ARRINTEN1 0x564 +#define L2ARRINTEN2 0x568 +#define L2CPUSTAT 0x700 +#define L2CPUMCKEN 0x730 +#define L2CPUINTEN 0x760 +#define L2RACSTAT0 0x900 +#define L2RACMCKEN0 0x930 +#define L2RACINTEN0 0x960 +#define L2WACSTAT0 0xD00 +#define L2WACSTAT1 0xD04 +#define L2WACSTAT2 0xD08 +#define L2WACMCKEN0 0xD30 +#define L2WACMCKEN1 0xD34 +#define L2WACMCKEN2 0xD38 +#define L2WACINTEN0 0xD60 +#define L2WACINTEN1 0xD64 +#define L2WACINTEN2 0xD68 +#define L2WDFSTAT 0xF00 +#define L2WDFMCKEN 0xF30 +#define L2WDFINTEN 0xF60 + +/* DDR3/4 Memory Controller */ +#define DCRN_DDR34_BASE 0x11120000 +#define DCRN_DDR34_MCSTAT 0x10 +#define DCRN_DDR34_MCOPT1 0x20 +#define DCRN_DDR34_MCOPT2 0x21 +#define DCRN_DDR34_PHYSTAT 0x32 +#define DCRN_DDR34_CFGR0 0x40 +#define DCRN_DDR34_CFGR1 0x41 +#define DCRN_DDR34_CFGR2 0x42 +#define DCRN_DDR34_CFGR3 0x43 +#define DCRN_DDR34_SCRUB_CNTL 0xAA +#define DCRN_DDR34_SCRUB_INT 0xAB +#define DCRN_DDR34_SCRUB_START_ADDR 0xB0 +#define DCRN_DDR34_SCRUB_END_ADDR 0xD0 +#define DCRN_DDR34_ECCERR_ADDR_PORT0 0xE0 +#define DCRN_DDR34_ECCERR_ADDR_PORT1 0xE1 +#define DCRN_DDR34_ECCERR_ADDR_PORT2 0xE2 +#define DCRN_DDR34_ECCERR_ADDR_PORT3 0xE3 +#define DCRN_DDR34_ECCERR_COUNT_PORT0 0xE4 +#define DCRN_DDR34_ECCERR_COUNT_PORT1 0xE5 +#define DCRN_DDR34_ECCERR_COUNT_PORT2 0xE6 +#define DCRN_DDR34_ECCERR_COUNT_PORT3 0xE7 +#define DCRN_DDR34_ECCERR_PORT0 0xF0 +#define DCRN_DDR34_ECCERR_PORT1 0xF2 +#define DCRN_DDR34_ECCERR_PORT2 0xF4 +#define DCRN_DDR34_ECCERR_PORT3 0xF6 +#define DCRN_DDR34_ECC_CHECK_PORT0 0xF8 +#define DCRN_DDR34_ECC_CHECK_PORT1 0xF9 +#define DCRN_DDR34_ECC_CHECK_PORT2 0xF9 +#define DCRN_DDR34_ECC_CHECK_PORT3 0xFB + +#define DDR34_SCRUB_CNTL_STOP 0x00000000 +#define DDR34_SCRUB_CNTL_SCRUB 0x80000000 +#define DDR34_SCRUB_CNTL_UE_STOP 0x20000000 +#define DDR34_SCRUB_CNTL_CE_STOP 0x10000000 +#define DDR34_SCRUB_CNTL_RANK_EN 0x00008000 + +/* PLB-Attached DDR3/4 Core Wrapper */ +#define DCRN_CW_BASE 0x11111800 +#define DCRN_CW_MCER0 0x00 +#define DCRN_CW_MCER1 0x01 +#define DCRN_CW_MCER_AND0 0x02 +#define DCRN_CW_MCER_AND1 0x03 +#define DCRN_CW_MCER_OR0 0x04 +#define DCRN_CW_MCER_OR1 0x05 +#define DCRN_CW_MCER_MASK0 0x06 +#define DCRN_CW_MCER_MASK1 0x07 +#define DCRN_CW_MCER_MASK_AND0 0x08 +#define DCRN_CW_MCER_MASK_AND1 0x09 +#define DCRN_CW_MCER_MASK_OR0 0x0A +#define DCRN_CW_MCER_MASK_OR1 0x0B +#define DCRN_CW_MCER_ACTION0 0x0C +#define DCRN_CW_MCER_ACTION1 0x0D +#define DCRN_CW_MCER_WOF0 0x0E +#define DCRN_CW_MCER_WOF1 0x0F +#define DCRN_CW_LFIR 0x10 +#define DCRN_CW_LFIR_AND 0x11 +#define DCRN_CW_LFIR_OR 0x12 +#define DCRN_CW_LFIR_MASK 0x13 +#define DCRN_CW_LFIR_MASK_AND 0x14 +#define DCRN_CW_LFIR_MASK_OR 0x15 + +#define CW_MCER0_MEM_CE 0x00020000 +/* CMU addresses */ +#define CMUN_CRCS 0x00 /* Chip Reset Control/Status */ +#define CMUN_CONFFIR0 0x20 /* Config Reg Parity FIR 0 */ +#define CMUN_CONFFIR1 0x21 /* Config Reg Parity FIR 1 */ +#define CMUN_CONFFIR2 0x22 /* Config Reg Parity FIR 2 */ +#define CMUN_CONFFIR3 0x23 /* Config Reg Parity FIR 3 */ +#define CMUN_URCR3_RS 0x24 /* Unit Reset Control Reg 3 Set */ +#define CMUN_URCR3_C 0x25 /* Unit Reset Control Reg 3 Clear */ +#define CMUN_URCR3_P 0x26 /* Unit Reset Control Reg 3 Pulse */ +#define CMUN_PW0 0x2C /* Pulse Width Register */ +#define CMUN_URCR0_P 0x2D /* Unit Reset Control Reg 0 Pulse */ +#define CMUN_URCR1_P 0x2E /* Unit Reset Control Reg 1 Pulse */ +#define CMUN_URCR2_P 0x2F /* Unit Reset Control Reg 2 Pulse */ +#define CMUN_CLS_RW 0x30 /* Code Load Status (Read/Write) */ +#define CMUN_CLS_S 0x31 /* Code Load Status (Set) */ +#define CMUN_CLS_C 0x32 /* Code Load Status (Clear */ +#define CMUN_URCR2_RS 0x33 /* Unit Reset Control Reg 2 Set */ +#define CMUN_URCR2_C 0x34 /* Unit Reset Control Reg 2 Clear */ +#define CMUN_CLKEN0 0x35 /* Clock Enable 0 */ +#define CMUN_CLKEN1 0x36 /* Clock Enable 1 */ +#define CMUN_PCD0 0x37 /* PSI clock divider 0 */ +#define CMUN_PCD1 0x38 /* PSI clock divider 1 */ +#define CMUN_TMR0 0x39 /* Reset Timer */ +#define CMUN_TVS0 0x3A /* TV Sense Reg 0 */ +#define CMUN_TVS1 0x3B /* TV Sense Reg 1 */ +#define CMUN_MCCR 0x3C /* DRAM Configuration Reg */ +#define CMUN_FIR0 0x3D /* Fault Isolation Reg 0 */ +#define CMUN_FMR0 0x3E /* FIR Mask Reg 0 */ +#define CMUN_ETDRB 0x3F /* ETDR Backdoor */ + +/* CRCS bit fields */ +#define CRCS_STAT_MASK 0xF0000000 +#define CRCS_STAT_POR 0x10000000 +#define CRCS_STAT_PHR 0x20000000 +#define CRCS_STAT_PCIE 0x30000000 +#define CRCS_STAT_CRCS_SYS 0x40000000 +#define CRCS_STAT_DBCR_SYS 0x50000000 +#define CRCS_STAT_HOST_SYS 0x60000000 +#define CRCS_STAT_CHIP_RST_B 0x70000000 +#define CRCS_STAT_CRCS_CHIP 0x80000000 +#define CRCS_STAT_DBCR_CHIP 0x90000000 +#define CRCS_STAT_HOST_CHIP 0xA0000000 +#define CRCS_STAT_PSI_CHIP 0xB0000000 +#define CRCS_STAT_CRCS_CORE 0xC0000000 +#define CRCS_STAT_DBCR_CORE 0xD0000000 +#define CRCS_STAT_HOST_CORE 0xE0000000 +#define CRCS_STAT_PCIE_HOT 0xF0000000 +#define CRCS_STAT_SELF_CORE 0x40000000 +#define CRCS_STAT_SELF_CHIP 0x50000000 +#define CRCS_WATCHE 0x08000000 +#define CRCS_CORE 0x04000000 /* Reset PPC440 core */ +#define CRCS_CHIP 0x02000000 /* Chip Reset */ +#define CRCS_SYS 0x01000000 /* System Reset */ +#define CRCS_WRCR 0x00800000 /* Watchdog reset on core reset */ +#define CRCS_EXTCR 0x00080000 /* CHIP_RST_B triggers chip reset */ +#define CRCS_PLOCK 0x00000002 /* PLL Locked */ + +#define mtcmu(reg, data) \ +do { \ + mtdcr(DCRN_CMU_ADDR, reg); \ + mtdcr(DCRN_CMU_DATA, data); \ +} while (0) + +#define mfcmu(reg)\ + ({u32 data; \ + mtdcr(DCRN_CMU_ADDR, reg); \ + data = mfdcr(DCRN_CMU_DATA); \ + data; }) + +#define mtl2(reg, data) \ +do { \ + mtdcr(DCRN_L2CDCRAI, reg); \ + mtdcr(DCRN_L2CDCRDI, data); \ +} while (0) + +#define mfl2(reg) \ + ({u32 data; \ + mtdcr(DCRN_L2CDCRAI, reg); \ + data = mfdcr(DCRN_L2CDCRDI); \ + data; }) + +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_FSP2_DCR_H_ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index f99e79ee060e..48abb4cb304c 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -387,8 +387,8 @@ static unsigned int __init get_fifo_size(struct device_node *np, if (fp) return *fp; - pr_warning("no %s property in %pOF node, defaulting to %d\n", - prop_name, np, DEFAULT_FIFO_SIZE); + pr_warn("no %s property in %pOF node, defaulting to %d\n", + prop_name, np, DEFAULT_FIFO_SIZE); return DEFAULT_FIFO_SIZE; } diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 9e974b1e1697..17cf249b18ee 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -90,7 +90,7 @@ struct mpc52xx_gpt_priv { struct list_head list; /* List of all GPT devices */ struct device *dev; struct mpc52xx_gpt __iomem *regs; - spinlock_t lock; + raw_spinlock_t lock; struct irq_domain *irqhost; u32 ipb_freq; u8 wdt_mode; @@ -141,9 +141,9 @@ static void mpc52xx_gpt_irq_unmask(struct irq_data *d) struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); unsigned long flags; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); } static void mpc52xx_gpt_irq_mask(struct irq_data *d) @@ -151,9 +151,9 @@ static void mpc52xx_gpt_irq_mask(struct irq_data *d) struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); unsigned long flags; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); } static void mpc52xx_gpt_irq_ack(struct irq_data *d) @@ -171,14 +171,14 @@ static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type) dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type); - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK; if (flow_type & IRQF_TRIGGER_RISING) reg |= MPC52xx_GPT_MODE_ICT_RISING; if (flow_type & IRQF_TRIGGER_FALLING) reg |= MPC52xx_GPT_MODE_ICT_FALLING; out_be32(&gpt->regs->mode, reg); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } @@ -264,11 +264,11 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) /* If the GPT is currently disabled, then change it to be in Input * Capture mode. If the mode is non-zero, then the pin could be * already in use for something. */ - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); mode = in_be32(&gpt->regs->mode); if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0) out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq); } @@ -295,9 +295,9 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v) dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v); r = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); } static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) @@ -307,9 +307,9 @@ static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio); - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } @@ -436,16 +436,16 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period, } /* Set and enable the timer, reject an attempt to use a wdt as gpt */ - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); if (as_wdt) gpt->wdt_mode |= MPC52xx_GPT_IS_WDT; else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return -EBUSY; } out_be32(&gpt->regs->count, prescale << 16 | clocks); clrsetbits_be32(&gpt->regs->mode, clear, set); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } @@ -476,14 +476,14 @@ int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt) unsigned long flags; /* reject the operation if the timer is used as watchdog (gpt 0 only) */ - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return -EBUSY; } clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } EXPORT_SYMBOL(mpc52xx_gpt_stop_timer); @@ -500,9 +500,9 @@ u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt) u64 prescale; unsigned long flags; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); period = in_be32(&gpt->regs->count); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); prescale = period >> 16; period &= 0xffff; @@ -532,9 +532,9 @@ static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt) { unsigned long flags; - spin_lock_irqsave(&gpt_wdt->lock, flags); + raw_spin_lock_irqsave(&gpt_wdt->lock, flags); out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING); - spin_unlock_irqrestore(&gpt_wdt->lock, flags); + raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags); } /* wdt misc device api */ @@ -638,11 +638,11 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file) struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; unsigned long flags; - spin_lock_irqsave(&gpt_wdt->lock, flags); + raw_spin_lock_irqsave(&gpt_wdt->lock, flags); clrbits32(&gpt_wdt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN); gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT; - spin_unlock_irqrestore(&gpt_wdt->lock, flags); + raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags); #endif clear_bit(0, &wdt_is_active); return 0; @@ -723,7 +723,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) if (!gpt) return -ENOMEM; - spin_lock_init(&gpt->lock); + raw_spin_lock_init(&gpt->lock); gpt->dev = &ofdev->dev; gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); gpt->regs = of_iomap(ofdev->dev.of_node, 0); diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 96bb55ca61d3..d2ef39f0edc8 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -84,7 +84,7 @@ static ssize_t show_status(struct device *d, return sprintf(buf, "%02x\n", ret); } -static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); +static DEVICE_ATTR(status, 0444, show_status, NULL); static void mcu_power_off(void) { diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index bb7b25acf26f..74c154e67c8b 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -75,7 +75,7 @@ static void __init mpc832x_sys_setup_arch(void) par_io_init(np); of_node_put(np); - for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + for_each_node_by_name(np, "ucc") par_io_of_config(np); } diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index a4539c5accb0..438986593873 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -204,7 +204,7 @@ static void __init mpc832x_rdb_setup_arch(void) par_io_init(np); of_node_put(np); - for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + for_each_node_by_name(np, "ucc") par_io_of_config(np); } #endif /* CONFIG_QUICC_ENGINE */ diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 4fc3051c2b2e..fd44dd03e1f3 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -83,7 +83,7 @@ static void __init mpc836x_mds_setup_arch(void) par_io_init(np); of_node_put(np); - for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + for_each_node_by_name(np, "ucc") par_io_of_config(np); #ifdef CONFIG_QE_USB /* Must fixup Par IO before QE GPIO chips are registered. */ diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 82f8490b5aa7..38d4ba9f37b5 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -252,8 +252,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h, /* type is configurable */ if (intspec[1] != IRQ_TYPE_LEVEL_LOW && intspec[1] != IRQ_TYPE_LEVEL_HIGH) { - pr_warning("FPGA PIC: invalid irq type, " - "setting default active low\n"); + pr_warn("FPGA PIC: invalid irq type, setting default active low\n"); *out_flags = IRQ_TYPE_LEVEL_LOW; } else { *out_flags = intspec[1]; @@ -267,7 +266,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h, if (intspec[2] <= 2) fpga_irq->irq_line = intspec[2]; else - pr_warning("FPGA PIC: invalid irq routing\n"); + pr_warn("FPGA PIC: invalid irq routing\n"); return 0; } @@ -293,7 +292,7 @@ void socrates_fpga_pic_init(struct device_node *pic) for (i = 0; i < 3; i++) { socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i); if (!socrates_fpga_irqs[i]) { - pr_warning("FPGA PIC: can't get irq%d.\n", i); + pr_warn("FPGA PIC: can't get irq%d\n", i); continue; } irq_set_chained_handler(socrates_fpga_irqs[i], diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index a0e989ed4b6f..17c6cd3d02e6 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -101,7 +101,7 @@ static int __init mpc86xx_hpcn_probe(void) /* Be nice and don't give silent boot death. Delete this in 2.6.27 */ if (of_machine_is_compatible("mpc86xx")) { - pr_warning("WARNING: your dts/dtb is old. You must update before the next kernel release\n"); + pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n"); return 1; } diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index e2089d3de00c..d408162d5af4 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -116,18 +116,6 @@ config 8xx_GPIO If in doubt, say Y here. -config 8xx_CPU6 - bool "CPU6 Silicon Errata (860 Pre Rev. C)" - help - MPC860 CPUs, prior to Rev C have some bugs in the silicon, which - require workarounds for Linux (and most other OSes to work). If you - get a BUG() very early in boot, this might fix the problem. For - more details read the document entitled "MPC860 Family Device Errata - Reference" on Freescale's website. This option also incurs a - performance hit. - - If in doubt, say N here. - config 8xx_CPU15 bool "CPU15 Silicon Errata" depends on !HUGETLB_PAGE diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 5a96a2763e4a..14ef17e10ec9 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -293,17 +293,6 @@ config CPM2 you wish to build a kernel for a machine with a CPM2 coprocessor on it (826x, 827x, 8560). -config AXON_RAM - tristate "Axon DDR2 memory device driver" - depends on PPC_IBM_CELL_BLADE && BLOCK - select DAX - default m - help - It registers one block device per Axon's DDR2 memory bank found - on a system. Block devices are called axonram?, their major and - minor numbers are available in /proc/devices, /proc/partitions or - in /sys/block/axonram?/dev. - config FSL_ULI1575 bool default n diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index ae07470fde3c..a429d859f15d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -33,7 +33,6 @@ config PPC_85xx config PPC_8xx bool "Freescale 8xx" select FSL_SOC - select PPC_LIB_RHEAP select SYS_SUPPORTS_HUGETLBFS config 40x @@ -168,13 +167,6 @@ config PPC_FPU bool default y if PPC64 -config PPC_8xx_PERF_EVENT - bool "PPC 8xx perf events" - depends on PPC_8xx && PERF_EVENTS - help - This is Performance Events support for PPC 8xx. The 8xx doesn't - have a PMU but some events are emulated using 8xx features. - config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" depends on E500 || PPC_83xx diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 6fc85e29dc08..5d4bf9aed51a 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -315,8 +315,7 @@ static int __init setup_iic(void) struct cbe_iic_regs __iomem *node_iic; const u32 *np; - for (dn = NULL; - (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { + for_each_node_by_name(dn, "interrupt-controller") { if (!of_device_is_compatible(dn, "IBM,CBEA-Internal-Interrupt-Controller")) continue; diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index d3543e68efe8..7d31b8d14661 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -192,8 +192,7 @@ static void __init mpic_init_IRQ(void) struct device_node *dn; struct mpic *mpic; - for (dn = NULL; - (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + for_each_node_by_name(dn, "interrupt-controller") { if (!of_device_is_compatible(dn, "CBEA,platform-open-pic")) continue; diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index aa44bfc46467..c137f0cb4151 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -343,8 +343,7 @@ void __init spider_init_IRQ(void) * device-tree is bogus anyway) so all we can do is pray or maybe test * the address and deduce the node-id */ - for (dn = NULL; - (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + for_each_node_by_name(dn, "interrupt-controller") { if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) { if (of_address_to_resource(dn, 0, &r)) { printk(KERN_WARNING "spider-pic: Failed\n"); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index f636ee22b203..5c409c98cca8 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -292,12 +292,12 @@ static int __init of_enumerate_spus(int (*fn)(void *data)) unsigned int n = 0; ret = -ENODEV; - for (node = of_find_node_by_type(NULL, "spe"); - node; node = of_find_node_by_type(node, "spe")) { + for_each_node_by_type(node, "spe") { ret = fn(node); if (ret) { printk(KERN_WARNING "%s: Error initializing %s\n", __func__, node->name); + of_node_put(node); break; } n++; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index fc7772c3d068..c1be486da899 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2375,8 +2375,8 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE; - return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n", - (unsigned int) p->tstamp.tv_sec, + return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n", + (unsigned long long) p->tstamp.tv_sec, (unsigned int) p->tstamp.tv_nsec, p->spu_id, (unsigned int) p->type, @@ -2499,7 +2499,7 @@ void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, struct switch_log_entry *p; p = ctx->switch_log->log + ctx->switch_log->head; - ktime_get_ts(&p->tstamp); + ktime_get_ts64(&p->tstamp); p->timebase = get_tb(); p->spu_id = spu ? spu->number : -1; p->type = type; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 9558d725a99b..db329d4bf1c3 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -455,7 +455,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, } } - ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); + ret = spufs_mkdir(inode, dentry, flags, mode & 0777); if (ret) goto out_aff_unlock; @@ -546,7 +546,7 @@ static int spufs_create_gang(struct inode *inode, struct path path = {.mnt = mnt, .dentry = dentry}; int ret; - ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); + ret = spufs_mkgang(inode, dentry, mode & 0777); if (!ret) { ret = spufs_gang_open(&path); if (ret < 0) { diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 2d0479ad3af4..b5fc1b3fe538 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -69,7 +69,7 @@ struct switch_log { unsigned long head; unsigned long tail; struct switch_log_entry { - struct timespec tstamp; + struct timespec64 tstamp; s32 spu_id; u32 type; u32 val; diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index 81799d70a1ee..cfddc87f81bf 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -134,7 +134,7 @@ int maple_set_rtc_time(struct rtc_time *tm) static struct resource rtc_iores = { .name = "rtc", - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; unsigned long __init maple_get_boot_time(void) diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index aafa01ba062f..2c72263ad6ab 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -589,7 +589,7 @@ int pasemi_dma_init(void) pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0); while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) { if (time_after(jiffies, timeout)) { - pr_warning("Warning: Could not disable RX section\n"); + pr_warn("Warning: Could not disable RX section\n"); break; } } @@ -598,7 +598,7 @@ int pasemi_dma_init(void) pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0); while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) { if (time_after(jiffies, timeout)) { - pr_warning("Warning: Could not disable TX section\n"); + pr_warn("Warning: Could not disable TX section\n"); break; } } diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index a00096b1c713..6b5dcccae1d3 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -186,7 +186,7 @@ int pmac_backlight_set_legacy_brightness(int brightness) return __pmac_backlight_set_legacy_brightness(brightness); } -int pmac_backlight_get_legacy_brightness() +int pmac_backlight_get_legacy_brightness(void) { int result = -ENXIO; @@ -205,12 +205,12 @@ int pmac_backlight_get_legacy_brightness() return result; } -void pmac_backlight_disable() +void pmac_backlight_disable(void) { atomic_inc(&kernel_backlight_disabled); } -void pmac_backlight_enable() +void pmac_backlight_enable(void) { atomic_dec(&kernel_backlight_disabled); } diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 9e3f39d36e88..3f82cb24eb2b 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -829,7 +829,7 @@ core99_ata100_enable(struct device_node *node, long value) if (value) { if (pci_device_from_OF_node(node, &pbus, &pid) == 0) - pdev = pci_get_bus_and_slot(pbus, pid); + pdev = pci_get_domain_bus_and_slot(0, pbus, pid); if (pdev == NULL) return 0; rc = pci_enable_device(pdev); @@ -2641,7 +2641,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ phys_addr_t addr; u64 size; - for (node = NULL; (node = of_find_node_by_name(node, name)) != NULL;) { + for_each_node_by_name(node, name) { if (!compat) break; if (of_device_is_compatible(node, compat)) @@ -2853,7 +2853,6 @@ set_initial_features(void) } /* Enable ATA-100 before PCI probe. */ - np = of_find_node_by_name(NULL, "ata-6"); for_each_node_by_name(np, "ata-6") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 5e0719b27294..57bbff465964 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -486,15 +486,16 @@ static int __init pmac_pic_probe_mpic(void) struct device_node *np, *master = NULL, *slave = NULL; /* We can have up to 2 MPICs cascaded */ - for (np = NULL; (np = of_find_node_by_type(np, "open-pic")) - != NULL;) { + for_each_node_by_type(np, "open-pic") { if (master == NULL && of_get_property(np, "interrupts", NULL) == NULL) master = of_node_get(np); else if (slave == NULL) slave = of_node_get(np); - if (master && slave) + if (master && slave) { + of_node_put(np); break; + } } /* Check for bogus setups */ @@ -604,6 +605,7 @@ static int pmacpic_find_viaint(void) if (np == NULL) goto not_found; viaint = irq_of_parse_and_map(np, 0); + of_node_put(np); not_found: #endif /* CONFIG_ADB_PMU */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 2cd99eb30762..95275e0e2efa 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -774,8 +774,8 @@ static void __init smp_core99_probe(void) if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); /* Count CPUs in the device-tree */ - for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;) - ++ncpus; + for_each_node_by_type(cpus, "cpu") + ++ncpus; printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 3732118a0482..6c9d5199a7e2 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o obj-$(CONFIG_PPC_FTW) += nx-ftw.o +obj-$(CONFIG_OCXL_BASE) += ocxl.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4650fb294e7a..33c86c1a1720 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -43,6 +43,22 @@ static int eeh_event_irq = -EINVAL; +void pnv_pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); +} + static int pnv_eeh_init(void) { struct pci_controller *hose; @@ -86,6 +102,7 @@ static int pnv_eeh_init(void) } eeh_set_pe_aux_size(max_diag_size); + ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; return 0; } @@ -1638,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } -static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & 0x10) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= 0x10; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; -} - static int pnv_eeh_restore_config(struct pci_dn *pdn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; - s64 ret; + s64 ret = 0; int config_addr = (pdn->busno << 8) | (pdn->devfn); if (!edev) @@ -1715,7 +1673,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) * to be exported by firmware in extendible way. */ if (edev->physfn) { - ret = pnv_eeh_restore_vf_config(pdn); + ret = eeh_restore_vf_config(pdn); } else { phb = pdn->phb->private_data; ret = opal_pci_reinit(phb->opal_id, @@ -1728,7 +1686,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) return -EIO; } - return 0; + return ret; } static struct eeh_ops pnv_eeh_ops = { @@ -1746,25 +1704,10 @@ static struct eeh_ops pnv_eeh_ops = { .read_config = pnv_eeh_read_config, .write_config = pnv_eeh_write_config, .next_error = pnv_eeh_next_error, - .restore_config = pnv_eeh_restore_config + .restore_config = pnv_eeh_restore_config, + .notify_resume = NULL }; -void pcibios_bus_add_device(struct pci_dev *pdev) -{ - struct pci_dn *pdn = pci_get_pdn(pdev); - - if (!pdev->is_virtfn) - return; - - /* - * The following operations will fail if VF's sysfs files - * aren't created or its resources aren't finalized. - */ - eeh_add_device_early(pdn); - eeh_add_device_late(pdev); - eeh_sysfs_add_device(pdev); -} - #ifdef CONFIG_PCI_IOV static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev) { diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index f6cbc1a71472..0a253b64ac5f 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -39,7 +39,10 @@ */ static struct pci_dev *get_pci_dev(struct device_node *dn) { - return PCI_DN(dn)->pcidev; + struct pci_dn *pdn = PCI_DN(dn); + + return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), + pdn->busno, pdn->devfn); } /* Given a NPU device get the associated PCI device. */ @@ -277,7 +280,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) int64_t rc = 0; phys_addr_t top = memblock_end_of_DRAM(); - if (phb->type != PNV_PHB_NPU || !npe->pdev) + if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) return -EINVAL; rc = pnv_npu_unset_window(npe, 0); diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c new file mode 100644 index 000000000000..fa9b53af3c7b --- /dev/null +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -0,0 +1,515 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include <asm/pnv-ocxl.h> +#include <asm/opal.h> +#include <asm/xive.h> +#include <misc/ocxl-config.h> +#include "pci.h" + +#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full +#define PNV_OCXL_ACTAG_MAX 64 +/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ +#define PNV_OCXL_PASID_BITS 15 +#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) + +#define AFU_PRESENT (1 << 31) +#define AFU_INDEX_MASK 0x3F000000 +#define AFU_INDEX_SHIFT 24 +#define ACTAG_MASK 0xFFF + + +struct actag_range { + u16 start; + u16 count; +}; + +struct npu_link { + struct list_head list; + int domain; + int bus; + int dev; + u16 fn_desired_actags[8]; + struct actag_range fn_actags[8]; + bool assignment_done; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + + +/* + * opencapi actags handling: + * + * When sending commands, the opencapi device references the memory + * context it's targeting with an 'actag', which is really an alias + * for a (BDF, pasid) combination. When it receives a command, the NPU + * must do a lookup of the actag to identify the memory context. The + * hardware supports a finite number of actags per link (64 for + * POWER9). + * + * The device can carry multiple functions, and each function can have + * multiple AFUs. Each AFU advertises in its config space the number + * of desired actags. The host must configure in the config space of + * the AFU how many actags the AFU is really allowed to use (which can + * be less than what the AFU desires). + * + * When a PCI function is probed by the driver, it has no visibility + * about the other PCI functions and how many actags they'd like, + * which makes it impossible to distribute actags fairly among AFUs. + * + * Unfortunately, the only way to know how many actags a function + * desires is by looking at the data for each AFU in the config space + * and add them up. Similarly, the only way to know how many actags + * all the functions of the physical device desire is by adding the + * previously computed function counts. Then we can match that against + * what the hardware supports. + * + * To get a comprehensive view, we use a 'pci fixup': at the end of + * PCI enumeration, each function counts how many actags its AFUs + * desire and we save it in a 'npu_link' structure, shared between all + * the PCI functions of a same device. Therefore, when the first + * function is probed by the driver, we can get an idea of the total + * count of desired actags for the device, and assign the actags to + * the AFUs, by pro-rating if needed. + */ + +static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) +{ + int vsec = pos; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u8 idx; + + while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, + vsec))) { + pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + return 0; +} + +static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) +{ + int pos; + u32 val; + + pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0); + if (!pos) + return -ESRCH; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + if (val & AFU_PRESENT) + *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; + else + *afu_idx = -1; + return 0; +} + +static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) +{ + int pos; + u16 actag_sup; + + pos = find_dvsec_afu_ctrl(dev, afu_idx); + if (!pos) + return -ESRCH; + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, + &actag_sup); + *actag = actag_sup & ACTAG_MASK; + return 0; +} + +static struct npu_link *find_link(struct pci_dev *dev) +{ + struct npu_link *link; + + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + return link; + } + } + + /* link doesn't exist yet. Allocate one */ + link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); + if (!link) + return NULL; + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + list_add(&link->list, &links_list); + return link; +} + +static void pnv_ocxl_fixup_actag(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct npu_link *link; + int rc, afu_idx = -1, i, actag; + + if (!machine_is(powernv)) + return; + + if (phb->type != PNV_PHB_NPU_OCAPI) + return; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_warn(&dev->dev, "couldn't update actag information\n"); + mutex_unlock(&links_list_lock); + return; + } + + /* + * Check how many actags are desired for the AFUs under that + * function and add it to the count for the link + */ + rc = get_max_afu_index(dev, &afu_idx); + if (rc) { + /* Most likely an invalid config space */ + dev_dbg(&dev->dev, "couldn't find AFU information\n"); + afu_idx = -1; + } + + link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; + for (i = 0; i <= afu_idx; i++) { + /* + * AFU index 'holes' are allowed. So don't fail if we + * can't read the actag info for an index + */ + rc = get_actag_count(dev, i, &actag); + if (rc) + continue; + link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; + } + dev_dbg(&dev->dev, "total actags for function: %d\n", + link->fn_desired_actags[PCI_FUNC(dev->devfn)]); + + mutex_unlock(&links_list_lock); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); + +static u16 assign_fn_actags(u16 desired, u16 total) +{ + u16 count; + + if (total <= PNV_OCXL_ACTAG_MAX) + count = desired; + else + count = PNV_OCXL_ACTAG_MAX * desired / total; + + return count; +} + +static void assign_actags(struct npu_link *link) +{ + u16 actag_count, range_start = 0, total_desired = 0; + int i; + + for (i = 0; i < 8; i++) + total_desired += link->fn_desired_actags[i]; + + for (i = 0; i < 8; i++) { + if (link->fn_desired_actags[i]) { + actag_count = assign_fn_actags( + link->fn_desired_actags[i], + total_desired); + link->fn_actags[i].start = range_start; + link->fn_actags[i].count = actag_count; + range_start += actag_count; + WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); + } + pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", + link->domain, link->bus, link->dev, i, + link->fn_actags[i].start, link->fn_actags[i].count, + link->fn_desired_actags[i]); + } + link->assignment_done = true; +} + +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + struct npu_link *link; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + /* + * On p9, we only have 64 actags per link, so they must be + * shared by all the functions of the same adapter. We counted + * the desired actag counts during PCI enumeration, so that we + * can allocate a pro-rated number of actags to each function. + */ + if (!link->assignment_done) + assign_actags(link); + + *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; + *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; + *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; + + mutex_unlock(&links_list_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); + +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) +{ + struct npu_link *link; + int i, rc = -EINVAL; + + /* + * The number of PASIDs (process address space ID) which can + * be used by a function depends on how many functions exist + * on the device. The NPU needs to be configured to know how + * many bits are available to PASIDs and how many are to be + * used by the function BDF indentifier. + * + * We only support one AFU-carrying function for now. + */ + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + + for (i = 0; i < 8; i++) + if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { + *count = PNV_OCXL_PASID_MAX; + rc = 0; + break; + } + + mutex_unlock(&links_list_lock); + dev_dbg(&dev->dev, "%d PASIDs available for function\n", + rc ? 0 : *count); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); + +static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) +{ + int shift, idx; + + WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE); + idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2; + shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2)); + buf[idx] |= rate << shift; +} + +int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, + char *rate_buf, int rate_buf_size) +{ + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + /* + * The TL capabilities are a characteristic of the NPU, so + * we go with hard-coded values. + * + * The receiving rate of each template is encoded on 4 bits. + * + * On P9: + * - templates 0 -> 3 are supported + * - templates 0, 1 and 3 have a 0 receiving rate + * - template 2 has receiving rate of 1 (extra cycle) + */ + memset(rate_buf, 0, rate_buf_size); + set_templ_rate(2, 1, rate_buf); + *cap = PNV_OCXL_TL_P9_RECV_CAP; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap); + +int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int rc; + + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + + rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap, + rate_buf_phys, rate_buf_size); + if (rc) { + dev_err(&dev->dev, "Can't configure host TL: %d\n", rc); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf); + +int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq) +{ + int rc; + + rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq); + if (rc) { + dev_err(&dev->dev, + "Can't get translation interrupt for device\n"); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq); + +void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle) +{ + iounmap(dsisr); + iounmap(dar); + iounmap(tfc); + iounmap(pe_handle); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs); + +int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle) +{ + u64 reg; + int i, j, rc = 0; + void __iomem *regs[4]; + + /* + * opal stores the mmio addresses of the DSISR, DAR, TFC and + * PE_HANDLE registers in a device tree property, in that + * order + */ + for (i = 0; i < 4; i++) { + rc = of_property_read_u64_index(dev->dev.of_node, + "ibm,opal-xsl-mmio", i, ®); + if (rc) + break; + regs[i] = ioremap(reg, 8); + if (!regs[i]) { + rc = -EINVAL; + break; + } + } + if (rc) { + dev_err(&dev->dev, "Can't map translation mmio registers\n"); + for (j = i - 1; j >= 0; j--) + iounmap(regs[j]); + } else { + *dsisr = regs[0]; + *dar = regs[1]; + *tfc = regs[2]; + *pe_handle = regs[3]; + } + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs); + +struct spa_data { + u64 phb_opal_id; + u32 bdfn; +}; + +int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, + void **platform_data) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct spa_data *data; + u32 bdfn; + int rc; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + bdfn = (dev->bus->number << 8) | dev->devfn; + rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), + PE_mask); + if (rc) { + dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc); + kfree(data); + return rc; + } + data->phb_opal_id = phb->opal_id; + data->bdfn = bdfn; + *platform_data = (void *) data; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup); + +void pnv_ocxl_spa_release(void *platform_data) +{ + struct spa_data *data = (struct spa_data *) platform_data; + int rc; + + rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0); + WARN_ON(rc); + kfree(data); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); + +int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle) +{ + struct spa_data *data = (struct spa_data *) platform_data; + int rc; + + rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe); + +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr) +{ + __be64 flags, trigger_page; + s64 rc; + u32 hwirq; + + hwirq = xive_native_alloc_irq(); + if (!hwirq) + return -ENOENT; + + rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL, + NULL); + if (rc || !trigger_page) { + xive_native_free_irq(hwirq); + return -ENOENT; + } + *irq = hwirq; + *trigger_addr = be64_to_cpu(trigger_page); + return 0; + +} +EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq); + +void pnv_ocxl_free_xive_irq(u32 irq) +{ + xive_native_free_irq(irq); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 4c827826c05e..0dc8fa4e0af2 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -103,9 +103,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj, * due to the dynamic size of the dump */ static struct dump_attribute id_attribute = - __ATTR(id, S_IRUGO, dump_id_show, NULL); + __ATTR(id, 0444, dump_id_show, NULL); static struct dump_attribute type_attribute = - __ATTR(type, S_IRUGO, dump_type_show, NULL); + __ATTR(type, 0444, dump_type_show, NULL); static struct dump_attribute ack_attribute = __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index ecd6d9177d13..ba6e437abb4b 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -83,9 +83,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj, } static struct elog_attribute id_attribute = - __ATTR(id, S_IRUGO, elog_id_show, NULL); + __ATTR(id, 0444, elog_id_show, NULL); static struct elog_attribute type_attribute = - __ATTR(type, S_IRUGO, elog_type_show, NULL); + __ATTR(type, 0444, elog_type_show, NULL); static struct elog_attribute ack_attribute = __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 465ea105b771..dd4c9b8b8a81 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -21,6 +21,78 @@ #include <asm/io.h> #include <asm/imc-pmu.h> #include <asm/cputhreads.h> +#include <asm/debugfs.h> + +static struct dentry *imc_debugfs_parent; + +/* Helpers to export imc command and mode via debugfs */ +static int imc_mem_get(void *data, u64 *val) +{ + *val = cpu_to_be64(*(u64 *)data); + return 0; +} + +static int imc_mem_set(void *data, u64 val) +{ + *(u64 *)data = cpu_to_be64(val); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n"); + +static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file_unsafe(name, mode, parent, + value, &fops_imc_x64); +} + +/* + * export_imc_mode_and_cmd: Create a debugfs interface + * for imc_cmd and imc_mode + * for each node in the system. + * imc_mode and imc_cmd can be changed by echo into + * this interface. + */ +static void export_imc_mode_and_cmd(struct device_node *node, + struct imc_pmu *pmu_ptr) +{ + static u64 loc, *imc_mode_addr, *imc_cmd_addr; + int chip = 0, nid; + char mode[16], cmd[16]; + u32 cb_offset; + + imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); + + /* + * Return here, either because 'imc' directory already exists, + * Or failed to create a new one. + */ + if (!imc_debugfs_parent) + return; + + if (of_property_read_u32(node, "cb_offset", &cb_offset)) + cb_offset = IMC_CNTL_BLK_OFFSET; + + for_each_node(nid) { + loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset; + imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET); + sprintf(mode, "imc_mode_%d", nid); + if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, + imc_mode_addr)) + goto err; + + imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET); + sprintf(cmd, "imc_cmd_%d", nid); + if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, + imc_cmd_addr)) + goto err; + chip++; + } + return; + +err: + debugfs_remove_recursive(imc_debugfs_parent); +} /* * imc_get_mem_addr_nest: Function to get nest counter memory region @@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, } pmu_ptr->imc_counter_mmaped = true; + export_imc_mode_and_cmd(node, pmu_ptr); kfree(base_addr_arr); kfree(chipid_arr); return 0; @@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } } + /* If none of the nest units are registered, remove debugfs interface */ + if (pmu_count == 0) + debugfs_remove_recursive(imc_debugfs_parent); + return 0; } diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 23fb6647dced..6fd4092798d5 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -260,13 +260,13 @@ void __init opal_sys_param_init(void) /* If the parameter is read-only or read-write */ switch (perm[i] & 3) { case OPAL_SYSPARAM_READ: - attr[i].kobj_attr.attr.mode = S_IRUGO; + attr[i].kobj_attr.attr.mode = 0444; break; case OPAL_SYSPARAM_WRITE: - attr[i].kobj_attr.attr.mode = S_IWUSR; + attr[i].kobj_attr.attr.mode = 0200; break; case OPAL_SYSPARAM_RW: - attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR; + attr[i].kobj_attr.attr.mode = 0644; break; default: break; diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6f4b00a2ac46..1b2936ba6040 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -320,3 +320,6 @@ OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); +OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); +OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); +OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 041ddbd1fc57..c15182765ff5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node, if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { powerpc_firmware_features |= FW_FEATURE_OPAL; - pr_info("OPAL detected !\n"); + pr_debug("OPAL detected !\n"); } else { panic("OPAL != V3 detected, no longer supported.\n"); } @@ -239,8 +239,8 @@ int opal_message_notifier_register(enum opal_msg_type msg_type, struct notifier_block *nb) { if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Invalid arguments, msg_type:%d\n", - __func__, msg_type); + pr_warn("%s: Invalid arguments, msg_type:%d\n", + __func__, msg_type); return -EINVAL; } @@ -281,8 +281,8 @@ static void opal_handle_message(void) /* check for errors. */ if (ret) { - pr_warning("%s: Failed to retrieve opal message, err=%lld\n", - __func__, ret); + pr_warn("%s: Failed to retrieve opal message, err=%lld\n", + __func__, ret); return; } @@ -461,24 +461,14 @@ static int opal_recover_mce(struct pt_regs *regs, void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) { - /* - * This is mostly taken from kernel/panic.c, but tries to do - * relatively minimal work. Don't use delay functions (TB may - * be broken), don't crash dump (need to set a firmware log), - * don't run notifiers. We do want to get some information to - * Linux console. - */ - console_verbose(); - bust_spinlocks(1); + panic_flush_kmsg_start(); + pr_emerg("Hardware platform error: %s\n", msg); if (regs) show_regs(regs); smp_send_stop(); - printk_safe_flush_on_panic(); - kmsg_dump(KMSG_DUMP_PANIC); - bust_spinlocks(0); - debug_locks_off(); - console_flush_on_panic(); + + panic_flush_kmsg_end(); /* * Don't bother to shut things down because this will diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9582aeb1fe4c..496e47696ed0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -54,7 +54,8 @@ #define POWERNV_IOMMU_DEFAULT_LEVELS 1 #define POWERNV_IOMMU_MAX_LEVELS 5 -static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" }; +static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", + "NPU_OCAPI" }; static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, @@ -89,6 +90,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, } static bool pnv_iommu_bypass_disabled __read_mostly; +static bool pci_reset_phbs __read_mostly; static int __init iommu_setup(char *str) { @@ -110,6 +112,14 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); +static int __init pci_reset_phbs_setup(char *str) +{ + pci_reset_phbs = true; + return 0; +} + +early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); + static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) { /* @@ -924,7 +934,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) * Configure PELTV. NPUs don't have a PELTV table so skip * configuration on them. */ - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) pnv_ioda_set_peltv(phb, pe, true); /* Setup reverse map */ @@ -1059,8 +1069,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pe = pnv_ioda_alloc_pe(phb); if (!pe) { - pr_warning("%s: Not enough PE# available, disabling device\n", - pci_name(dev)); + pr_warn("%s: Not enough PE# available, disabling device\n", + pci_name(dev)); return NULL; } @@ -1072,7 +1082,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) * At some point we want to remove the PDN completely anyways */ pci_dev_get(dev); - pdn->pcidev = dev; pdn->pe_number = pe->pe_number; pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; @@ -1119,7 +1128,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) continue; pe->device_count++; - pdn->pcidev = dev; pdn->pe_number = pe->pe_number; if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_same_PE(dev->subordinate, pe); @@ -1164,7 +1172,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe = pnv_ioda_alloc_pe(phb); if (!pe) { - pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", + pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", __func__, pci_domain_nr(bus), bus->number); return NULL; } @@ -1234,7 +1242,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) pci_dev_get(npu_pdev); npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; - npu_pdn->pcidev = npu_pdev; npu_pdn->pe_number = pe_num; phb->ioda.pe_rmap[rid] = pe->pe_number; @@ -1272,16 +1279,23 @@ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; struct pnv_phb *phb; + struct pci_bus *bus; + struct pci_dev *pdev; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - if (phb->type == PNV_PHB_NPU) { + if (phb->type == PNV_PHB_NPU_NVLINK) { /* PE#0 is needed for error reporting */ pnv_ioda_reserve_pe(phb, 0); pnv_ioda_setup_npu_PEs(hose->bus); if (phb->model == PNV_PHB_MODEL_NPU2) pnv_npu2_init(phb); } + if (phb->type == PNV_PHB_NPU_OCAPI) { + bus = hose->bus; + list_for_each_entry(pdev, &bus->devices, bus_list) + pnv_ioda_setup_dev_PE(pdev); + } } } @@ -1692,7 +1706,7 @@ m64_failed: return ret; } -int pcibios_sriov_disable(struct pci_dev *pdev) +int pnv_pcibios_sriov_disable(struct pci_dev *pdev) { pnv_pci_sriov_disable(pdev); @@ -1701,7 +1715,7 @@ int pcibios_sriov_disable(struct pci_dev *pdev) return 0; } -int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_dev_pci_data(pdev); @@ -2572,7 +2586,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, unsigned long direct_table_size; if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || - (window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) return 0; @@ -2640,7 +2653,7 @@ static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) hose = pci_bus_to_host(pdev->bus); phb = hose->private_data; - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK) return 0; *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; @@ -2724,7 +2737,7 @@ static void pnv_pci_ioda_setup_iommu_api(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK) continue; list_for_each_entry(pe, &phb->ioda.pe_list, list) { @@ -3293,7 +3306,7 @@ static void pnv_pci_ioda_create_dbgfs(void) sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); if (!phb->dbgfs) { - pr_warning("%s: Error on creating debugfs on PHB#%x\n", + pr_warn("%s: Error on creating debugfs on PHB#%x\n", __func__, hose->global_number); continue; } @@ -3774,6 +3787,13 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; +static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .shutdown = pnv_pci_ioda_shutdown, +}; + #ifdef CONFIG_CXL_BASE const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, @@ -4007,9 +4027,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - if (phb->type == PNV_PHB_NPU) { + switch (phb->type) { + case PNV_PHB_NPU_NVLINK: hose->controller_ops = pnv_npu_ioda_controller_ops; - } else { + break; + case PNV_PHB_NPU_OCAPI: + hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; + break; + default: phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; hose->controller_ops = pnv_pci_ioda_controller_ops; } @@ -4019,6 +4044,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; + ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; + ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; #endif pci_add_flags(PCI_REASSIGN_ALL_RSRC); @@ -4026,15 +4053,16 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Reset IODA tables to a clean state */ rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); if (rc) - pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); + pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc); /* * If we're running in kdump kernel, the previous kernel never * shutdown PCI devices correctly. We already got IODA table * cleaned out. So we have to issue PHB reset to stop all PCI - * transactions from previous kernel. + * transactions from previous kernel. The ppc_pci_reset_phbs + * kernel parameter will force this reset too. */ - if (is_kdump_kernel()) { + if (is_kdump_kernel() || pci_reset_phbs) { pr_info(" Issue PHB reset ...\n"); pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); @@ -4052,8 +4080,26 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np) void __init pnv_pci_init_npu_phb(struct device_node *np) { - pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU); + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK); +} + +void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) +{ + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI); +} + +static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + if (!machine_is(powernv)) + return; + + if (phb->type == PNV_PHB_NPU_OCAPI) + dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE; } +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup); void __init pnv_pci_init_ioda_hub(struct device_node *np) { diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5422f4a6317c..69d102cbf48f 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1142,6 +1142,10 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb") pnv_pci_init_npu_phb(np); + /* Look for NPU2 OpenCAPI PHBs */ + for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_pci_init_npu2_opencapi_phb(np); + /* Configure IOMMU DMA hooks */ set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index b772d7473896..eada4b6068cb 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -12,9 +12,10 @@ struct pci_dn; #define NV_NMMU_ATSD_REGS 8 enum pnv_phb_type { - PNV_PHB_IODA1 = 0, - PNV_PHB_IODA2 = 1, - PNV_PHB_NPU = 2, + PNV_PHB_IODA1 = 0, + PNV_PHB_IODA2 = 1, + PNV_PHB_NPU_NVLINK = 2, + PNV_PHB_NPU_OCAPI = 3, }; /* Precise PHB model for error management */ @@ -227,6 +228,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); +extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index ba030669eca1..9664c8461f03 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -37,6 +37,8 @@ #include <asm/kvm_ppc.h> #include <asm/ppc-opcode.h> #include <asm/cpuidle.h> +#include <asm/kexec.h> +#include <asm/reg.h> #include "powernv.h" @@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void) } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + } else if ((srr1 & wmask) == SRR1_WAKERESET) { + irq_set_pending_from_srr1(srr1); + /* Does not return */ } + smp_mb(); + /* + * For kdump kernels, we process the ipi and jump to + * crash_ipi_callback + */ + if (kdump_in_progress()) { + /* + * If we got to this point, we've not used + * NMI's, otherwise we would have gone + * via the SRR1_WAKERESET path. We are + * using regular IPI's for waking up offline + * threads. + */ + struct pt_regs regs; + + ppc_save_regs(®s); + crash_ipi_callback(®s); + /* Does not return */ + } + if (cpu_core_split_required()) continue; @@ -371,5 +396,8 @@ void __init pnv_smp_init(void) #ifdef CONFIG_HOTPLUG_CPU ppc_md.cpu_die = pnv_smp_cpu_kill_self; +#ifdef CONFIG_KEXEC_CORE + crash_wake_offline = 1; +#endif #endif } diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index e48462447ff0..e7075aaff1bb 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -663,8 +663,8 @@ static void ps3_find_and_add_device(u64 bus_id, u64 dev_id) if (rem) break; } - pr_warning("%s:%u: device %llu:%llu not found\n", __func__, __LINE__, - bus_id, dev_id); + pr_warn("%s:%u: device %llu:%llu not found\n", + __func__, __LINE__, bus_id, dev_id); return; found: @@ -859,11 +859,9 @@ static int ps3_probe_thread(void *data) if (notify_event->event_type != notify_region_probe || notify_event->bus_id != dev.sbd.bus_id) { - pr_warning("%s:%u: bad notify_event: event %llu, " - "dev_id %llu, dev_type %llu\n", - __func__, __LINE__, notify_event->event_type, - notify_event->dev_id, - notify_event->dev_type); + pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n", + __func__, __LINE__, notify_event->event_type, + notify_event->dev_id, notify_event->dev_type); continue; } diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index b0f34663b1ae..7f870ec29daf 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -607,8 +607,8 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, r->ioid, iopte_flag); if (result) { - pr_warning("%s:%d: lv1_put_iopte failed: %s\n", - __func__, __LINE__, ps3_result(result)); + pr_warn("%s:%d: lv1_put_iopte failed: %s\n", + __func__, __LINE__, ps3_result(result)); goto fail_map; } DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__, diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 3db53e8aff92..cdbfc5cfd6f3 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -699,7 +699,7 @@ static void os_area_queue_work_handler(struct work_struct *work) error = update_flash_db(); if (error) - pr_warning("%s: Could not update FLASH ROM\n", __func__); + pr_warn("%s: Could not update FLASH ROM\n", __func__); pr_debug(" <- %s:%d\n", __func__, __LINE__); } diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 6244bc849469..77a37520068d 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -113,6 +113,7 @@ static void ps3_panic(char *str) printk(" System does not reboot automatically.\n"); printk(" Please press POWER button.\n"); printk("\n"); + panic_flush_kmsg_end(); while(1) lv1_pause(1); diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 560aefde06c0..25427a48feae 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -72,20 +72,20 @@ MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); MODULE_LICENSE("GPL"); MODULE_VERSION(CMM_DRIVER_VERSION); -module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); +module_param_named(delay, delay, uint, 0644); MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); -module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR); +module_param_named(hotplug_delay, hotplug_delay, uint, 0644); MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove " "before loaning resumes. " "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); -module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); +module_param_named(oom_kb, oom_kb, uint, 0644); MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " "[Default=" __stringify(CMM_OOM_KB) "]"); -module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR); +module_param_named(min_mem_mb, min_mem_mb, ulong, 0644); MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); -module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR); +module_param_named(debug, cmm_debug, uint, 0644); MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " "[Default=" __stringify(CMM_DEBUG) "]"); @@ -385,7 +385,7 @@ static int cmm_thread(void *dummy) { \ return sprintf(buf, format, ##args); \ } \ - static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + static DEVICE_ATTR(name, 0444, show_##name, NULL) CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages)); CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); @@ -411,7 +411,7 @@ static ssize_t store_oom_pages(struct device *dev, return count; } -static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO, +static DEVICE_ATTR(oom_freed_kb, 0644, show_oom_pages, store_oom_pages); static struct device_attribute *cmm_attrs[] = { @@ -765,7 +765,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp) } module_param_call(disable, cmm_set_disable, param_get_uint, - &cmm_disabled, S_IRUGO | S_IWUSR); + &cmm_disabled, 0644); MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " "[Default=" __stringify(CMM_DISABLE) "]"); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 6b812ad990e4..823cb27efa8b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -55,6 +55,43 @@ static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_pe; +#ifdef CONFIG_PCI_IOV +void pseries_pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pci_dn *physfn_pdn; + struct eeh_dev *edev; + + if (!pdev->is_virtfn) + return; + + pdn->device_id = pdev->device; + pdn->vendor_id = pdev->vendor; + pdn->class_code = pdev->class; + /* + * Last allow unfreeze return code used for retrieval + * by user space in eeh-sysfs to show the last command + * completion from platform. + */ + pdn->last_allow_rc = 0; + physfn_pdn = pci_get_pdn(pdev->physfn); + pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; + edev = pdn_to_eeh_dev(pdn); + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8); + eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */ + eeh_add_to_parent_pe(edev); /* Add as VF PE type */ + eeh_sysfs_add_device(pdev); + +} +#endif + /* * Buffer for reporting slot-error-detail rtas calls. Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -120,6 +157,11 @@ static int pseries_eeh_init(void) /* Set EEH probe mode */ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); +#ifdef CONFIG_PCI_IOV + /* Set EEH machine dependent code */ + ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; +#endif + return 0; } @@ -684,6 +726,121 @@ static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 return rtas_write_config(pdn, where, size, val); } +static int pseries_eeh_restore_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + s64 ret = 0; + + if (!edev) + return -EEXIST; + + /* + * FIXME: The MPS, error routing rules, timeout setting are worthy + * to be exported by firmware in extendible way. + */ + if (edev->physfn) + ret = eeh_restore_vf_config(pdn); + + if (ret) { + pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", + __func__, edev->pe_config_addr, ret); + return -EIO; + } + + return ret; +} + +#ifdef CONFIG_PCI_IOV +int pseries_send_allow_unfreeze(struct pci_dn *pdn, + u16 *vf_pe_array, int cur_vfs) +{ + int rc; + int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze"); + unsigned long buid, addr; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL, + addr, + BUID_HI(buid), + BUID_LO(buid), + rtas_data_buf, cur_vfs * sizeof(u16)); + spin_unlock(&rtas_data_buf_lock); + if (rc) + pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n", + __func__, + pdn->phb->global_number, addr, rc); + return rc; +} + +static int pseries_call_allow_unfreeze(struct eeh_dev *edev) +{ + struct pci_dn *pdn, *tmp, *parent, *physfn_pdn; + int cur_vfs = 0, rc = 0, vf_index, bus, devfn; + u16 *vf_pe_array; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) { + if (edev->pdev->is_physfn) { + cur_vfs = pci_num_vf(edev->pdev); + pdn = eeh_dev_to_pdn(edev); + parent = pdn->parent; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) + vf_pe_array[vf_index] = + cpu_to_be16(pdn->pe_num_map[vf_index]); + rc = pseries_send_allow_unfreeze(pdn, vf_pe_array, + cur_vfs); + pdn->last_allow_rc = rc; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) { + list_for_each_entry_safe(pdn, tmp, + &parent->child_list, + list) { + bus = pci_iov_virtfn_bus(edev->pdev, + vf_index); + devfn = pci_iov_virtfn_devfn(edev->pdev, + vf_index); + if (pdn->busno != bus || + pdn->devfn != devfn) + continue; + pdn->last_allow_rc = rc; + } + } + } else { + pdn = pci_get_pdn(edev->pdev); + vf_pe_array[0] = cpu_to_be16(pdn->pe_number); + physfn_pdn = pci_get_pdn(edev->physfn); + rc = pseries_send_allow_unfreeze(physfn_pdn, + vf_pe_array, 1); + pdn->last_allow_rc = rc; + } + } + + kfree(vf_pe_array); + return rc; +} + +static int pseries_notify_resume(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + + if (!edev) + return -EEXIST; + + if (rtas_token("ibm,open-sriov-allow-unfreeze") + == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + if (edev->pdev->is_physfn || edev->pdev->is_virtfn) + return pseries_call_allow_unfreeze(edev); + + return 0; +} +#endif + static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, @@ -699,7 +856,10 @@ static struct eeh_ops pseries_eeh_ops = { .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, - .restore_config = NULL + .restore_config = pseries_eeh_restore_config, +#ifdef CONFIG_PCI_IOV + .notify_resume = pseries_notify_resume +#endif }; /** diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 63cc82ad58ac..a3bbeb43689e 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -114,6 +114,8 @@ static __initdata struct vec5_fw_feature vec5_fw_features_table[] = { {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY}, {FW_FEATURE_PRRN, OV5_PRRN}, + {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, + {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, }; static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a7d14aa7bb7c..dceb51454d8d 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -340,6 +340,8 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_done(); } +extern int find_and_online_cpu_nid(int cpu); + static int dlpar_online_cpu(struct device_node *dn) { int rc = 0; @@ -364,6 +366,7 @@ static int dlpar_online_cpu(struct device_node *dn) != CPU_STATE_OFFLINE); cpu_maps_update_done(); timed_topology_update(1); + find_and_online_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); if (rc) goto out; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 1d48ab424bd9..c1578f54c626 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -23,6 +23,7 @@ #include <asm/prom.h> #include <asm/sparsemem.h> #include <asm/fadump.h> +#include <asm/drmem.h> #include "pseries.h" static bool rtas_hp_event; @@ -100,100 +101,6 @@ static struct property *dlpar_clone_property(struct property *prop, return new_prop; } -static struct property *dlpar_clone_drconf_property(struct device_node *dn) -{ - struct property *prop, *new_prop; - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i; - - prop = of_find_property(dn, "ibm,dynamic-memory", NULL); - if (!prop) - return NULL; - - new_prop = dlpar_clone_property(prop, prop->length); - if (!new_prop) - return NULL; - - /* Convert the property to cpu endian-ness */ - p = new_prop->value; - *p = be32_to_cpu(*p); - - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - - for (i = 0; i < num_lmbs; i++) { - lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr); - lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index); - lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index); - lmbs[i].flags = be32_to_cpu(lmbs[i].flags); - } - - return new_prop; -} - -static void dlpar_update_drconf_property(struct device_node *dn, - struct property *prop) -{ - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i; - - /* Convert the property back to BE */ - p = prop->value; - num_lmbs = *p; - *p = cpu_to_be32(*p); - p++; - - lmbs = (struct of_drconf_cell *)p; - for (i = 0; i < num_lmbs; i++) { - lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); - lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); - lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index); - lmbs[i].flags = cpu_to_be32(lmbs[i].flags); - } - - rtas_hp_event = true; - of_update_property(dn, prop); - rtas_hp_event = false; -} - -static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb) -{ - struct device_node *dn; - struct property *prop; - struct of_drconf_cell *lmbs; - u32 *p, num_lmbs; - int i; - - dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (!dn) - return -ENODEV; - - prop = dlpar_clone_drconf_property(dn); - if (!prop) { - of_node_put(dn); - return -ENODEV; - } - - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == lmb->drc_index) { - lmbs[i].flags = lmb->flags; - lmbs[i].aa_index = lmb->aa_index; - - dlpar_update_drconf_property(dn, prop); - break; - } - } - - of_node_put(dn); - return 0; -} - static u32 find_aa_index(struct device_node *dr_node, struct property *ala_prop, const u32 *lmb_assoc) { @@ -256,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node, return aa_index; } -static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) +static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb) { struct device_node *parent, *lmb_node, *dr_node; struct property *ala_prop; @@ -299,9 +206,9 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) return aa_index; } -static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb) +static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb) { - int aa_index; + int rc, aa_index; lmb->flags |= DRCONF_MEM_ASSIGNED; @@ -313,17 +220,29 @@ static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb) } lmb->aa_index = aa_index; - return dlpar_update_device_tree_lmb(lmb); + + rtas_hp_event = true; + rc = drmem_update_dt(); + rtas_hp_event = false; + + return rc; } -static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb) +static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb) { + int rc; + lmb->flags &= ~DRCONF_MEM_ASSIGNED; lmb->aa_index = 0xffffffff; - return dlpar_update_device_tree_lmb(lmb); + + rtas_hp_event = true; + rc = drmem_update_dt(); + rtas_hp_event = false; + + return rc; } -static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) { unsigned long section_nr; struct mem_section *mem_sect; @@ -336,7 +255,36 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) return mem_block; } -static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online) +static int get_lmb_range(u32 drc_index, int n_lmbs, + struct drmem_lmb **start_lmb, + struct drmem_lmb **end_lmb) +{ + struct drmem_lmb *lmb, *start, *end; + struct drmem_lmb *last_lmb; + + start = NULL; + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { + start = lmb; + break; + } + } + + if (!start) + return -EINVAL; + + end = &start[n_lmbs - 1]; + + last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; + if (end > last_lmb) + return -EINVAL; + + *start_lmb = start; + *end_lmb = end; + return 0; +} + +static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online) { struct memory_block *mem_block; int rc; @@ -357,13 +305,13 @@ static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online) return rc; } -static int dlpar_online_lmb(struct of_drconf_cell *lmb) +static int dlpar_online_lmb(struct drmem_lmb *lmb) { return dlpar_change_lmb_state(lmb, true); } #ifdef CONFIG_MEMORY_HOTREMOVE -static int dlpar_offline_lmb(struct of_drconf_cell *lmb) +static int dlpar_offline_lmb(struct drmem_lmb *lmb) { return dlpar_change_lmb_state(lmb, false); } @@ -426,7 +374,7 @@ static int pseries_remove_mem_node(struct device_node *np) return 0; } -static bool lmb_is_removable(struct of_drconf_cell *lmb) +static bool lmb_is_removable(struct drmem_lmb *lmb) { int i, scns_per_block; int rc = 1; @@ -458,9 +406,9 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) return rc ? true : false; } -static int dlpar_add_lmb(struct of_drconf_cell *); +static int dlpar_add_lmb(struct drmem_lmb *); -static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +static int dlpar_remove_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; int nid, rc; @@ -484,28 +432,25 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb) return 0; } -static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, - struct property *prop) +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { - struct of_drconf_cell *lmbs; + struct drmem_lmb *lmb; int lmbs_removed = 0; int lmbs_available = 0; - u32 num_lmbs, *p; - int i, rc; + int rc; pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove); if (lmbs_to_remove == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - /* Validate that there are enough LMBs to satisfy the request */ - for (i = 0; i < num_lmbs; i++) { - if (lmb_is_removable(&lmbs[i])) + for_each_drmem_lmb(lmb) { + if (lmb_is_removable(lmb)) lmbs_available++; + + if (lmbs_available == lmbs_to_remove) + break; } if (lmbs_available < lmbs_to_remove) { @@ -514,45 +459,47 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, return -EINVAL; } - for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) { - rc = dlpar_remove_lmb(&lmbs[i]); + for_each_drmem_lmb(lmb) { + rc = dlpar_remove_lmb(lmb); if (rc) continue; - lmbs_removed++; - /* Mark this lmb so we can add it later if all of the * requested LMBs cannot be removed. */ - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); + + lmbs_removed++; + if (lmbs_removed == lmbs_to_remove) + break; } if (lmbs_removed != lmbs_to_remove) { pr_err("Memory hot-remove failed, adding LMB's back\n"); - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB back, drc index %x\n", - lmbs[i].drc_index); + lmb->drc_index); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); pr_info("Memory at %llx was hot-removed\n", - lmbs[i].base_addr); + lmb->base_addr); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } rc = 0; } @@ -560,26 +507,21 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, return rc; } -static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_remove_by_index(u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; + struct drmem_lmb *lmb; int lmb_found; - int i, rc; + int rc; pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - lmb_found = 0; - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == drc_index) { + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { lmb_found = 1; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (!rc) - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); break; } @@ -590,35 +532,30 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) if (rc) pr_info("Failed to hot-remove memory at %llx\n", - lmbs[i].base_addr); + lmb->base_addr); else - pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr); + pr_info("Memory at %llx was hot-removed\n", lmb->base_addr); return rc; } -static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_readd_by_index(u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; + struct drmem_lmb *lmb; int lmb_found; - int i, rc; + int rc; pr_info("Attempting to update LMB, drc index %x\n", drc_index); - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - lmb_found = 0; - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == drc_index) { + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { lmb_found = 1; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (!rc) { - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); } break; } @@ -629,20 +566,18 @@ static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) if (rc) pr_info("Failed to update memory at %llx\n", - lmbs[i].base_addr); + lmb->base_addr); else - pr_info("Memory at %llx was updated\n", lmbs[i].base_addr); + pr_info("Memory at %llx was updated\n", lmb->base_addr); return rc; } -static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, - struct property *prop) +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i, rc, start_lmb_found; - int lmbs_available = 0, start_index = 0, end_index; + struct drmem_lmb *lmb, *start_lmb, *end_lmb; + int lmbs_available = 0; + int rc; pr_info("Attempting to hot-remove %u LMB(s) at %x\n", lmbs_to_remove, drc_index); @@ -650,29 +585,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, if (lmbs_to_remove == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - start_lmb_found = 0; - - /* Navigate to drc_index */ - while (start_index < num_lmbs) { - if (lmbs[start_index].drc_index == drc_index) { - start_lmb_found = 1; - break; - } - - start_index++; - } - - if (!start_lmb_found) + rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb); + if (rc) return -EINVAL; - end_index = start_index + lmbs_to_remove; - /* Validate that there are enough LMBs to satisfy the request */ - for (i = start_index; i < end_index; i++) { - if (lmbs[i].flags & DRCONF_MEM_RESERVED) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) break; lmbs_available++; @@ -681,42 +600,43 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, if (lmbs_available < lmbs_to_remove) return -EINVAL; - for (i = start_index; i < end_index; i++) { - if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED)) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (rc) break; - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); } if (rc) { pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n"); - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB, drc index %x\n", - be32_to_cpu(lmbs[i].drc_index)); + lmb->drc_index); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); pr_info("Memory at %llx (drc index %x) was hot-removed\n", - lmbs[i].base_addr, lmbs[i].drc_index); + lmb->base_addr, lmb->drc_index); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } } @@ -737,32 +657,30 @@ static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) { return -EOPNOTSUPP; } -static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +static int dlpar_remove_lmb(struct drmem_lmb *lmb) { return -EOPNOTSUPP; } -static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, - struct property *prop) +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { return -EOPNOTSUPP; } -static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_remove_by_index(u32 drc_index) { return -EOPNOTSUPP; } -static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_readd_by_index(u32 drc_index) { return -EOPNOTSUPP; } -static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, - struct property *prop) +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { return -EOPNOTSUPP; } #endif /* CONFIG_MEMORY_HOTREMOVE */ -static int dlpar_add_lmb(struct of_drconf_cell *lmb) +static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; int nid, rc; @@ -801,77 +719,79 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) return rc; } -static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) +static int dlpar_memory_add_by_count(u32 lmbs_to_add) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; + struct drmem_lmb *lmb; int lmbs_available = 0; int lmbs_added = 0; - int i, rc; + int rc; pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); if (lmbs_to_add == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - /* Validate that there are enough LMBs to satisfy the request */ - for (i = 0; i < num_lmbs; i++) { - if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED)) + for_each_drmem_lmb(lmb) { + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) lmbs_available++; + + if (lmbs_available == lmbs_to_add) + break; } if (lmbs_available < lmbs_to_add) return -EINVAL; - for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) { - if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; - rc = dlpar_acquire_drc(lmbs[i].drc_index); + rc = dlpar_acquire_drc(lmb->drc_index); if (rc) continue; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) { - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); continue; } - lmbs_added++; - /* Mark this lmb so we can remove it later if all of the * requested LMBs cannot be added. */ - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); + + lmbs_added++; + if (lmbs_added == lmbs_to_add) + break; } if (lmbs_added != lmbs_to_add) { pr_err("Memory hot-add failed, removing any added LMBs\n"); - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (rc) pr_err("Failed to remove LMB, drc index %x\n", - be32_to_cpu(lmbs[i].drc_index)); + lmb->drc_index); else - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmbs[i].base_addr, lmbs[i].drc_index); - lmbs[i].reserved = 0; + lmb->base_addr, lmb->drc_index); + drmem_remove_lmb_reservation(lmb); } rc = 0; } @@ -879,28 +799,22 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) return rc; } -static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_add_by_index(u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i, lmb_found; - int rc; + struct drmem_lmb *lmb; + int rc, lmb_found; pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index); - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - lmb_found = 0; - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == drc_index) { + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { lmb_found = 1; - rc = dlpar_acquire_drc(lmbs[i].drc_index); + rc = dlpar_acquire_drc(lmb->drc_index); if (!rc) { - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); } break; @@ -914,18 +828,16 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) pr_info("Failed to hot-add memory, drc index %x\n", drc_index); else pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmbs[i].base_addr, drc_index); + lmb->base_addr, drc_index); return rc; } -static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, - struct property *prop) +static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i, rc, start_lmb_found; - int lmbs_available = 0, start_index = 0, end_index; + struct drmem_lmb *lmb, *start_lmb, *end_lmb; + int lmbs_available = 0; + int rc; pr_info("Attempting to hot-add %u LMB(s) at index %x\n", lmbs_to_add, drc_index); @@ -933,29 +845,13 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, if (lmbs_to_add == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - start_lmb_found = 0; - - /* Navigate to drc_index */ - while (start_index < num_lmbs) { - if (lmbs[start_index].drc_index == drc_index) { - start_lmb_found = 1; - break; - } - - start_index++; - } - - if (!start_lmb_found) + rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb); + if (rc) return -EINVAL; - end_index = start_index + lmbs_to_add; - /* Validate that the LMBs in this range are not reserved */ - for (i = start_index; i < end_index; i++) { - if (lmbs[i].flags & DRCONF_MEM_RESERVED) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) break; lmbs_available++; @@ -964,46 +860,48 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, if (lmbs_available < lmbs_to_add) return -EINVAL; - for (i = start_index; i < end_index; i++) { - if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; - rc = dlpar_acquire_drc(lmbs[i].drc_index); + rc = dlpar_acquire_drc(lmb->drc_index); if (rc) break; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) { - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); break; } - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); } if (rc) { pr_err("Memory indexed-count-add failed, removing any added LMBs\n"); - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (rc) pr_err("Failed to remove LMB, drc index %x\n", - be32_to_cpu(lmbs[i].drc_index)); + lmb->drc_index); else - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmbs[i].base_addr, lmbs[i].drc_index); - lmbs[i].reserved = 0; + lmb->base_addr, lmb->drc_index); + drmem_remove_lmb_reservation(lmb); } } @@ -1012,37 +910,23 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, int dlpar_memory(struct pseries_hp_errorlog *hp_elog) { - struct device_node *dn; - struct property *prop; u32 count, drc_index; int rc; lock_device_hotplug(); - dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (!dn) { - rc = -EINVAL; - goto dlpar_memory_out; - } - - prop = dlpar_clone_drconf_property(dn); - if (!prop) { - rc = -EINVAL; - goto dlpar_memory_out; - } - switch (hp_elog->action) { case PSERIES_HP_ELOG_ACTION_ADD: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) { count = hp_elog->_drc_u.drc_count; - rc = dlpar_memory_add_by_count(count, prop); + rc = dlpar_memory_add_by_count(count); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_add_by_index(drc_index, prop); + rc = dlpar_memory_add_by_index(drc_index); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) { count = hp_elog->_drc_u.ic.count; drc_index = hp_elog->_drc_u.ic.index; - rc = dlpar_memory_add_by_ic(count, drc_index, prop); + rc = dlpar_memory_add_by_ic(count, drc_index); } else { rc = -EINVAL; } @@ -1051,14 +935,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_REMOVE: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) { count = hp_elog->_drc_u.drc_count; - rc = dlpar_memory_remove_by_count(count, prop); + rc = dlpar_memory_remove_by_count(count); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_remove_by_index(drc_index, prop); + rc = dlpar_memory_remove_by_index(drc_index); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) { count = hp_elog->_drc_u.ic.count; drc_index = hp_elog->_drc_u.ic.index; - rc = dlpar_memory_remove_by_ic(count, drc_index, prop); + rc = dlpar_memory_remove_by_ic(count, drc_index); } else { rc = -EINVAL; } @@ -1066,7 +950,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; case PSERIES_HP_ELOG_ACTION_READD: drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_readd_by_index(drc_index, prop); + rc = dlpar_memory_readd_by_index(drc_index); break; default: pr_err("Invalid action (%d) specified\n", hp_elog->action); @@ -1074,10 +958,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; } - dlpar_free_property(prop); - -dlpar_memory_out: - of_node_put(dn); unlock_device_hotplug(); return rc; } @@ -1116,7 +996,7 @@ static int pseries_add_mem_node(struct device_node *np) static int pseries_update_drconf_memory(struct of_reconfig_data *pr) { - struct of_drconf_cell *new_drmem, *old_drmem; + struct of_drconf_cell_v1 *new_drmem, *old_drmem; unsigned long memblock_size; u32 entries; __be32 *p; @@ -1139,11 +1019,11 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr) * of_drconf_cell's. */ entries = be32_to_cpu(*p++); - old_drmem = (struct of_drconf_cell *)p; + old_drmem = (struct of_drconf_cell_v1 *)p; p = (__be32 *)pr->prop->value; p++; - new_drmem = (struct of_drconf_cell *)p; + new_drmem = (struct of_drconf_cell_v1 *)p; for (i = 0; i < entries; i++) { if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 957ae347b0b3..89b7ce807e70 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -163,7 +163,7 @@ static int __init hcall_inst_init(void) for_each_possible_cpu(cpu) { snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); - hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO, + hcall_file = debugfs_create_file(cpu_name_buf, 0444, hcall_root, per_cpu(hcall_stats, cpu), &hcall_inst_seq_fops); diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 408a86044133..c7c1140c13b6 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -298,7 +298,7 @@ out: return rc; return count; } -static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe); +static BUS_ATTR(probe, 0200, NULL, ibmebus_store_probe); static ssize_t ibmebus_store_remove(struct bus_type *bus, const char *buf, size_t count) @@ -325,7 +325,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, return -ENODEV; } } -static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove); +static BUS_ATTR(remove, 0200, NULL, ibmebus_store_remove); static struct attribute *ibmbus_bus_attrs[] = { &bus_attr_probe.attr, diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index eaa11334fc8c..06f02960b439 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -816,15 +816,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop) ret = tce_clearrange_multi_pSeriesLP(0, 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); if (ret) - pr_warning("%pOF failed to clear tces in window.\n", - np); + pr_warn("%pOF failed to clear tces in window.\n", + np); else pr_debug("%pOF successfully cleared tces in window.\n", np); ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); if (ret) - pr_warning("%pOF: failed to remove direct window: rtas returned " + pr_warn("%pOF: failed to remove direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[2], liobn); else @@ -836,7 +836,7 @@ delprop: if (remove_prop) ret = of_remove_property(np, win64); if (ret) - pr_warning("%pOF: failed to remove direct window property: %d\n", + pr_warn("%pOF: failed to remove direct window property: %d\n", np, ret); } diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index b2706c483067..c508c938dc71 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -370,10 +370,10 @@ static void parse_system_parameter_string(struct seq_file *m) */ static int lparcfg_count_active_processors(void) { - struct device_node *cpus_dn = NULL; + struct device_node *cpus_dn; int count = 0; - while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { + for_each_node_by_type(cpus_dn, "cpu") { #ifdef LPARCFG_DEBUG printk(KERN_ERR "cpus_dn %p\n", cpus_dn); #endif @@ -697,11 +697,11 @@ static const struct file_operations lparcfg_fops = { static int __init lparcfg_init(void) { - umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + umode_t mode = 0444; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) - mode |= S_IWUSR; + mode |= 0200; if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) { printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index f7042ad492ba..0f7fb7170b03 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -384,7 +384,7 @@ static ssize_t migration_store(struct class *class, #define MIGRATION_API_VERSION 1 static CLASS_ATTR_WO(migration); -static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION)); +static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION)); static int __init mobility_sysfs_init(void) { diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c index 7e75101fa522..6df192f38f80 100644 --- a/arch/powerpc/platforms/pseries/of_helpers.c +++ b/arch/powerpc/platforms/pseries/of_helpers.c @@ -3,6 +3,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/of.h> +#include <asm/prom.h> #include "of_helpers.h" @@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char *path) kfree(parent_path); return parent ? parent : ERR_PTR(-EINVAL); } + + +/* Helper Routines to convert between drc_index to cpu numbers */ + +int of_read_drc_info_cell(struct property **prop, const __be32 **curval, + struct of_drc_info *data) +{ + const char *p; + const __be32 *p2; + + if (!data) + return -EINVAL; + + /* Get drc-type:encode-string */ + p = data->drc_type = (char*) (*curval); + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-name-prefix:encode-string */ + data->drc_name_prefix = (char *)p; + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-index-start:encode-int */ + p2 = (const __be32 *)p; + p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start); + if (!p2) + return -EINVAL; + + /* Get drc-name-suffix-start:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start); + if (!p2) + return -EINVAL; + + /* Get number-sequential-elements:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems); + if (!p2) + return -EINVAL; + + /* Get sequential-increment:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc); + if (!p2) + return -EINVAL; + + /* Get drc-power-domain:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain); + if (!p2) + return -EINVAL; + + /* Should now know end of current entry */ + (*curval) = (void *)p2; + data->last_drc_index = data->drc_index_start + + ((data->num_sequential_elems - 1) * data->sequential_inc); + + return 0; +} +EXPORT_SYMBOL(of_read_drc_info_cell); diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 09eba5a9929a..eab96637d6cf 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -3,17 +3,17 @@ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM * * pSeries specific routines for PCI. - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA @@ -54,10 +54,174 @@ void pcibios_name_device(struct pci_dev *dev) } } } -} +} DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif +#ifdef CONFIG_PCI_IOV +#define MAX_VFS_FOR_MAP_PE 256 +struct pe_map_bar_entry { + __be64 bar; /* Input: Virtual Function BAR */ + __be16 rid; /* Input: Virtual Function Router ID */ + __be16 pe_num; /* Output: Virtual Function PE Number */ + __be32 reserved; /* Reserved Space */ +}; + +int pseries_send_map_pe(struct pci_dev *pdev, + u16 num_vfs, + struct pe_map_bar_entry *vf_pe_array) +{ + struct pci_dn *pdn; + int rc; + unsigned long buid, addr; + int ibm_map_pes = rtas_token("ibm,open-sriov-map-pe-number"); + + if (ibm_map_pes == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + pdn = pci_get_pdn(pdev); + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, + RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr, + BUID_HI(buid), BUID_LO(buid), + rtas_data_buf, + num_vfs * sizeof(struct pe_map_bar_entry)); + memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + if (rc) + dev_err(&pdev->dev, + "%s: Failed to associate pes PE#%lx, rc=%x\n", + __func__, addr, rc); + + return rc; +} + +void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num); + dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n", + pci_domain_nr(pdev->bus), + pdev->bus->number, + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), + pdn->pe_num_map[vf_index]); +} + +int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + int i, rc, vf_index; + struct pe_map_bar_entry *vf_pe_array; + struct resource *res; + u64 size; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + + pdn = pci_get_pdn(pdev); + /* create firmware structure to associate pes */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + pdn->pe_num_map[vf_index] = IODA_INVALID_PE; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->parent) + continue; + size = pcibios_iov_resource_alignment(pdev, i + + PCI_IOV_RESOURCES); + vf_pe_array[vf_index].bar = + cpu_to_be64(res->start + size * vf_index); + vf_pe_array[vf_index].rid = + cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index) + << 8) | pci_iov_virtfn_devfn(pdev, + vf_index)); + vf_pe_array[vf_index].pe_num = + cpu_to_be16(IODA_INVALID_PE); + } + } + + rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array); + /* Only zero is success */ + if (!rc) + for (vf_index = 0; vf_index < num_vfs; vf_index++) + pseries_set_pe_num(pdev, vf_index, + vf_pe_array[vf_index].pe_num); + + kfree(vf_pe_array); + return rc; +} + +int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + int rc; + const int *max_vfs; + int max_config_vfs; + struct device_node *dn = pci_device_to_OF_node(pdev); + + max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL); + + if (!max_vfs) + return -EINVAL; + + /* First integer stores max config */ + max_config_vfs = of_read_number(&max_vfs[0], 1); + if (max_config_vfs < num_vfs && num_vfs > MAX_VFS_FOR_MAP_PE) { + dev_err(&pdev->dev, + "Num VFs %x > %x Configurable VFs\n", + num_vfs, (num_vfs > MAX_VFS_FOR_MAP_PE) ? + MAX_VFS_FOR_MAP_PE : max_config_vfs); + return -EINVAL; + } + + pdn = pci_get_pdn(pdev); + pdn->pe_num_map = kmalloc_array(num_vfs, + sizeof(*pdn->pe_num_map), + GFP_KERNEL); + if (!pdn->pe_num_map) + return -ENOMEM; + + rc = pseries_associate_pes(pdev, num_vfs); + + /* Anything other than zero is failure */ + if (rc) { + dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc); + kfree(pdn->pe_num_map); + } else { + pci_vf_drivers_autoprobe(pdev, false); + } + + return rc; +} + +int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_dev_pci_data(pdev); + return pseries_pci_sriov_enable(pdev, num_vfs); +} + +int pseries_pcibios_sriov_disable(struct pci_dev *pdev) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); + /* Release PCI data */ + remove_dev_pci_data(pdev); + pci_vf_drivers_autoprobe(pdev, true); + return 0; +} +#endif + static void __init pSeries_request_regions(void) { if (!isa_io_base) @@ -76,6 +240,11 @@ void __init pSeries_final_fixup(void) pSeries_request_regions(); eeh_addr_cache_build(); + +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable; + ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable; +#endif } /* diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index 35c891aabef0..6ed22127391b 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -22,6 +22,7 @@ #include <asm/page.h> #include <asm/hvcall.h> #include <asm/firmware.h> +#include <asm/prom.h> #define MODULE_VERS "1.0" @@ -38,26 +39,58 @@ static int sysfs_entries; static u32 cpu_to_drc_index(int cpu) { struct device_node *dn = NULL; - const int *indexes; - int i; + int thread_index; int rc = 1; u32 ret = 0; dn = of_find_node_by_path("/cpus"); if (dn == NULL) goto err; - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; + /* Convert logical cpu number to core number */ - i = cpu_core_index_of_thread(cpu); - /* - * The first element indexes[0] is the number of drc_indexes - * returned in the list. Hence i+1 will get the drc_index - * corresponding to core number i. - */ - WARN_ON(i > indexes[0]); - ret = indexes[i + 1]; + thread_index = cpu_core_index_of_thread(cpu); + + if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { + struct property *info = NULL; + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info == NULL) + goto err_of_node_put; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (thread_index < drc.last_drc_index) + break; + } + + ret = drc.drc_index_start + (thread_index * drc.sequential_inc); + } else { + const __be32 *indexes; + + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + + /* + * The first element indexes[0] is the number of drc_indexes + * returned in the list. Hence thread_index+1 will get the + * drc_index corresponding to core number thread_index. + */ + ret = indexes[thread_index + 1]; + } + rc = 0; err_of_node_put: @@ -72,34 +105,71 @@ static int drc_index_to_cpu(u32 drc_index) { struct device_node *dn = NULL; const int *indexes; - int i, cpu = 0; + int thread_index = 0, cpu = 0; int rc = 1; dn = of_find_node_by_path("/cpus"); if (dn == NULL) goto err; - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; - /* - * First element in the array is the number of drc_indexes - * returned. Search through the list to find the matching - * drc_index and get the core number - */ - for (i = 0; i < indexes[0]; i++) { - if (indexes[i + 1] == drc_index) + + if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { + struct property *info = NULL; + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info == NULL) + goto err_of_node_put; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (drc_index > drc.last_drc_index) { + cpu += drc.num_sequential_elems; + continue; + } + cpu += ((drc_index - drc.drc_index_start) / + drc.sequential_inc); + + thread_index = cpu_first_thread_of_core(cpu); + rc = 0; break; + } + } else { + unsigned long int i; + + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + /* + * First element in the array is the number of drc_indexes + * returned. Search through the list to find the matching + * drc_index and get the core number + */ + for (i = 0; i < indexes[0]; i++) { + if (indexes[i + 1] == drc_index) + break; + } + /* Convert core number to logical cpu number */ + thread_index = cpu_first_thread_of_core(i); + rc = 0; } - /* Convert core number to logical cpu number */ - cpu = cpu_first_thread_of_core(i); - rc = 0; err_of_node_put: of_node_put(dn); err: if (rc) printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index); - return cpu; + return thread_index; } /* diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index f24d8159c9e1..0e0208117e77 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -405,7 +405,7 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops); + ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops); if (ent) proc_set_size(ent, 0); diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index c47585a78b69..054ce7a16fc3 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -179,7 +179,7 @@ static int __init scanlog_init(void) if (!scanlog_buffer) goto err; - ent = proc_create("powerpc/rtas/scan-log-dump", S_IRUSR, NULL, + ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL, &scanlog_fops); if (!ent) goto err; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ae4f596273b5..372d7ada1a0c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -371,8 +371,8 @@ void pseries_disable_reloc_on_exc(void) mdelay(get_longbusy_msecs(rc)); } if (rc != H_SUCCESS) - pr_warning("Warning: Failed to disable relocation on " - "exceptions: %ld\n", rc); + pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n", + rc); } EXPORT_SYMBOL(pseries_disable_reloc_on_exc); @@ -492,6 +492,162 @@ static void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); } +#ifdef CONFIG_PCI_IOV +enum rtas_iov_fw_value_map { + NUM_RES_PROPERTY = 0, /* Number of Resources */ + LOW_INT = 1, /* Lowest 32 bits of Address */ + START_OF_ENTRIES = 2, /* Always start of entry */ + APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */ + WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */ + NEXT_ENTRY = 7 /* Go to next entry on array */ +}; + +enum get_iov_fw_value_index { + BAR_ADDRS = 1, /* Get Bar Address */ + APERTURE_SIZE = 2, /* Get Aperture Size */ + WDW_SIZE = 3 /* Get Window Size */ +}; + +resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, + enum get_iov_fw_value_index value) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(dev); + int i, num_res, ret = 0; + + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return 0; + + /* + * First element in the array is the number of Bars + * returned. Search through the list to find the matching + * bar + */ + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + if (resno >= num_res) + return 0; /* or an errror */ + + i = START_OF_ENTRIES + NEXT_ENTRY * resno; + switch (value) { + case BAR_ADDRS: + ret = of_read_number(&indexes[i], 2); + break; + case APERTURE_SIZE: + ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2); + break; + case WDW_SIZE: + ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2); + break; + } + + return ret; +} + +void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) +{ + struct resource *res; + resource_size_t base, size; + int i, r, num_res; + + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS); + for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS; + i += NEXT_ENTRY, r++) { + res = &dev->resource[r + PCI_IOV_RESOURCES]; + base = of_read_number(&indexes[i], 2); + size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2); + res->flags = pci_parse_of_flags(of_read_number + (&indexes[i + LOW_INT], 1), 0); + res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED); + res->name = pci_name(dev); + res->start = base; + res->end = base + size - 1; + } +} + +void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) +{ + struct resource *res, *root, *conflict; + resource_size_t base, size; + int i, r, num_res; + + /* + * First element in the array is the number of Bars + * returned. Search through the list to find the matching + * bars assign them from firmware into resources structure. + */ + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS; + i += NEXT_ENTRY, r++) { + res = &dev->resource[r + PCI_IOV_RESOURCES]; + base = of_read_number(&indexes[i], 2); + size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2); + res->name = pci_name(dev); + res->start = base; + res->end = base + size - 1; + root = &iomem_resource; + dev_dbg(&dev->dev, + "pSeries IOV BAR %d: trying firmware assignment %pR\n", + r + PCI_IOV_RESOURCES, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", + r + PCI_IOV_RESOURCES, res, + conflict->name, conflict); + res->flags |= IORESOURCE_UNSET; + } + } +} + +static void pseries_pci_fixup_resources(struct pci_dev *pdev) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(pdev); + + /*Firmware must support open sriov otherwise dont configure*/ + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return; + /* Assign the addresses from device tree*/ + of_pci_set_vf_bar_size(pdev, indexes); +} + +static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(pdev); + + if (!pdev->is_physfn || pdev->is_added) + return; + /*Firmware must support open sriov otherwise dont configure*/ + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return; + /* Assign the addresses from device tree*/ + of_pci_parse_iov_addrs(pdev, indexes); +} + +static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + const __be32 *reg; + struct device_node *dn = pci_device_to_OF_node(pdev); + + /*Firmware must support open sriov otherwise report regular alignment*/ + reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL); + if (!reg) + return pci_iov_resource_size(pdev, resno); + + if (!pdev->is_physfn) + return 0; + return pseries_get_iov_fw_value(pdev, + resno - PCI_IOV_RESOURCES, + APERTURE_SIZE); +} +#endif + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -525,6 +681,14 @@ static void __init pSeries_setup_arch(void) vpa_init(boot_cpuid); ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_fixup_resources = + pseries_pci_fixup_resources; + ppc_md.pcibios_fixup_sriov = + pseries_pci_fixup_iov_resources; + ppc_md.pcibios_iov_resource_alignment = + pseries_pci_iov_resource_alignment; +#endif } else { /* No special idle routine */ ppc_md.enable_pmcs = power4_enable_pmcs; @@ -533,6 +697,12 @@ static void __init pSeries_setup_arch(void) ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; } +static void pseries_panic(char *str) +{ + panic_flush_kmsg_end(); + rtas_os_term(str); +} + static int __init pSeries_init_panel(void) { /* Manually leave the kernel version on the panel. */ @@ -761,7 +931,7 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, - .panic = rtas_os_term, + .panic = pseries_panic, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, .set_rtc_time = rtas_set_rtc_time, diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 89726f07d249..52a021e1f86b 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -214,8 +214,7 @@ static ssize_t show_hibernate(struct device *dev, return sprintf(buf, "%d\n", KERN_DT_UPDATE); } -static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO, - show_hibernate, store_hibernate); +static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate); static struct bus_type suspend_subsys = { .name = "power", diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 0baba21404dc..9861407d644a 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -32,7 +32,6 @@ mv64x60-$(CONFIG_PCI) += mv64x60_pci.o obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \ mv64x60_udbg.o obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o -obj-$(CONFIG_AXON_RAM) += axonram.o obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o obj-$(CONFIG_PPC_I8259) += i8259.o @@ -43,7 +42,8 @@ obj-$(CONFIG_OF_RTC) += of_rtc.o obj-$(CONFIG_CPM) += cpm_common.o obj-$(CONFIG_CPM1) += cpm1.o -obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o +obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o cpm_gpio.o +obj-$(CONFIG_8xx_GPIO) += cpm_gpio.o obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o obj-$(CONFIG_PPC_DCR) += dcr.o obj-$(CONFIG_UCODE_PATCH) += micropatch.o diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c deleted file mode 100644 index 1b307c80b401..000000000000 --- a/arch/powerpc/sysdev/axonram.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * (C) Copyright IBM Deutschland Entwicklung GmbH 2006 - * - * Author: Maxim Shchetynin <maxim@de.ibm.com> - * - * Axon DDR2 device driver. - * It registers one block device per Axon's DDR2 memory bank found on a system. - * Block devices are called axonram?, their major and minor numbers are - * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/bio.h> -#include <linux/blkdev.h> -#include <linux/dax.h> -#include <linux/device.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/genhd.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/ioport.h> -#include <linux/irq.h> -#include <linux/irqreturn.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/mod_devicetable.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/of_device.h> -#include <linux/of_platform.h> -#include <linux/pfn_t.h> -#include <linux/uio.h> - -#include <asm/page.h> -#include <asm/prom.h> - -#define AXON_RAM_MODULE_NAME "axonram" -#define AXON_RAM_DEVICE_NAME "axonram" -#define AXON_RAM_MINORS_PER_DISK 16 -#define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT -#define AXON_RAM_BLOCK_SIZE 1 << AXON_RAM_BLOCK_SHIFT -#define AXON_RAM_SECTOR_SHIFT 9 -#define AXON_RAM_SECTOR_SIZE 1 << AXON_RAM_SECTOR_SHIFT -#define AXON_RAM_IRQ_FLAGS IRQF_SHARED | IRQF_TRIGGER_RISING - -static int azfs_major, azfs_minor; - -struct axon_ram_bank { - struct platform_device *device; - struct gendisk *disk; - struct dax_device *dax_dev; - unsigned int irq_id; - unsigned long ph_addr; - unsigned long io_addr; - unsigned long size; - unsigned long ecc_counter; -}; - -static ssize_t -axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct platform_device *device = to_platform_device(dev); - struct axon_ram_bank *bank = device->dev.platform_data; - - BUG_ON(!bank); - - return sprintf(buf, "%ld\n", bank->ecc_counter); -} - -static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL); - -/** - * axon_ram_irq_handler - interrupt handler for Axon RAM ECC - * @irq: interrupt ID - * @dev: pointer to of_device - */ -static irqreturn_t -axon_ram_irq_handler(int irq, void *dev) -{ - struct platform_device *device = dev; - struct axon_ram_bank *bank = device->dev.platform_data; - - BUG_ON(!bank); - - dev_err(&device->dev, "Correctable memory error occurred\n"); - bank->ecc_counter++; - return IRQ_HANDLED; -} - -/** - * axon_ram_make_request - make_request() method for block device - * @queue, @bio: see blk_queue_make_request() - */ -static blk_qc_t -axon_ram_make_request(struct request_queue *queue, struct bio *bio) -{ - struct axon_ram_bank *bank = bio->bi_disk->private_data; - unsigned long phys_mem, phys_end; - void *user_mem; - struct bio_vec vec; - unsigned int transfered; - struct bvec_iter iter; - - phys_mem = bank->io_addr + (bio->bi_iter.bi_sector << - AXON_RAM_SECTOR_SHIFT); - phys_end = bank->io_addr + bank->size; - transfered = 0; - bio_for_each_segment(vec, bio, iter) { - if (unlikely(phys_mem + vec.bv_len > phys_end)) { - bio_io_error(bio); - return BLK_QC_T_NONE; - } - - user_mem = page_address(vec.bv_page) + vec.bv_offset; - if (bio_data_dir(bio) == READ) - memcpy(user_mem, (void *) phys_mem, vec.bv_len); - else - memcpy((void *) phys_mem, user_mem, vec.bv_len); - - phys_mem += vec.bv_len; - transfered += vec.bv_len; - } - bio_endio(bio); - return BLK_QC_T_NONE; -} - -static const struct block_device_operations axon_ram_devops = { - .owner = THIS_MODULE, -}; - -static long -__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn) -{ - resource_size_t offset = pgoff * PAGE_SIZE; - - *kaddr = (void *) bank->io_addr + offset; - *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); - return (bank->size - offset) / PAGE_SIZE; -} - -static long -axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn) -{ - struct axon_ram_bank *bank = dax_get_private(dax_dev); - - return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn); -} - -static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - return copy_from_iter(addr, bytes, i); -} - -static const struct dax_operations axon_ram_dax_ops = { - .direct_access = axon_ram_dax_direct_access, - .copy_from_iter = axon_ram_copy_from_iter, -}; - -/** - * axon_ram_probe - probe() method for platform driver - * @device: see platform_driver method - */ -static int axon_ram_probe(struct platform_device *device) -{ - static int axon_ram_bank_id = -1; - struct axon_ram_bank *bank; - struct resource resource; - int rc; - - axon_ram_bank_id++; - - dev_info(&device->dev, "Found memory controller on %pOF\n", - device->dev.of_node); - - bank = kzalloc(sizeof(*bank), GFP_KERNEL); - if (!bank) - return -ENOMEM; - - device->dev.platform_data = bank; - - bank->device = device; - - if (of_address_to_resource(device->dev.of_node, 0, &resource) != 0) { - dev_err(&device->dev, "Cannot access device tree\n"); - rc = -EFAULT; - goto failed; - } - - bank->size = resource_size(&resource); - - if (bank->size == 0) { - dev_err(&device->dev, "No DDR2 memory found for %s%d\n", - AXON_RAM_DEVICE_NAME, axon_ram_bank_id); - rc = -ENODEV; - goto failed; - } - - dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n", - AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20); - - bank->ph_addr = resource.start; - bank->io_addr = (unsigned long) ioremap_prot( - bank->ph_addr, bank->size, _PAGE_NO_CACHE); - if (bank->io_addr == 0) { - dev_err(&device->dev, "ioremap() failed\n"); - rc = -EFAULT; - goto failed; - } - - bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK); - if (bank->disk == NULL) { - dev_err(&device->dev, "Cannot register disk\n"); - rc = -EFAULT; - goto failed; - } - - - bank->disk->major = azfs_major; - bank->disk->first_minor = azfs_minor; - bank->disk->fops = &axon_ram_devops; - bank->disk->private_data = bank; - - sprintf(bank->disk->disk_name, "%s%d", - AXON_RAM_DEVICE_NAME, axon_ram_bank_id); - - bank->dax_dev = alloc_dax(bank, bank->disk->disk_name, - &axon_ram_dax_ops); - if (!bank->dax_dev) { - rc = -ENOMEM; - goto failed; - } - - bank->disk->queue = blk_alloc_queue(GFP_KERNEL); - if (bank->disk->queue == NULL) { - dev_err(&device->dev, "Cannot register disk queue\n"); - rc = -EFAULT; - goto failed; - } - - set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); - blk_queue_make_request(bank->disk->queue, axon_ram_make_request); - blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); - device_add_disk(&device->dev, bank->disk); - - bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0); - if (!bank->irq_id) { - dev_err(&device->dev, "Cannot access ECC interrupt ID\n"); - rc = -EFAULT; - goto failed; - } - - rc = request_irq(bank->irq_id, axon_ram_irq_handler, - AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device); - if (rc != 0) { - dev_err(&device->dev, "Cannot register ECC interrupt handler\n"); - bank->irq_id = 0; - rc = -EFAULT; - goto failed; - } - - rc = device_create_file(&device->dev, &dev_attr_ecc); - if (rc != 0) { - dev_err(&device->dev, "Cannot create sysfs file\n"); - rc = -EFAULT; - goto failed; - } - - azfs_minor += bank->disk->minors; - - return 0; - -failed: - if (bank->irq_id) - free_irq(bank->irq_id, device); - if (bank->disk != NULL) { - if (bank->disk->major > 0) - unregister_blkdev(bank->disk->major, - bank->disk->disk_name); - if (bank->disk->flags & GENHD_FL_UP) - del_gendisk(bank->disk); - put_disk(bank->disk); - } - kill_dax(bank->dax_dev); - put_dax(bank->dax_dev); - device->dev.platform_data = NULL; - if (bank->io_addr != 0) - iounmap((void __iomem *) bank->io_addr); - kfree(bank); - return rc; -} - -/** - * axon_ram_remove - remove() method for platform driver - * @device: see of_platform_driver method - */ -static int -axon_ram_remove(struct platform_device *device) -{ - struct axon_ram_bank *bank = device->dev.platform_data; - - BUG_ON(!bank || !bank->disk); - - device_remove_file(&device->dev, &dev_attr_ecc); - free_irq(bank->irq_id, device); - kill_dax(bank->dax_dev); - put_dax(bank->dax_dev); - del_gendisk(bank->disk); - put_disk(bank->disk); - iounmap((void __iomem *) bank->io_addr); - kfree(bank); - - return 0; -} - -static const struct of_device_id axon_ram_device_id[] = { - { - .type = "dma-memory" - }, - {} -}; -MODULE_DEVICE_TABLE(of, axon_ram_device_id); - -static struct platform_driver axon_ram_driver = { - .probe = axon_ram_probe, - .remove = axon_ram_remove, - .driver = { - .name = AXON_RAM_MODULE_NAME, - .of_match_table = axon_ram_device_id, - }, -}; - -/** - * axon_ram_init - */ -static int __init -axon_ram_init(void) -{ - azfs_major = register_blkdev(azfs_major, AXON_RAM_DEVICE_NAME); - if (azfs_major < 0) { - printk(KERN_ERR "%s cannot become block device major number\n", - AXON_RAM_MODULE_NAME); - return -EFAULT; - } - azfs_minor = 0; - - return platform_driver_register(&axon_ram_driver); -} - -/** - * axon_ram_exit - */ -static void __exit -axon_ram_exit(void) -{ - platform_driver_unregister(&axon_ram_driver); - unregister_blkdev(azfs_major, AXON_RAM_DEVICE_NAME); -} - -module_init(axon_ram_init); -module_exit(axon_ram_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>"); -MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE"); diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index c6f154b602fb..5240d3a74a10 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -629,8 +629,9 @@ static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } -int cpm1_gpiochip_add16(struct device_node *np) +int cpm1_gpiochip_add16(struct device *dev) { + struct device_node *np = dev->of_node; struct cpm1_gpio16_chip *cpm1_gc; struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; @@ -660,6 +661,8 @@ int cpm1_gpiochip_add16(struct device_node *np) gc->get = cpm1_gpio16_get; gc->set = cpm1_gpio16_set; gc->to_irq = cpm1_gpio16_to_irq; + gc->parent = dev; + gc->owner = THIS_MODULE; return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); } @@ -755,8 +758,9 @@ static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } -int cpm1_gpiochip_add32(struct device_node *np) +int cpm1_gpiochip_add32(struct device *dev) { + struct device_node *np = dev->of_node; struct cpm1_gpio32_chip *cpm1_gc; struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; @@ -776,31 +780,10 @@ int cpm1_gpiochip_add32(struct device_node *np) gc->direction_output = cpm1_gpio32_dir_out; gc->get = cpm1_gpio32_get; gc->set = cpm1_gpio32_set; + gc->parent = dev; + gc->owner = THIS_MODULE; return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); } -static int cpm_init_par_io(void) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-a") - cpm1_gpiochip_add16(np); - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-b") - cpm1_gpiochip_add32(np); - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-c") - cpm1_gpiochip_add16(np); - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-d") - cpm1_gpiochip_add16(np); - - /* Port E uses CPM2 layout */ - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-e") - cpm2_gpiochip_add32(np); - return 0; -} -arch_initcall(cpm_init_par_io); - #endif /* CONFIG_8xx_GPIO */ diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index f78ff841652c..07718b9a2c99 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -354,14 +354,3 @@ void cpm2_set_pin(int port, int pin, int flags) else clrbits32(&iop[port].odr, pin); } - -static int cpm_init_par_io(void) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, "fsl,cpm2-pario-bank") - cpm2_gpiochip_add32(np); - return 0; -} -arch_initcall(cpm_init_par_io); - diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 51bf749a4f3a..b74508175b67 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -190,8 +190,9 @@ static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } -int cpm2_gpiochip_add32(struct device_node *np) +int cpm2_gpiochip_add32(struct device *dev) { + struct device_node *np = dev->of_node; struct cpm2_gpio32_chip *cpm2_gc; struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; @@ -211,6 +212,8 @@ int cpm2_gpiochip_add32(struct device_node *np) gc->direction_output = cpm2_gpio32_dir_out; gc->get = cpm2_gpio32_get; gc->set = cpm2_gpio32_set; + gc->parent = dev; + gc->owner = THIS_MODULE; return of_mm_gpiochip_add_data(np, mm_gc, cpm2_gc); } diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c new file mode 100644 index 000000000000..0badc90be666 --- /dev/null +++ b/arch/powerpc/sysdev/cpm_gpio.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common CPM GPIO wrapper for the CPM GPIO ports + * + * Author: Christophe Leroy <christophe.leroy@c-s.fr> + * + * Copyright 2017 CS Systemes d'Information. + * + */ + +#include <linux/module.h> +#include <linux/of_device.h> + +#include <asm/cpm.h> +#ifdef CONFIG_8xx_GPIO +#include <asm/cpm1.h> +#endif + +static int cpm_gpio_probe(struct platform_device *ofdev) +{ + struct device *dev = &ofdev->dev; + int (*gp_add)(struct device *dev) = of_device_get_match_data(dev); + + if (!gp_add) + return -ENODEV; + + return gp_add(dev); +} + +static const struct of_device_id cpm_gpio_match[] = { +#ifdef CONFIG_8xx_GPIO + { + .compatible = "fsl,cpm1-pario-bank-a", + .data = cpm1_gpiochip_add16, + }, + { + .compatible = "fsl,cpm1-pario-bank-b", + .data = cpm1_gpiochip_add32, + }, + { + .compatible = "fsl,cpm1-pario-bank-c", + .data = cpm1_gpiochip_add16, + }, + { + .compatible = "fsl,cpm1-pario-bank-d", + .data = cpm1_gpiochip_add16, + }, + /* Port E uses CPM2 layout */ + { + .compatible = "fsl,cpm1-pario-bank-e", + .data = cpm2_gpiochip_add32, + }, +#endif + { + .compatible = "fsl,cpm2-pario-bank", + .data = cpm2_gpiochip_add32, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, cpm_gpio_match); + +static struct platform_driver cpm_gpio_driver = { + .probe = cpm_gpio_probe, + .driver = { + .name = "cpm-gpio", + .owner = THIS_MODULE, + .of_match_table = cpm_gpio_match, + }, +}; + +static int __init cpm_gpio_init(void) +{ + return platform_driver_register(&cpm_gpio_driver); +} +arch_initcall(cpm_gpio_init); + +MODULE_AUTHOR("Christophe Leroy <christophe.leroy@c-s.fr>"); +MODULE_DESCRIPTION("Driver for CPM GPIO"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:cpm-gpio"); diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c index 1707bf04dec6..94278e8af192 100644 --- a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c +++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c @@ -56,17 +56,16 @@ static ssize_t fsl_timer_wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct timeval interval; - int val = 0; + time64_t interval = 0; mutex_lock(&sysfs_lock); if (fsl_wakeup->timer) { mpic_get_remain_time(fsl_wakeup->timer, &interval); - val = interval.tv_sec + 1; + interval++; } mutex_unlock(&sysfs_lock); - return sprintf(buf, "%d\n", val); + return sprintf(buf, "%lld\n", interval); } static ssize_t fsl_timer_wakeup_store(struct device *dev, @@ -74,11 +73,10 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev, const char *buf, size_t count) { - struct timeval interval; + time64_t interval; int ret; - interval.tv_usec = 0; - if (kstrtol(buf, 0, &interval.tv_sec)) + if (kstrtoll(buf, 0, &interval)) return -EINVAL; mutex_lock(&sysfs_lock); @@ -89,13 +87,13 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev, fsl_wakeup->timer = NULL; } - if (!interval.tv_sec) { + if (!interval) { mutex_unlock(&sysfs_lock); return count; } fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq, - fsl_wakeup, &interval); + fsl_wakeup, interval); if (!fsl_wakeup->timer) { mutex_unlock(&sysfs_lock); return -EINVAL; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 61e07c78d64f..918be816b097 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -448,7 +448,7 @@ static void setup_pci_atmu(struct pci_controller *hose) #endif /* adjusting outbound windows could reclaim space in mem map */ if (paddr_hi < 0xffffffffull) - pr_warning("%pOF: WARNING: Outbound window cfg leaves " + pr_warn("%pOF: WARNING: Outbound window cfg leaves " "gaps in memory map. Adjusting the memory map " "could reduce unnecessary bounce buffering.\n", hose->dn); @@ -531,7 +531,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) dev = pdev->dev.of_node; if (!of_device_is_available(dev)) { - pr_warning("%pOF: disabled\n", dev); + pr_warn("%pOF: disabled\n", dev); return -ENODEV; } @@ -808,8 +808,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev) is_mpc83xx_pci = 1; if (!of_device_is_available(dev)) { - pr_warning("%pOF: disabled by the firmware.\n", - dev); + pr_warn("%pOF: disabled by the firmware.\n", + dev); return -ENODEV; } pr_debug("Adding PCI host bridge %pOF\n", dev); @@ -1070,7 +1070,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) { pagefault_disable(); - ret = get_user(regs->nip, &inst); + ret = get_user(inst, (__u32 __user *)regs->nip); pagefault_enable(); } else { ret = probe_kernel_address((void *)regs->nip, inst); @@ -1304,10 +1304,8 @@ static int add_err_dev(struct platform_device *pdev) pdev->resource, pdev->num_resources, &pd, sizeof(pd)); - if (IS_ERR(errdev)) - return PTR_ERR(errdev); - return 0; + return PTR_ERR_OR_ZERO(errdev); } static int fsl_pci_probe(struct platform_device *pdev) diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index bafb014e1a7e..cb9a8b71fd0f 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -145,21 +145,21 @@ static struct resource pic1_iores = { .name = "8259 (master)", .start = 0x20, .end = 0x21, - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; static struct resource pic2_iores = { .name = "8259 (slave)", .start = 0xa0, .end = 0xa1, - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; static struct resource pic_edgectrl_iores = { .name = "8259 edge control", .start = 0x4d0, .end = 0x4d1, - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; static int i8259_host_match(struct irq_domain *h, struct device_node *node, diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ead3e2549ebf..73067805300a 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1008,9 +1008,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq, if (hw == mpic->spurious_vec) return -EINVAL; if (mpic->protected && test_bit(hw, mpic->protected)) { - pr_warning("mpic: Mapping of source 0x%x failed, " - "source protected by firmware !\n",\ - (unsigned int)hw); + pr_warn("mpic: Mapping of source 0x%x failed, source protected by firmware !\n", + (unsigned int)hw); return -EPERM; } @@ -1040,9 +1039,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq, return 0; if (hw >= mpic->num_sources) { - pr_warning("mpic: Mapping of source 0x%x failed, " - "source out of range !\n",\ - (unsigned int)hw); + pr_warn("mpic: Mapping of source 0x%x failed, source out of range !\n", + (unsigned int)hw); return -EINVAL; } diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c index a418579591be..87e7c42777a8 100644 --- a/arch/powerpc/sysdev/mpic_timer.c +++ b/arch/powerpc/sysdev/mpic_timer.c @@ -47,9 +47,6 @@ #define MAX_TICKS_CASCADE (~0U) #define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num)) -/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */ -#define ONE_SECOND 1000000 - struct timer_regs { u32 gtccr; u32 res0[3]; @@ -90,51 +87,23 @@ static struct cascade_priv cascade_timer[] = { static LIST_HEAD(timer_group_list); static void convert_ticks_to_time(struct timer_group_priv *priv, - const u64 ticks, struct timeval *time) + const u64 ticks, time64_t *time) { - u64 tmp_sec; - - time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq); - tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; - - time->tv_usec = 0; - - if (tmp_sec <= ticks) - time->tv_usec = (__kernel_suseconds_t) - div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq); - - return; + *time = (u64)div_u64(ticks, priv->timerfreq); } /* the time set by the user is converted to "ticks" */ static int convert_time_to_ticks(struct timer_group_priv *priv, - const struct timeval *time, u64 *ticks) + time64_t time, u64 *ticks) { u64 max_value; /* prevent u64 overflow */ - u64 tmp = 0; - - u64 tmp_sec; - u64 tmp_ms; - u64 tmp_us; max_value = div_u64(ULLONG_MAX, priv->timerfreq); - if (time->tv_sec > max_value || - (time->tv_sec == max_value && time->tv_usec > 0)) + if (time > max_value) return -EINVAL; - tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; - tmp += tmp_sec; - - tmp_ms = time->tv_usec / 1000; - tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000); - tmp += tmp_ms; - - tmp_us = time->tv_usec % 1000; - tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000); - tmp += tmp_us; - - *ticks = tmp; + *ticks = (u64)time * (u64)priv->timerfreq; return 0; } @@ -223,7 +192,7 @@ static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv, return allocated_timer; } -static struct mpic_timer *get_timer(const struct timeval *time) +static struct mpic_timer *get_timer(time64_t time) { struct timer_group_priv *priv; struct mpic_timer *timer; @@ -277,7 +246,7 @@ static struct mpic_timer *get_timer(const struct timeval *time) * @handle: the timer to be started. * * It will do ->fn(->dev) callback from the hardware interrupt at - * the ->timeval point in the future. + * the 'time64_t' point in the future. */ void mpic_start_timer(struct mpic_timer *handle) { @@ -319,7 +288,7 @@ EXPORT_SYMBOL(mpic_stop_timer); * * Query timer remaining time. */ -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { struct timer_group_priv *priv = container_of(handle, struct timer_group_priv, timer[handle->num]); @@ -391,7 +360,7 @@ EXPORT_SYMBOL(mpic_free_timer); * else "handle" on success. */ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time) + time64_t time) { struct mpic_timer *allocated_timer; int ret; @@ -399,11 +368,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, if (list_empty(&timer_group_list)) return NULL; - if (!(time->tv_sec + time->tv_usec) || - time->tv_sec < 0 || time->tv_usec < 0) - return NULL; - - if (time->tv_usec > ONE_SECOND) + if (time < 0) return NULL; allocated_timer = get_timer(time); diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c index d52b3b81e05f..1afcdb428e51 100644 --- a/arch/powerpc/sysdev/mv64x60_pci.c +++ b/arch/powerpc/sysdev/mv64x60_pci.c @@ -37,7 +37,7 @@ static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj, if (count < MV64X60_VAL_LEN_MAX) return -EINVAL; - phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); if (!phb) return -ENODEV; pci_read_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, &v); @@ -61,7 +61,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj, if (sscanf(buf, "%i", &v) != 1) return -EINVAL; - phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); if (!phb) return -ENODEV; pci_write_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, v); @@ -73,7 +73,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj, static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */ .attr = { .name = "hs_reg", - .mode = S_IRUGO | S_IWUSR, + .mode = 0644, }, .size = MV64X60_VAL_LEN_MAX, .read = mv64x60_hs_reg_read, diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 2bfb9968d562..1459f4e8b698 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -241,18 +241,16 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr, cpu, hw_id); if (!request_mem_region(addr, size, rname)) { - pr_warning("icp_native: Could not reserve ICP MMIO" - " for CPU %d, interrupt server #0x%x\n", - cpu, hw_id); + pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n", + cpu, hw_id); return -EBUSY; } icp_native_regs[cpu] = ioremap(addr, size); kvmppc_set_xics_phys(cpu, addr); if (!icp_native_regs[cpu]) { - pr_warning("icp_native: Failed ioremap for CPU %d, " - "interrupt server #0x%x, addr %#lx\n", - cpu, hw_id, addr); + pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n", + cpu, hw_id, addr); release_mem_region(addr, size); return -ENOMEM; } diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 1c6bf4b66f56..f85f916ba432 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -131,8 +131,8 @@ static int ics_opal_set_affinity(struct irq_data *d, wanted_server = xics_get_irq_server(d->irq, cpumask, 1); if (wanted_server < 0) { - pr_warning("%s: No online cpus in the mask %*pb for irq %d\n", - __func__, cpumask_pr_args(cpumask), d->irq); + pr_warn("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); return -1; } server = ics_opal_mangle_server(wanted_server); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 42e0c56ff81c..6aabc74688a6 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -141,8 +141,8 @@ static int ics_rtas_set_affinity(struct irq_data *d, irq_server = xics_get_irq_server(d->irq, cpumask, 1); if (irq_server == -1) { - pr_warning("%s: No online cpus in the mask %*pb for irq %d\n", - __func__, cpumask_pr_args(cpumask), d->irq); + pr_warn("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); return -1; } diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index ffe138b8b9dc..77e864d5506d 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -243,8 +243,8 @@ void xics_migrate_irqs_away(void) /* This is expected during cpu offline. */ if (cpu_online(cpu)) - pr_warning("IRQ %u affinity broken off cpu %u\n", - virq, cpu); + pr_warn("IRQ %u affinity broken off cpu %u\n", + virq, cpu); /* Reset affinity to all cpus */ raw_spin_unlock_irqrestore(&desc->lock, flags); @@ -466,7 +466,7 @@ void __init xics_init(void) rc = icp_opal_init(); } if (rc < 0) { - pr_warning("XICS: Cannot find a Presentation Controller !\n"); + pr_warn("XICS: Cannot find a Presentation Controller !\n"); return; } @@ -481,7 +481,7 @@ void __init xics_init(void) if (rc < 0) rc = ics_opal_init(); if (rc < 0) - pr_warning("XICS: Cannot find a Source Controller !\n"); + pr_warn("XICS: Cannot find a Source Controller !\n"); /* Initialize common bits */ xics_get_server_size(); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a3b8d7d1316e..40c06110821c 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d) * EOI the source if it hasn't been disabled and hasn't * been passed-through to a KVM guest */ - if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) + if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && + !(xd->flags & XIVE_IRQ_NO_EOI)) xive_do_source_eoi(irqd_to_hwirq(d), xd); /* @@ -1269,11 +1270,6 @@ static void xive_setup_cpu(void) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); - /* Debug: Dump the TM state */ - pr_devel("CPU %d [HW 0x%02x] VT=%02x\n", - smp_processor_id(), hard_smp_processor_id(), - in_8(xive_tima + xive_tima_offset + TM_WORD2)); - /* The backend might have additional things to do */ if (xive_ops->setup_cpu) xive_ops->setup_cpu(smp_processor_id(), xc); diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c index 31db8c072acd..9deea5ee13f6 100644 --- a/arch/powerpc/xmon/ppc-dis.c +++ b/arch/powerpc/xmon/ppc-dis.c @@ -93,10 +93,6 @@ lookup_powerpc (unsigned long insn, ppc_cpu_t dialect) { const struct powerpc_opcode *opcode; const struct powerpc_opcode *opcode_end; - unsigned long op; - - /* Get the major opcode of the instruction. */ - op = PPC_OP (insn); opcode_end = powerpc_opcodes + powerpc_num_opcodes; /* Find the first match in the opcode table for this major opcode. */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0ddc7ac6c5f1..82e1a3ee6e0f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1623,7 +1623,7 @@ static void excprint(struct pt_regs *fp) printf(" current = 0x%lx\n", current); #ifdef CONFIG_PPC64 printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n", - local_paca, local_paca->soft_enabled, local_paca->irq_happened); + local_paca, local_paca->irq_soft_mask, local_paca->irq_happened); #endif if (current) { printf(" pid = %ld, comm = %s\n", @@ -2377,8 +2377,6 @@ static void dump_one_paca(int cpu) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); DUMP(p, rfi_flush_fallback_area, "px"); - DUMP(p, l1d_flush_congruence, "llx"); - DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E @@ -2395,7 +2393,7 @@ static void dump_one_paca(int cpu) DUMP(p, stab_rr, "lx"); DUMP(p, saved_r1, "lx"); DUMP(p, trap_save, "x"); - DUMP(p, soft_enabled, "x"); + DUMP(p, irq_soft_mask, "x"); DUMP(p, irq_happened, "x"); DUMP(p, io_sync, "x"); DUMP(p, irq_work_pending, "x"); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 865e14f50c14..b6722c246d9c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -22,6 +22,7 @@ config RISCV select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_GENERIC_DMA_COHERENT @@ -43,6 +44,10 @@ config MMU config ARCH_PHYS_ADDR_T_64BIT def_bool y +config ZONE_DMA32 + bool + default y + config ARCH_DMA_ADDR_T_64BIT def_bool y @@ -55,6 +60,9 @@ config PAGE_OFFSET config STACKTRACE_SUPPORT def_bool y +config TRACE_IRQFLAGS_SUPPORT + def_bool y + config RWSEM_GENERIC_SPINLOCK def_bool y @@ -107,6 +115,8 @@ config ARCH_RV64I bool "RV64I" select CPU_SUPPORTS_64BIT_KERNEL select 64BIT + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER endchoice diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 681ac0d09314..4286a5f83876 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += errno.h generic-y += exec.h generic-y += fb.h generic-y += fcntl.h -generic-y += ftrace.h generic-y += futex.h generic-y += hardirq.h generic-y += hash.h diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 3c7a2c97e377..421fa3585798 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -40,15 +40,15 @@ #define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */ #endif -/* SPTBR flags */ +/* SATP flags */ #if __riscv_xlen == 32 -#define SPTBR_PPN _AC(0x003FFFFF, UL) -#define SPTBR_MODE_32 _AC(0x80000000, UL) -#define SPTBR_MODE SPTBR_MODE_32 +#define SATP_PPN _AC(0x003FFFFF, UL) +#define SATP_MODE_32 _AC(0x80000000, UL) +#define SATP_MODE SATP_MODE_32 #else -#define SPTBR_PPN _AC(0x00000FFFFFFFFFFF, UL) -#define SPTBR_MODE_39 _AC(0x8000000000000000, UL) -#define SPTBR_MODE SPTBR_MODE_39 +#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) +#define SATP_MODE_39 _AC(0x8000000000000000, UL) +#define SATP_MODE SATP_MODE_39 #endif /* Interrupt Enable and Interrupt Pending flags */ diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h new file mode 100644 index 000000000000..66d4175eb13e --- /dev/null +++ b/arch/riscv/include/asm/ftrace.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +/* + * The graph frame test is not possible if CONFIG_FRAME_POINTER is not enabled. + * Check arch/riscv/kernel/mcount.S for detail. + */ +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER) +#define HAVE_FUNCTION_GRAPH_FP_TEST +#endif diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index 97424834dce2..336d60ec5698 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -39,16 +39,6 @@ static inline void destroy_context(struct mm_struct *mm) { } -static inline pgd_t *current_pgdir(void) -{ - return pfn_to_virt(csr_read(sptbr) & SPTBR_PPN); -} - -static inline void set_pgdir(pgd_t *pgd) -{ - csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); -} - /* * When necessary, performs a deferred icache flush for the given MM context, * on the local CPU. RISC-V has no direct mechanism for instruction cache @@ -93,7 +83,12 @@ static inline void switch_mm(struct mm_struct *prev, cpumask_clear_cpu(cpu, mm_cpumask(prev)); cpumask_set_cpu(cpu, mm_cpumask(next)); - set_pgdir(next->pgd); + /* + * Use the old spbtr name instead of using the current satp + * name to support binutils 2.29 which doesn't know about the + * privileged ISA 1.10 yet. + */ + csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE); local_flush_tlb_all(); flush_icache_deferred(next); diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 7b9c24ebdf52..7b209aec355d 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -36,7 +36,14 @@ static inline void local_flush_tlb_page(unsigned long addr) #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) -#define flush_tlb_range(vma, start, end) local_flush_tlb_all() + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + local_flush_tlb_all(); +} + +#define flush_tlb_mm(mm) flush_tlb_all() #else /* CONFIG_SMP */ @@ -45,16 +52,13 @@ static inline void local_flush_tlb_page(unsigned long addr) #define flush_tlb_all() sbi_remote_sfence_vma(0, 0, -1) #define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) #define flush_tlb_range(vma, start, end) \ - sbi_remote_sfence_vma(0, start, (end) - (start)) + sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \ + start, (end) - (start)) +#define flush_tlb_mm(mm) \ + sbi_remote_sfence_vma(mm_cpumask(mm)->bits, 0, -1) #endif /* CONFIG_SMP */ -/* Flush the TLB entries of the specified mm context */ -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - flush_tlb_all(); -} - /* Flush a range of kernel pages */ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index 2f704a5c4196..080fb28061de 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -11,7 +11,6 @@ * GNU General Public License for more details. */ -#define __ARCH_HAVE_MMU #define __ARCH_WANT_SYS_CLONE #include <uapi/asm/unistd.h> #include <uapi/asm/syscalls.h> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index ab8baf7bd142..196f62ffc428 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -2,6 +2,11 @@ # Makefile for the RISC-V Linux kernel # +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_setup.o = -pg +endif + extra-y += head.o extra-y += vmlinux.lds @@ -29,5 +34,7 @@ CFLAGS_setup.o := -mcmodel=medany obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o clean: diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 7404ec222406..87fc045be51f 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -78,10 +78,13 @@ _save_context: REG_S x31, PT_T6(sp) /* - * Disable FPU to detect illegal usage of - * floating point in kernel space + * Disable user-mode memory access as it should only be set in the + * actual user copy routines. + * + * Disable the FPU to detect illegal usage of floating point in kernel + * space. */ - li t0, SR_FS + li t0, SR_SUM | SR_FS REG_L s0, TASK_TI_USER_SP(tp) csrrc s1, sstatus, t0 diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c new file mode 100644 index 000000000000..d0de68d144cb --- /dev/null +++ b/arch/riscv/kernel/ftrace.c @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * Copyright (C) 2017 Andes Technology Corporation + */ + +#include <linux/ftrace.h> + +/* + * Most of this file is copied from arm64. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + struct ftrace_graph_ent trace; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * We don't suffer access faults, so no extra fault-recovery assembly + * is needed here. + */ + old = *parent; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + if (!ftrace_graph_entry(&trace)) + return; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer, NULL); + if (err == -EBUSY) + return; + *parent = return_hooker; +} diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 78f670d70133..226eeb190f90 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -74,15 +74,15 @@ relocate: sub a1, a1, a0 add ra, ra, a1 - /* Point stvec to virtual address of intruction after sptbr write */ + /* Point stvec to virtual address of intruction after satp write */ la a0, 1f add a0, a0, a1 csrw stvec, a0 - /* Compute sptbr for kernel page tables, but don't load it yet */ + /* Compute satp for kernel page tables, but don't load it yet */ la a2, swapper_pg_dir srl a2, a2, PAGE_SHIFT - li a1, SPTBR_MODE + li a1, SATP_MODE or a2, a2, a1 /* diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S new file mode 100644 index 000000000000..c46a778627be --- /dev/null +++ b/arch/riscv/kernel/mcount.S @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/csr.h> +#include <asm/unistd.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm-generic/export.h> +#include <asm/ftrace.h> + + .text + + .macro SAVE_ABI_STATE + addi sp, sp, -16 + sd s0, 0(sp) + sd ra, 8(sp) + addi s0, sp, 16 + .endm + + /* + * The call to ftrace_return_to_handler would overwrite the return + * register if a0 was not saved. + */ + .macro SAVE_RET_ABI_STATE + addi sp, sp, -32 + sd s0, 16(sp) + sd ra, 24(sp) + sd a0, 8(sp) + addi s0, sp, 32 + .endm + + .macro STORE_ABI_STATE + ld ra, 8(sp) + ld s0, 0(sp) + addi sp, sp, 16 + .endm + + .macro STORE_RET_ABI_STATE + ld ra, 24(sp) + ld s0, 16(sp) + ld a0, 8(sp) + addi sp, sp, 32 + .endm + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(return_to_handler) +/* + * On implementing the frame point test, the ideal way is to compare the + * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return. + * However, the psABI of variable-length-argument functions does not allow this. + * + * So alternatively we check the *old* frame pointer position, that is, the + * value stored in -16(s0) on entry, and the s0 on return. + */ +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv t6, s0 +#endif + SAVE_RET_ABI_STATE +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv a0, t6 +#endif + la t0, ftrace_return_to_handler + jalr t0 + mv a1, a0 + STORE_RET_ABI_STATE + jalr a1 +ENDPROC(return_to_handler) +EXPORT_SYMBOL(return_to_handler) +#endif + +ENTRY(_mcount) + la t4, ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + la t0, ftrace_graph_return + ld t1, 0(t0) + bne t1, t4, do_ftrace_graph_caller + + la t3, ftrace_graph_entry + ld t2, 0(t3) + la t6, ftrace_graph_entry_stub + bne t2, t6, do_ftrace_graph_caller +#endif + la t3, ftrace_trace_function + ld t5, 0(t3) + bne t5, t4, do_trace + ret + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * A pseudo representation for the function graph tracer: + * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) + */ +do_ftrace_graph_caller: + addi a0, s0, -8 + mv a1, ra +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld a2, -16(s0) +#endif + SAVE_ABI_STATE + la t0, prepare_ftrace_return + jalr t0 + STORE_ABI_STATE + ret +#endif + +/* + * A pseudo representation for the function tracer: + * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) + */ +do_trace: + ld a1, -8(s0) + mv a0, ra + + SAVE_ABI_STATE + jalr t5 + STORE_ABI_STATE + ret +ENDPROC(_mcount) +EXPORT_SYMBOL(_mcount) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index cb7b0c63014e..09f7064e898c 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -49,10 +49,6 @@ struct screen_info screen_info = { }; #endif -#ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; -#endif /* CONFIG_CMDLINE_BOOL */ - unsigned long va_pa_offset; EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; @@ -153,25 +149,6 @@ void __init sbi_save(unsigned int hartid, void *dtb) early_init_dt_scan(__va(dtb)); } -/* - * Allow the user to manually add a memory region (in case DTS is broken); - * "mem_end=nn[KkMmGg]" - */ -static int __init mem_end_override(char *p) -{ - resource_size_t base, end; - - if (!p) - return -EINVAL; - base = (uintptr_t) __pa(PAGE_OFFSET); - end = memparse(p, &p) & PMD_MASK; - if (end == 0) - return -EINVAL; - memblock_add(base, end - base); - return 0; -} -early_param("mem_end", mem_end_override); - static void __init setup_bootmem(void) { struct memblock_region *reg; @@ -204,22 +181,19 @@ static void __init setup_bootmem(void) early_init_fdt_scan_reserved_mem(); memblock_allow_resize(); memblock_dump_all(); + + for_each_memblock(memory, reg) { + unsigned long start_pfn = memblock_region_memory_base_pfn(reg); + unsigned long end_pfn = memblock_region_memory_end_pfn(reg); + + memblock_set_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), + &memblock.memory, 0); + } } void __init setup_arch(char **cmdline_p) { -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0] != '\0') { - /* Append bootloader command line to built-in */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif /* CONFIG_CMDLINE_OVERRIDE */ -#endif /* CONFIG_CMDLINE_BOOL */ *cmdline_p = boot_command_line; parse_early_param(); diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index e8a178df8144..582cb153eb24 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -74,7 +74,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); - if (unlikely(IS_ERR_VALUE(vdso_base))) { + if (IS_ERR_VALUE(vdso_base)) { ret = vdso_base; goto end; } diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index ceebfc29305b..148c98ca9b45 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -238,6 +238,10 @@ vmalloc_fault: * Do _not_ use "tsk->active_mm->pgd" here. * We might be inside an interrupt in the middle * of a task switch. + * + * Note: Use the old spbtr name instead of using the current + * satp name to support binutils 2.29 which doesn't know about + * the privileged ISA 1.10 yet. */ index = pgd_index(addr); pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9f4bee5e51fd..c77df8142be2 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -17,6 +17,7 @@ #include <linux/initrd.h> #include <linux/memblock.h> #include <linux/swap.h> +#include <linux/sizes.h> #include <asm/tlbflush.h> #include <asm/sections.h> @@ -25,11 +26,12 @@ static void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES]; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; - memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = max_mapnr; - free_area_init_node(0, zones_size, pfn_base, NULL); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn)); + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + + free_area_init_nodes(max_zone_pfns); } void setup_zero_page(void) @@ -39,8 +41,6 @@ void setup_zero_page(void) void __init paging_init(void) { - init_mm.pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)); - setup_zero_page(); local_flush_tlb_all(); zone_sizes_init(); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 671535e64aba..3fa3e5323612 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -232,14 +233,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); if (rc) vmem_remove_mapping(start, size); return rc; } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3316d463fc29..db55561c5981 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long pgt_prot, sgt_prot; unsigned long address = start; @@ -296,7 +297,8 @@ out: return ret; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c index f3a0649ab521..627416bbd0b1 100644 --- a/arch/score/kernel/setup.c +++ b/arch/score/kernel/setup.c @@ -124,9 +124,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; - seq_printf(m, "processor\t\t: %ld\n", n); - seq_printf(m, "\n"); - + seq_printf(m, "processor\t\t: %ld\n\n", n); return 0; } diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index bf726af5f1a5..ce0bbaa7e404 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -485,20 +485,20 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); return ret; } -EXPORT_SYMBOL_GPL(arch_add_memory); #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 addr) @@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; @@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (unlikely(ret)) pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, ret); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 55ba62957e64..995f9490334d 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, - int node) + int node, struct vmem_altmap *altmap) { unsigned long pte_base; @@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/unicore32/include/asm/bitops.h b/arch/unicore32/include/asm/bitops.h index 401f597bc38c..c0cbdbe17168 100644 --- a/arch/unicore32/include/asm/bitops.h +++ b/arch/unicore32/include/asm/bitops.h @@ -44,4 +44,6 @@ static inline int fls(int x) #define find_first_bit find_first_bit #define find_first_zero_bit find_first_zero_bit +#include <asm-generic/bitops/find.h> + #endif /* __UNICORE_BITOPS_H__ */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72d5149bcaa1..63bf349b2b24 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -55,6 +55,7 @@ config X86 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_PHYS_TO_DMA + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 @@ -62,6 +63,7 @@ config X86 select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_ZONE_DEVICE if X86_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -116,6 +118,7 @@ config X86 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 @@ -321,7 +324,7 @@ config X86_64_SMP config X86_32_LAZY_GS def_bool y - depends on X86_32 && !CC_STACKPROTECTOR + depends on X86_32 && CC_STACKPROTECTOR_NONE config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 1e3883e45687..74f6eee15179 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -21,6 +21,7 @@ #include <linux/export.h> #include <linux/context_tracking.h> #include <linux/user-return-notifier.h> +#include <linux/nospec.h> #include <linux/uprobes.h> #include <linux/livepatch.h> #include <linux/syscalls.h> @@ -206,7 +207,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) * special case only applies after poking regs and before the * very next return to user mode. */ - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif user_enter_irqoff(); @@ -282,7 +283,8 @@ __visible void do_syscall_64(struct pt_regs *regs) * regs->orig_ax, which changes the behavior of some syscalls. */ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); + regs->ax = sys_call_table[nr]( regs->di, regs->si, regs->dx, regs->r10, regs->r8, regs->r9); } @@ -304,7 +306,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) unsigned int nr = (unsigned int)regs->orig_ax; #ifdef CONFIG_IA32_EMULATION - current->thread.status |= TS_COMPAT; + ti->status |= TS_COMPAT; #endif if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { @@ -318,6 +320,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) } if (likely(nr < IA32_NR_syscalls)) { + nr = array_index_nospec(nr, IA32_NR_syscalls); /* * It's possible that a 32-bit syscall implementation * takes a 64-bit parameter but nonetheless assumes that diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2a35b1e0fb90..abee6d2b9311 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -566,6 +566,11 @@ restore_all: .Lrestore_nocheck: RESTORE_REGS 4 # skip orig_eax/error_code .Lirq_return: + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization + * when returning from IPI handler and when returning from + * scheduler to user-space. + */ INTERRUPT_RETURN .section .fixup, "ax" diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a83570495162..4a9bef6aca34 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -236,91 +236,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) pushq %r9 /* pt_regs->r9 */ pushq %r10 /* pt_regs->r10 */ pushq %r11 /* pt_regs->r11 */ - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - UNWIND_HINT_REGS extra=0 - - TRACE_IRQS_OFF - - /* - * If we need to do entry work or if we guess we'll need to do - * exit work, go straight to the slow path. - */ - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz entry_SYSCALL64_slow_path - -entry_SYSCALL_64_fastpath: - /* - * Easy case: enable interrupts and issue the syscall. If the syscall - * needs pt_regs, we'll call a stub that disables interrupts again - * and jumps to the slow path. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#if __SYSCALL_MASK == ~0 - cmpq $__NR_syscall_max, %rax -#else - andl $__SYSCALL_MASK, %eax - cmpl $__NR_syscall_max, %eax -#endif - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ - movq %r10, %rcx - - /* - * This call instruction is handled specially in stub_ptregs_64. - * It might end up jumping to the slow path. If it jumps, RAX - * and all argument registers are clobbered. - */ -#ifdef CONFIG_RETPOLINE - movq sys_call_table(, %rax, 8), %rax - call __x86_indirect_thunk_rax -#else - call *sys_call_table(, %rax, 8) -#endif -.Lentry_SYSCALL_64_after_fastpath_call: - - movq %rax, RAX(%rsp) -1: + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS - /* - * If we get here, then we know that pt_regs is clean for SYSRET64. - * If we see that no exit work is required (which we are required - * to check with IRQs off), then we can go straight to SYSRET64. - */ - DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - movq PER_CPU_VAR(current_task), %r11 - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) - jnz 1f - - LOCKDEP_SYS_EXIT - TRACE_IRQS_ON /* user mode is traced as IRQs on */ - movq RIP(%rsp), %rcx - movq EFLAGS(%rsp), %r11 - addq $6*8, %rsp /* skip extra regs -- they were preserved */ - UNWIND_HINT_EMPTY - jmp .Lpop_c_regs_except_rcx_r11_and_sysret - -1: - /* - * The fast path looked good when we started, but something changed - * along the way and we need to switch to the slow path. Calling - * raise(3) will trigger this, for example. IRQs are off. - */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - SAVE_EXTRA_REGS - movq %rsp, %rdi - call syscall_return_slowpath /* returns with IRQs disabled */ - jmp return_from_SYSCALL_64 -entry_SYSCALL64_slow_path: /* IRQs are off. */ - SAVE_EXTRA_REGS movq %rsp, %rdi call do_syscall_64 /* returns with IRQs disabled */ -return_from_SYSCALL_64: TRACE_IRQS_IRETQ /* we're about to change IF */ /* @@ -393,7 +322,6 @@ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ UNWIND_HINT_EMPTY POP_EXTRA_REGS -.Lpop_c_regs_except_rcx_r11_and_sysret: popq %rsi /* skip r11 */ popq %r10 popq %r9 @@ -424,47 +352,6 @@ syscall_return_via_sysret: USERGS_SYSRET64 END(entry_SYSCALL_64) -ENTRY(stub_ptregs_64) - /* - * Syscalls marked as needing ptregs land here. - * If we are on the fast path, we need to save the extra regs, - * which we achieve by trying again on the slow path. If we are on - * the slow path, the extra regs are already saved. - * - * RAX stores a pointer to the C function implementing the syscall. - * IRQs are on. - */ - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) - jne 1f - - /* - * Called from fast path -- disable IRQs again, pop return address - * and jump to slow path - */ - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - popq %rax - UNWIND_HINT_REGS extra=0 - jmp entry_SYSCALL64_slow_path - -1: - JMP_NOSPEC %rax /* Called from C */ -END(stub_ptregs_64) - -.macro ptregs_stub func -ENTRY(ptregs_\func) - UNWIND_HINT_FUNC - leaq \func(%rip), %rax - jmp stub_ptregs_64 -END(ptregs_\func) -.endm - -/* Instantiate ptregs_stub for each ptregs-using syscall */ -#define __SYSCALL_64_QUAL_(sym) -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) -#include <asm/syscalls_64.h> - /* * %rdi: prev task * %rsi: next task @@ -804,6 +691,10 @@ GLOBAL(restore_regs_and_return_to_kernel) POP_EXTRA_REGS POP_C_REGS addq $8, %rsp /* skip regs->orig_ax */ + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization + * when returning from IPI handler. + */ INTERRUPT_RETURN ENTRY(native_iret) diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 9c09775e589d..c176d2fab1da 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -7,14 +7,11 @@ #include <asm/asm-offsets.h> #include <asm/syscall.h> -#define __SYSCALL_64_QUAL_(sym) sym -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym - -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #include <asm/syscalls_64.h> #undef __SYSCALL_64 -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8e4ea143ed96..78f91ec1056e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -93,7 +93,8 @@ struct amd_nb { PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ - PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ + PERF_SAMPLE_PERIOD) #define PEBS_REGS \ (PERF_REG_X86_AX | \ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 7fb336210e1b..30d406146016 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,6 +24,34 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +/** + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + asm ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"r"(size),"r" (index) + :"cc"); + return mask; +} + +/* Override the default implementation from linux/nospec.h. */ +#define array_index_mask_nospec array_index_mask_nospec + +/* Prevent speculative execution past this barrier. */ +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ + "lfence", X86_FEATURE_LFENCE_RDTSC) + #ifdef CONFIG_X86_PPRO_FENCE #define dma_rmb() rmb() #else diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 64c4a30e0d39..e203169931c7 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -137,8 +137,10 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) extern int fixmaps_set; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 35a6bc4da8ad..cf090e584202 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -10,6 +10,10 @@ * * Things ending in "2" are usually because we have no better * name for them. There's no processor called "SILVERMONT2". + * + * While adding a new CPUID for a new microarchitecture, add a new + * group to keep logically sorted out in chronological order. Within + * that group keep the CPUID for the variants sorted by model number. */ #define INTEL_FAM6_CORE_YONAH 0x0E @@ -49,6 +53,8 @@ #define INTEL_FAM6_KABYLAKE_MOBILE 0x8E #define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E +#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_PINEVIEW 0x1C diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 528ed4be4393..9e7adcdbe031 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -38,6 +38,7 @@ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); int intel_pmc_s0ix_counter_read(u64 *data); int intel_pmc_gcr_read(u32 offset, u32 *data); +int intel_pmc_gcr_read64(u32 offset, u64 *data); int intel_pmc_gcr_write(u32 offset, u32 data); int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val); @@ -70,6 +71,11 @@ static inline int intel_pmc_gcr_read(u32 offset, u32 *data) return -EINVAL; } +static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) +{ + return -EINVAL; +} + static inline int intel_pmc_gcr_write(u32 offset, u32 data) { return -EINVAL; diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index b577dd0916aa..13e70da38bed 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -4,6 +4,7 @@ #include <linux/const.h> #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_SCALE_SHIFT 3 /* * Compiler uses shadow offset assuming that addresses start @@ -12,12 +13,15 @@ * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT */ #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ - ((-1UL << __VIRTUAL_MASK_SHIFT) >> 3)) + ((-1UL << __VIRTUAL_MASK_SHIFT) >> \ + KASAN_SHADOW_SCALE_SHIFT)) /* - * 47 bits for kernel address -> (47 - 3) bits for shadow - * 56 bits for kernel address -> (56 - 3) bits for shadow + * 47 bits for kernel address -> (47 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow + * 56 bits for kernel address -> (56 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow */ -#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3))) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + \ + (1ULL << (__VIRTUAL_MASK_SHIFT - \ + KASAN_SHADOW_SCALE_SHIFT))) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 07962f5f6fba..30df295f6d94 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) * that some other imaginary CPU is updating continuously with a * time stamp. */ - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, - "lfence", X86_FEATURE_LFENCE_RDTSC); + barrier_nospec(); return rdtsc(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d15d471348b8..4d57894635f2 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -150,7 +150,7 @@ extern char __indirect_thunk_end[]; * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ATT *might* it be avoided. + * CPUs with IBRS_ALL *might* it be avoided. */ static inline void vmexit_fill_RSB(void) { diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index ce245b0cdfca..0777e18a1d23 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ */ #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ + & PMD_MASK) #define PKMAP_BASE \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) diff --git a/arch/x86/include/asm/pmc_core.h b/arch/x86/include/asm/pmc_core.h deleted file mode 100644 index d4855f11136d..000000000000 --- a/arch/x86/include/asm/pmc_core.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Intel Core SoC Power Management Controller Header File - * - * Copyright (c) 2016, Intel Corporation. - * All Rights Reserved. - * - * Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com> - * Vishwanath Somayaji <vishwanath.somayaji@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#ifndef _ASM_PMC_CORE_H -#define _ASM_PMC_CORE_H - -/* API to read SLP_S0_RESIDENCY counter */ -int intel_pmc_slp_s0_counter_read(u32 *data); - -#endif /* _ASM_PMC_CORE_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index efbde088a718..793bae7e7ce3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -460,8 +460,6 @@ struct thread_struct { unsigned short gsindex; #endif - u32 status; /* thread synchronous flags */ - #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; @@ -507,6 +505,14 @@ struct thread_struct { */ }; +/* Whitelist the FPU state from the task_struct for hardened usercopy. */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fpu.state); + *size = fpu_kernel_xstate_size; +} + /* * Thread-synchronous status. * diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h new file mode 100644 index 000000000000..c67caafd3381 --- /dev/null +++ b/arch/x86/include/asm/sync_core.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SYNC_CORE_H +#define _ASM_X86_SYNC_CORE_H + +#include <linux/preempt.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> + +/* + * Ensure that a core serializing instruction is issued before returning + * to user-mode. x86 implements return to user-space through sysexit, + * sysrel, and sysretq, which are not core serializing. + */ +static inline void sync_core_before_usermode(void) +{ + /* With PTI, we unconditionally serialize before running user code. */ + if (static_cpu_has(X86_FEATURE_PTI)) + return; + /* + * Return from interrupt and NMI is done through iret, which is core + * serializing. + */ + if (in_irq() || in_nmi()) + return; + sync_core(); +} + +#endif /* _ASM_X86_SYNC_CORE_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index e3c95e8e61c5..03eedc21246d 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread.status & TS_COMPAT) + if (task->thread_info.status & TS_COMPAT) switch (i) { case 0: if (!n--) break; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d25a638a2720..a5d9521bb2cb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -55,6 +55,7 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ }; #define INIT_THREAD_INFO(tsk) \ @@ -219,7 +220,7 @@ static inline int arch_within_stack_frames(const void * const stack, #define in_ia32_syscall() true #else #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ - current->thread.status & TS_COMPAT) + current_thread_info()->status & TS_COMPAT) #endif /* diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d33e4a26dc7e..2b8f18ca5874 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -174,6 +174,8 @@ struct tlb_state { struct mm_struct *loaded_mm; u16 loaded_mm_asid; u16 next_asid; + /* last user mm's ctx id */ + u64 last_ctx_id; /* * We can be in one of several states: diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 574dff4d2913..aae77eb8491c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -124,6 +124,11 @@ extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() +#define __uaccess_begin_nospec() \ +({ \ + stac(); \ + barrier_nospec(); \ +}) /* * This is a type: either unsigned long, if the argument fits into @@ -445,7 +450,7 @@ do { \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; }; __uaccess_begin(); \ barrier(); +#define uaccess_try_nospec do { \ + current->thread.uaccess_err = 0; \ + __uaccess_begin_nospec(); \ + #define uaccess_catch(err) \ __uaccess_end(); \ (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ @@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; }; * get_user_ex(...); * } get_user_catch(err) */ -#define get_user_try uaccess_try +#define get_user_try uaccess_try_nospec #define get_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ @@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void) __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin(); \ + __uaccess_begin_nospec(); \ switch (size) { \ case 1: \ { \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 72950401b223..ba2dc1930630 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u8 *)to, from, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u16 *)to, from, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: ret = 0; - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u32 *)to, from, ret, "l", "k", "=r", 4); __uaccess_end(); diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f07ef3c575db..62546b3a398e 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) return copy_user_generic(dst, (__force void *)src, size); switch (size) { case 1: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, ret, "b", "b", "=q", 1); __uaccess_end(); return ret; case 2: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, ret, "w", "w", "=r", 2); __uaccess_end(); return ret; case 4: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, ret, "l", "k", "=r", 4); __uaccess_end(); return ret; case 8: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 8); __uaccess_end(); return ret; case 10: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 10); if (likely(!ret)) @@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) __uaccess_end(); return ret; case 16: - __uaccess_begin(); + __uaccess_begin_nospec(); __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, ret, "q", "", "=r", 16); if (likely(!ret)) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ec3a286163c3..2aa92094b59d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -36,6 +36,7 @@ #include <linux/ioport.h> #include <linux/pci.h> #include <linux/efi-bgrt.h> +#include <linux/serial_core.h> #include <asm/e820/api.h> #include <asm/irqdomain.h> @@ -1625,6 +1626,8 @@ int __init acpi_boot_init(void) if (!acpi_noirq) x86_init.pci.init = pci_acpi_init; + /* Do not enable ACPI SPCR console by default */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, false); return 0; } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 30571fdaaf6f..a481763a3776 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - #define DPRINTK(fmt, args...) \ do { \ if (debug_alternative) \ @@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; - if (noreplace_paravirt) - return; - for (p = start; p < end; p++) { unsigned int used; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index ab1865342002..dc0ca8e29c75 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -2389,6 +2389,7 @@ static int __init apm_init(void) if (HZ != 100) idle_period = (idle_period * HZ) / 100; if (idle_threshold < 100) { + cpuidle_poll_state_init(&apm_idle_driver); if (!cpuidle_register_driver(&apm_idle_driver)) if (cpuidle_register_device(&apm_cpuidle_device)) cpuidle_unregister_driver(&apm_idle_driver); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3bfb2b23d79c..71949bf2de5a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -103,7 +103,7 @@ bool retpoline_module_ok(bool has_retpoline) if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) return true; - pr_err("System may be vunerable to spectre v2\n"); + pr_err("System may be vulnerable to spectre v2\n"); spectre_v2_bad_module = true; return false; } @@ -119,13 +119,13 @@ static inline const char *spectre_v2_module_string(void) { return ""; } static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static void __init spec2_print_if_secure(const char *reason) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static inline bool retp_compiler(void) @@ -140,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { char arg[20]; - int ret; - - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); - if (ret > 0) { - if (match_option(arg, ret, "off")) { - goto disable; - } else if (match_option(arg, ret, "on")) { - spec2_print_if_secure("force enabled on command line."); - return SPECTRE_V2_CMD_FORCE; - } else if (match_option(arg, ret, "retpoline")) { - spec2_print_if_insecure("retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE; - } else if (match_option(arg, ret, "retpoline,amd")) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec2_print_if_insecure("AMD retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_AMD; - } else if (match_option(arg, ret, "retpoline,generic")) { - spec2_print_if_insecure("generic retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; - } else if (match_option(arg, ret, "auto")) { + int ret, i; + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret < 0) return SPECTRE_V2_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; } - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); return SPECTRE_V2_CMD_AUTO; -disable: - spec2_print_if_insecure("disabled on command line."); - return SPECTRE_V2_CMD_NONE; + } + + if (mitigation_options[i].secure) + spec2_print_if_secure(mitigation_options[i].option); + else + spec2_print_if_insecure(mitigation_options[i].option); + + return cmd; } /* Check for Skylake-like CPUs (for RSB handling) */ @@ -213,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void) return; case SPECTRE_V2_CMD_FORCE: - /* FALLTRHU */ case SPECTRE_V2_CMD_AUTO: - goto retpoline_auto; - + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; case SPECTRE_V2_CMD_RETPOLINE_AMD: if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_amd; @@ -297,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } ssize_t cpu_show_spectre_v2(struct device *dev, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7c996a692fd..d63f4b5706e4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -750,6 +750,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) } } +static void init_speculation_control(struct cpuinfo_x86 *c) +{ + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. + * + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware + * features, which are visible in /proc/cpuinfo and used by the + * kernel. So set those accordingly from the Intel bits. + */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -844,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + init_speculation_control(c); /* * Clear/Set all flags overridden by options, after probe. @@ -879,7 +900,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initdata struct x86_cpu_id cpu_no_speculation[] = { +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, @@ -892,7 +913,7 @@ static const __initdata struct x86_cpu_id cpu_no_speculation[] = { {} }; -static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { { X86_VENDOR_AMD }, {} }; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 6936d14d4c77..319bf989fad1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -175,28 +175,17 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); - /* - * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, - * and they also have a different bit for STIBP support. Also, - * a hypervisor might have set the individual AMD bits even on - * Intel CPUs, for finer-grained selection of what's available. - */ - if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { - set_cpu_cap(c, X86_FEATURE_IBRS); - set_cpu_cap(c, X86_FEATURE_IBPB); - } - if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) - set_cpu_cap(c, X86_FEATURE_STIBP); - /* Now if any of them are set, check the blacklist and clear the lot */ - if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_INTEL_STIBP) || + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); - clear_cpu_cap(c, X86_FEATURE_IBRS); - clear_cpu_cap(c, X86_FEATURE_IBPB); - clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_IBRS); + setup_clear_cpu_cap(X86_FEATURE_IBPB); + setup_clear_cpu_cap(X86_FEATURE_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); } /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index afbecff161d1..a2d8a3908670 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -109,7 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; - bool partial; + bool partial = false; printk("%sCall Trace:\n", log_lvl); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c75466232016..9eb448c7859d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -557,7 +557,7 @@ static void __set_personality_x32(void) * Pretend to come from a x32 execve. */ task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; - current->thread.status &= ~TS_COMPAT; + current_thread_info()->status &= ~TS_COMPAT; #endif } @@ -571,7 +571,7 @@ static void __set_personality_ia32(void) current->personality |= force_personality32; /* Prepare the first "return" to user space */ task_pt_regs(current)->orig_ax = __NR_ia32_execve; - current->thread.status |= TS_COMPAT; + current_thread_info()->status |= TS_COMPAT; #endif } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f37d18124648..ed5c4cdf0a34 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) */ regs->orig_ax = value; if (syscall_get_nr(child, regs) >= 0) - child->thread.status |= TS_I386_REGS_POKED; + child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 307d3bac5f04..11eda21eb697 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -68,6 +68,9 @@ relocate_kernel: movq %cr4, %rax movq %rax, CR4(%r11) + /* Save CR4. Required to enable the right paging mode later. */ + movq %rax, %r13 + /* zero out flags, and disable interrupts */ pushq $0 popfq @@ -126,8 +129,13 @@ identity_mapped: /* * Set cr4 to a known state: * - physical address extension enabled + * - 5-level paging, if it was enabled before */ movl $X86_CR4_PAE, %eax + testq $X86_CR4_LA57, %r13 + jz 1f + orl $X86_CR4_LA57, %eax +1: movq %rax, %cr4 jmp 1f diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b9e00e8f1c9b..4cdc0b27ec82 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * than the tracee. */ #ifdef CONFIG_IA32_EMULATION - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0099e10eb045..13f5d4217e4f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void) #define F(x) bit(X86_FEATURE_##x) -/* These are scattered features in cpufeatures.h. */ -#define KVM_CPUID_BIT_AVX512_4VNNIW 2 -#define KVM_CPUID_BIT_AVX512_4FMAPS 3 +/* For scattered features from cpufeatures.h; we currently expose none */ #define KF(x) bit(KVM_CPUID_BIT_##x) int kvm_update_cpuid(struct kvm_vcpu *vcpu) @@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = + F(IBPB) | F(IBRS); + /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | @@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; - entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); + cpuid_mask(&entry->edx, CPUID_7_EDX); } else { entry->ebx = 0; entry->ecx = 0; @@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); - entry->ebx = entry->edx = 0; + entry->edx = 0; + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ + if (boot_cpu_has(X86_FEATURE_IBPB)) + entry->ebx |= F(IBPB); + if (boot_cpu_has(X86_FEATURE_IBRS)) + entry->ebx |= F(IBRS); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); break; } case 0x80000019: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c2cea6651279..9a327d5b6d1f 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, }; static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f40d0da1f1d3..4e3c79530526 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -184,6 +184,8 @@ struct vcpu_svm { u64 gs_base; } host; + u64 spec_ctrl; + u32 *msrpm; ulong nmi_iret_rip; @@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs { { .index = MSR_CSTAR, .always = true }, { .index = MSR_SYSCALL_MASK, .always = true }, #endif + { .index = MSR_IA32_SPEC_CTRL, .always = false }, + { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, @@ -529,6 +533,7 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + struct vmcb *current_vmcb; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index) return false; } +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) +{ + u8 bit_write; + unsigned long tmp; + u32 offset; + u32 *msrpm; + + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + + offset = svm_msrpm_offset(msr); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + return !!test_bit(bit_write, &tmp); +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { @@ -1582,6 +1606,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + svm->spec_ctrl = 0; + if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; @@ -1703,11 +1729,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So do a full IBPB now. + */ + indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int i; if (unlikely(cpu != vcpu->cpu)) { @@ -1736,6 +1768,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (static_cpu_has(X86_FEATURE_RDTSCP)) wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; + indirect_branch_prediction_barrier(); + } avic_vcpu_load(vcpu, cpu); } @@ -3593,6 +3629,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_CR: msr_info->data = svm->nested.vm_cr_msr; break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + return 1; + + msr_info->data = svm->spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3684,6 +3727,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; + case MSR_IA32_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + svm->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_svm_vmrun_msrpm. + * We update the L1 MSR bit as well since it will end up + * touching the MSR anyway now. + */ + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); + break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + if (is_guest_mode(vcpu)) + break; + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -4936,6 +5022,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -5028,6 +5123,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + + if (svm->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a8b96dc4cd83..bee4c49f6dd0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -34,6 +34,7 @@ #include <linux/tboot.h> #include <linux/hrtimer.h> #include <linux/frame.h> +#include <linux/nospec.h> #include "kvm_cache_regs.h" #include "x86.h" @@ -111,6 +112,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +#define MSR_TYPE_RW 3 + +#define MSR_BITMAP_MODE_X2APIC 1 +#define MSR_BITMAP_MODE_X2APIC_APICV 2 +#define MSR_BITMAP_MODE_LM 4 + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ @@ -185,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 -#define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; @@ -210,6 +218,7 @@ struct loaded_vmcs { int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; + unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; @@ -226,7 +235,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). @@ -409,13 +418,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ -struct vmcs02_list { - struct list_head list; - gpa_t vmptr; - struct loaded_vmcs vmcs02; -}; - /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -440,15 +442,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ - struct list_head vmcs02_pool; - int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + + struct loaded_vmcs vmcs02; + /* - * Guest pages referred to in vmcs02 with host-physical pointers, so - * we must keep them pinned while L2 runs. + * Guest pages referred to in the vmcs02 with host-physical + * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; @@ -457,8 +459,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; - unsigned long *msr_bitmap; - struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -581,6 +581,7 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; + u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -592,6 +593,10 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + + u64 arch_capabilities; + u64 spec_ctrl; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; u32 secondary_exec_control; @@ -898,21 +903,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { static inline short vmcs_field_to_offset(unsigned long field) { - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); + unsigned short offset; - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + BUILD_BUG_ON(size > SHRT_MAX); + if (field >= size) return -ENOENT; - /* - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a - * generic mechanism. - */ - asm("lfence"); - - if (vmcs_field_to_offset_table[field] == 0) + field = array_index_nospec(field, size); + offset = vmcs_field_to_offset_table[field]; + if (offset == 0) return -ENOENT; - - return vmcs_field_to_offset_table[field]; + return offset; } static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) @@ -935,6 +937,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -954,12 +959,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); enum { VMX_IO_BITMAP_A, VMX_IO_BITMAP_B, - VMX_MSR_BITMAP_LEGACY, - VMX_MSR_BITMAP_LONGMODE, - VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, - VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, - VMX_MSR_BITMAP_LEGACY_X2APIC, - VMX_MSR_BITMAP_LONGMODE_X2APIC, VMX_VMREAD_BITMAP, VMX_VMWRITE_BITMAP, VMX_BITMAP_NR @@ -969,12 +968,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) -#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) -#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) -#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) -#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) -#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) -#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) @@ -1918,6 +1911,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +/* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + +/* + * Check if MSR is intercepted for L01 MSR bitmap. + */ +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +{ + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return true; + + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + return !!test_bit(msr, msr_bitmap + 0xc00 / f); + } + + return true; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2296,6 +2335,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); + indirect_branch_prediction_barrier(); } if (!already_loaded) { @@ -2572,36 +2612,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) -{ - unsigned long *msr_bitmap; - - if (is_guest_mode(vcpu)) - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; - else if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; - else - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; - } - } else { - if (is_long_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - } - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); -} - /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -2642,7 +2652,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(&vmx->vcpu); + vmx_update_msr_bitmap(&vmx->vcpu); } /* @@ -3276,6 +3286,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + msr_info->data = to_vmx(vcpu)->spec_ctrl; + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3383,6 +3407,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; + case MSR_IA32_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + return 1; + + vmx->spec_ctrl = data; + + if (!data) + break; + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. We update the vmcs01 here for L1 as well + * since it will end up touching the MSR anyway now. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_RW); + break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + return 1; + + if (data & ~PRED_CMD_IBPB) + return 1; + + if (!data) + break; + + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); + + /* + * For non-nested: + * When it's written (to non-zero) for the first time, pass + * it through. + * + * For nested: + * The handling of the MSR bitmap for L2 guests is done in + * nested_vmx_merge_msr_bitmap. We should not touch the + * vmcs02.msr_bitmap here since it gets completely overwritten + * in the merging. + */ + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -3837,11 +3925,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } -static struct vmcs *alloc_vmcs(void) -{ - return alloc_vmcs_cpu(raw_smp_processor_id()); -} - static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); @@ -3857,9 +3940,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + if (loaded_vmcs->msr_bitmap) + free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } +static struct vmcs *alloc_vmcs(void) +{ + return alloc_vmcs_cpu(raw_smp_processor_id()); +} + +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) +{ + loaded_vmcs->vmcs = alloc_vmcs(); + if (!loaded_vmcs->vmcs) + return -ENOMEM; + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); + + if (cpu_has_vmx_msr_bitmap()) { + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!loaded_vmcs->msr_bitmap) + goto out_vmcs; + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); + } + return 0; + +out_vmcs: + free_loaded_vmcs(loaded_vmcs); + return -ENOMEM; +} + static void free_kvm_area(void) { int cpu; @@ -4918,10 +5030,8 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, - u32 msr, int type) +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -4955,6 +5065,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) +{ + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + if (msr <= 0x1fff) { + if (type & MSR_TYPE_R) + /* read-low */ + __set_bit(msr, msr_bitmap + 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + __set_bit(msr, msr_bitmap + 0x800 / f); + + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + if (type & MSR_TYPE_R) + /* read-high */ + __set_bit(msr, msr_bitmap + 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + __set_bit(msr, msr_bitmap + 0xc00 / f); + + } +} + +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type, bool value) +{ + if (value) + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); + else + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); +} + /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. @@ -5001,30 +5155,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { - if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, - msr, MSR_TYPE_R | MSR_TYPE_W); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, - msr, MSR_TYPE_R | MSR_TYPE_W); + u8 mode = 0; + + if (cpu_has_secondary_exec_ctrls() && + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode |= MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } + + if (is_long_mode(vcpu)) + mode |= MSR_BITMAP_MODE_LM; + + return mode; } -static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) + +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, + u8 mode) { - if (apicv_active) { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, - msr, type); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, - msr, type); - } else { - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, - msr, type); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, - msr, type); + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } + + if (mode & MSR_BITMAP_MODE_X2APIC) { + /* + * TPR reads and writes can be virtualized even if virtual interrupt + * delivery is not in use. + */ + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); + } } } +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + u8 mode = vmx_msr_bitmap_mode(vcpu); + u8 changed = mode ^ vmx->msr_bitmap_mode; + + if (!changed) + return; + + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, + !(mode & MSR_BITMAP_MODE_LM)); + + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); + + vmx->msr_bitmap_mode = mode; +} + static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) { return enable_apicv; @@ -5274,7 +5468,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -5461,7 +5655,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -5539,6 +5733,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); @@ -5567,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u64 cr0; vmx->rmode.vm86_active = 0; + vmx->spec_ctrl = 0; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); @@ -6744,7 +6941,7 @@ void vmx_enable_tdp(void) static __init int hardware_setup(void) { - int r = -ENOMEM, i, msr; + int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -6764,9 +6961,6 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; goto out; @@ -6835,42 +7029,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, - vmx_msr_bitmap_longmode, PAGE_SIZE); - memcpy(vmx_msr_bitmap_legacy_x2apic, - vmx_msr_bitmap_legacy, PAGE_SIZE); - memcpy(vmx_msr_bitmap_longmode_x2apic, - vmx_msr_bitmap_longmode, PAGE_SIZE); - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ - for (msr = 0x800; msr <= 0x8ff; msr++) { - if (msr == 0x839 /* TMCCT */) - continue; - vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); - } - - /* - * TPR reads and writes can be virtualized even if virtual interrupt - * delivery is not in use. - */ - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); - - /* EOI */ - vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); - /* SELF-IPI */ - vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); - if (enable_ept) vmx_enable_tdp(); else @@ -6974,94 +7134,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) } /* - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. - * We could reuse a single VMCS for all the L2 guests, but we also want the - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this - * allows keeping them loaded on the processor, and in the future will allow - * optimizations where prepare_vmcs02 doesn't need to set all the fields on - * every entry if they never change. - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. - * - * The following functions allocate and free a vmcs02 in this pool. - */ - -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmx->nested.current_vmptr) { - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { - /* Recycle the least recently used VMCS. */ - item = list_last_entry(&vmx->nested.vmcs02_pool, - struct vmcs02_list, list); - item->vmptr = vmx->nested.current_vmptr; - list_move(&item->list, &vmx->nested.vmcs02_pool); - return &item->vmcs02; - } - - /* Create a new VMCS */ - item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); - if (!item) - return NULL; - item->vmcs02.vmcs = alloc_vmcs(); - item->vmcs02.shadow_vmcs = NULL; - if (!item->vmcs02.vmcs) { - kfree(item); - return NULL; - } - loaded_vmcs_init(&item->vmcs02); - item->vmptr = vmx->nested.current_vmptr; - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num++; - return &item->vmcs02; -} - -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) -{ - struct vmcs02_list *item; - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) - if (item->vmptr == vmptr) { - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - return; - } -} - -/* - * Free all VMCSs saved for this vcpu, except the one pointed by - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs - * must be &vmx->vmcs01. - */ -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) -{ - struct vmcs02_list *item, *n; - - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { - /* - * Something will leak if the above WARN triggers. Better than - * a use-after-free. - */ - if (vmx->loaded_vmcs == &item->vmcs02) - continue; - - free_loaded_vmcs(&item->vmcs02); - list_del(&item->list); - kfree(item); - vmx->nested.vmcs02_num--; - } -} - -/* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified * by Vol 2B, VMX Instruction Reference, "Conventions". @@ -7241,13 +7313,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; + int r; - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx->nested.msr_bitmap) - goto out_msr_bitmap; - } + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); + if (r < 0) + goto out_vmcs02; vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) @@ -7264,9 +7334,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = shadow_vmcs; } - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; @@ -7278,9 +7345,9 @@ out_shadow_vmcs: kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: - free_page((unsigned long)vmx->nested.msr_bitmap); + free_loaded_vmcs(&vmx->nested.vmcs02); -out_msr_bitmap: +out_vmcs02: return -ENOMEM; } @@ -7423,10 +7490,6 @@ static void free_nested(struct vcpu_vmx *vmx) free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - if (vmx->nested.msr_bitmap) { - free_page((unsigned long)vmx->nested.msr_bitmap); - vmx->nested.msr_bitmap = NULL; - } if (enable_shadow_vmcs) { vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); @@ -7434,7 +7497,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); - /* Unpin physical memory we referred to in current vmcs02 */ + /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; @@ -7450,7 +7513,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } - nested_free_all_saved_vmcss(vmx); + free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ @@ -7493,8 +7556,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); - nested_free_vmcs02(vmx, vmptr); - nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); } @@ -8406,10 +8467,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) /* * The host physical addresses of some pages of guest memory - * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU - * may write to these pages via their host physical address while - * L2 is running, bypassing any address-translation-based dirty - * tracking (e.g. EPT write protection). + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC + * Page). The CPU may write to these pages via their host + * physical address while L2 is running, bypassing any + * address-translation-based dirty tracking (e.g. EPT write + * protection). * * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. @@ -8943,7 +9005,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) @@ -9373,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_arm_hv_timer(vcpu); + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ @@ -9491,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* + * We do not use IBRS in the kernel. If this vCPU has used the + * SPEC_CTRL MSR it may have left it on; save the value and + * turn it off. This is much more efficient than blindly adding + * it to the atomic save/restore list. Especially as the former + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. + * + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + + if (vmx->spec_ctrl) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -9604,6 +9696,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + unsigned long *msr_bitmap; int cpu; if (!vmx) @@ -9636,13 +9729,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; - vmx->loaded_vmcs = &vmx->vmcs01; - vmx->loaded_vmcs->vmcs = alloc_vmcs(); - vmx->loaded_vmcs->shadow_vmcs = NULL; - if (!vmx->loaded_vmcs->vmcs) + err = alloc_loaded_vmcs(&vmx->vmcs01); + if (err < 0) goto free_msrs; - loaded_vmcs_init(vmx->loaded_vmcs); + msr_bitmap = vmx->vmcs01.msr_bitmap; + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + vmx->msr_bitmap_mode = 0; + + vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; @@ -10105,10 +10205,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* + * pred_cmd & spec_ctrl are trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This + * ensures that we do not accidentally generate an L02 MSR bitmap + * from the L12 MSR bitmap that is too permissive. + * 2. That L1 or L2s have actually used the MSR. This avoids + * unnecessarily merging of the bitmap if the MSR is unused. This + * works properly because we only update the L01 MSR bitmap lazily. + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only + * updated to reflect this when L1 (or its L2s) actually write to + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); - /* This shortcut is ok because we support only x2APIC MSRs so far. */ - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && + !pred_cmd && !spec_ctrl) return false; page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); @@ -10141,6 +10256,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, MSR_TYPE_W); } } + + if (spec_ctrl) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, + MSR_TYPE_R | MSR_TYPE_W); + + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, + MSR_TYPE_W); + kunmap(page); kvm_release_page_clean(page); @@ -10682,6 +10810,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -10903,20 +11034,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - struct loaded_vmcs *vmcs02; u32 msr_entry_idx; u32 exit_qual; - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - vmx_switch_vmcs(vcpu, vmcs02); + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { @@ -11485,7 +11611,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) - vmx_set_msr_bitmap(vcpu); + vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) @@ -11534,10 +11660,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); - /* if no vmcs02 cache requested, remove the one we used */ - if (VMCS02_POOL_SIZE == 0) - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); - /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c53298dfbf50..f9c5171dad2b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1009,6 +1009,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save; @@ -4237,13 +4238,14 @@ set_identity_unlock: mutex_unlock(&kvm->lock); break; case KVM_XEN_HVM_CONFIG: { + struct kvm_xen_hvm_config xhc; r = -EFAULT; - if (copy_from_user(&kvm->arch.xen_hvm_config, argp, - sizeof(struct kvm_xen_hvm_config))) + if (copy_from_user(&xhc, argp, sizeof(xhc))) goto out; r = -EINVAL; - if (kvm->arch.xen_hvm_config.flags) + if (xhc.flags) goto out; + memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); r = 0; break; } diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index c97d935a29e8..49b167f73215 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -40,6 +40,8 @@ ENTRY(__get_user_1) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 1: movzbl (%_ASM_AX),%edx xor %eax,%eax @@ -54,6 +56,8 @@ ENTRY(__get_user_2) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax @@ -68,6 +72,8 @@ ENTRY(__get_user_4) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 3: movl -3(%_ASM_AX),%edx xor %eax,%eax @@ -83,6 +89,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movq -7(%_ASM_AX),%rdx xor %eax,%eax @@ -94,6 +102,8 @@ ENTRY(__get_user_8) mov PER_CPU_VAR(current_task), %_ASM_DX cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ + and %_ASM_DX, %_ASM_AX ASM_STAC 4: movl -7(%_ASM_AX),%edx 5: movl -3(%_ASM_AX),%ecx diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1b377f734e64..7add8ba06887 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -331,12 +331,12 @@ do { \ unsigned long __copy_user_ll(void *to, const void *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_user_ll); @@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { - stac(); + __uaccess_begin_nospec(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) n = __copy_user_intel_nocache(to, from, n); @@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif - clac(); + __uaccess_end(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 135c9a7898c7..79cb066f40c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -829,23 +829,24 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); + return __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4a837289f2ad..1ab42c852069 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) } } -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, want_memblock); + return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } -EXPORT_SYMBOL_GPL(arch_add_memory); #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order) +static void __meminit free_pagetable(struct page *page, int order, + struct vmem_altmap *altmap) { unsigned long magic; unsigned int nr_pages = 1 << order; - struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); if (altmap) { vmem_altmap_free(altmap, nr_pages); @@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order) free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, + struct vmem_altmap *altmap) { pte_t *pte; int i; @@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0); + free_pagetable(pmd_page(*pmd), 0, altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, + struct vmem_altmap *altmap) { pmd_t *pmd; int i; @@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0); + free_pagetable(pud_page(*pud), 0, altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, + struct vmem_altmap *altmap) { pud_t *pud; int i; @@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0); + free_pagetable(p4d_page(*p4d), 0, altmap); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. */ if (!direct) - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, - bool direct) + bool direct, struct vmem_altmap *altmap) { unsigned long next, pages = 0; pte_t *pte_base; @@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PMD_SIZE)) { if (!direct) free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, direct); - free_pte_table(pte_base, pmd); + remove_pte_table(pte_base, addr, next, altmap, direct); + free_pte_table(pte_base, pmd, altmap); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pmd_t *pmd_base; @@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, } pmd_base = pmd_offset(pud, 0); - remove_pmd_table(pmd_base, addr, next, direct); - free_pmd_table(pmd_base, pud); + remove_pmd_table(pmd_base, addr, next, direct, altmap); + free_pmd_table(pmd_base, pud, altmap); } if (direct) @@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, static void __meminit remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pud_t *pud_base; @@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, BUILD_BUG_ON(p4d_large(*p4d)); pud_base = pud_offset(p4d, 0); - remove_pud_table(pud_base, addr, next, direct); + remove_pud_table(pud_base, addr, next, altmap, direct); /* * For 4-level page tables we do not want to free PUDs, but in the * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d); + free_pud_table(pud_base, p4d, altmap); } if (direct) @@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, /* start and end are both virtual address. */ static void __meminit -remove_pagetable(unsigned long start, unsigned long end, bool direct) +remove_pagetable(unsigned long start, unsigned long end, bool direct, + struct vmem_altmap *altmap) { unsigned long next; unsigned long addr; @@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) continue; p4d = p4d_offset(pgd, 0); - remove_p4d_table(p4d, addr, next, direct); + remove_p4d_table(p4d, addr, next, altmap, direct); } flush_tlb_all(); } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { - remove_pagetable(start, end, false); + remove_pagetable(start, end, false, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) start = (unsigned long)__va(start); end = (unsigned long)__va(end); - remove_pagetable(start, end, true); + remove_pagetable(start, end, true, NULL); } -int __ref arch_remove_memory(u64 start, u64 size) +int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); - struct vmem_altmap *altmap; struct zone *zone; int ret; /* With altmap the first mapped page is offset from @start */ - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); WARN_ON_ONCE(ret); kernel_physical_mapping_remove(start, start + size); @@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(PMD_SIZE, altmap); + else + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (p) { pte_t entry; @@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, return 0; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (boot_cpu_has(X86_FEATURE_PSE)) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5bfe61a5e8e3..8dcc0607f805 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -6,13 +6,14 @@ #include <linux/interrupt.h> #include <linux/export.h> #include <linux/cpu.h> +#include <linux/debugfs.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/nospec-branch.h> #include <asm/cache.h> #include <asm/apic.h> #include <asm/uv/uv.h> -#include <linux/debugfs.h> /* * TLB flushing, formerly SMP-only @@ -228,6 +229,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, #endif this_cpu_write(cpu_tlbstate.is_lazy, false); + /* + * The membarrier system call requires a full memory barrier and + * core serialization before returning to user-space, after + * storing to rq->curr. Writing to CR3 provides that full + * memory barrier and core serializing instruction. + */ if (real_prev == next) { VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != next->context.ctx_id); @@ -247,6 +254,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } else { u16 new_asid; bool need_flush; + u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); + + /* + * Avoid user/user BTB poisoning by flushing the branch + * predictor when switching between processes. This stops + * one process from doing Spectre-v2 attacks on another. + * + * As an optimization, flush indirect branches only when + * switching into processes that disable dumping. This + * protects high value processes like gpg, without having + * too high performance overhead. IBPB is *expensive*! + * + * This will not flush branches when switching into kernel + * threads. It will also not flush if we switch to idle + * thread and back to the same process. It will flush if we + * switch to a different non-dumpable process. + */ + if (tsk && tsk->mm && + tsk->mm->context.ctx_id != last_ctx_id && + get_dumpable(tsk->mm) != SUID_DUMP_USER) + indirect_branch_prediction_barrier(); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -292,6 +320,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } + /* + * Record last user mm's context id, so we can avoid + * flushing branch buffer with IBPB if we switch back + * to the same user. + */ + if (next != &init_mm) + this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); + this_cpu_write(cpu_tlbstate.loaded_mm, next); this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); } @@ -369,6 +405,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ + this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0452629148be..52e55108404e 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -839,7 +839,8 @@ static void __init pirq_find_router(struct irq_router *r) DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n", rt->rtr_vendor, rt->rtr_device); - pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); + pirq_router_dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus, + rt->rtr_devfn); if (!pirq_router_dev) { DBG(KERN_DEBUG "PCI: Interrupt router not found at " "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index c4b3646bd04c..9542a746dc50 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -409,10 +409,8 @@ int __init pci_xen_init(void) pcibios_enable_irq = xen_pcifront_enable_irq; pcibios_disable_irq = NULL; -#ifdef CONFIG_ACPI /* Keep ACPI out of the picture */ - acpi_noirq = 1; -#endif + acpi_noirq_set(); #ifdef CONFIG_PCI_MSI x86_msi.setup_msi_irqs = xen_setup_msi_irqs; diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index c35fdb585c68..afc4ed7b1578 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -145,7 +145,7 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir) #endif } -int swsusp_arch_resume(void) +asmlinkage int swsusp_arch_resume(void) { int error; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index f910c514438f..0ef5e5204968 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -174,7 +174,7 @@ out: return 0; } -int swsusp_arch_resume(void) +asmlinkage int swsusp_arch_resume(void) { int error; |