diff options
Diffstat (limited to 'arch')
387 files changed, 10306 insertions, 7370 deletions
diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index f49a054a1016..883391977fdf 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -18,88 +18,61 @@ static struct callee_regs *task_callee_regs(struct task_struct *tsk) static int genregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_regs *ptregs = task_pt_regs(target); const struct callee_regs *cregs = task_callee_regs(target); - int ret = 0; unsigned int stop_pc_val; -#define REG_O_CHUNK(START, END, PTR) \ - if (!ret) \ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \ - offsetof(struct user_regs_struct, START), \ - offsetof(struct user_regs_struct, END)); - -#define REG_O_ONE(LOC, PTR) \ - if (!ret) \ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \ - offsetof(struct user_regs_struct, LOC), \ - offsetof(struct user_regs_struct, LOC) + 4); - -#define REG_O_ZERO(LOC) \ - if (!ret) \ - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, \ - offsetof(struct user_regs_struct, LOC), \ - offsetof(struct user_regs_struct, LOC) + 4); - - REG_O_ZERO(pad); - REG_O_ONE(scratch.bta, &ptregs->bta); - REG_O_ONE(scratch.lp_start, &ptregs->lp_start); - REG_O_ONE(scratch.lp_end, &ptregs->lp_end); - REG_O_ONE(scratch.lp_count, &ptregs->lp_count); - REG_O_ONE(scratch.status32, &ptregs->status32); - REG_O_ONE(scratch.ret, &ptregs->ret); - REG_O_ONE(scratch.blink, &ptregs->blink); - REG_O_ONE(scratch.fp, &ptregs->fp); - REG_O_ONE(scratch.gp, &ptregs->r26); - REG_O_ONE(scratch.r12, &ptregs->r12); - REG_O_ONE(scratch.r11, &ptregs->r11); - REG_O_ONE(scratch.r10, &ptregs->r10); - REG_O_ONE(scratch.r9, &ptregs->r9); - REG_O_ONE(scratch.r8, &ptregs->r8); - REG_O_ONE(scratch.r7, &ptregs->r7); - REG_O_ONE(scratch.r6, &ptregs->r6); - REG_O_ONE(scratch.r5, &ptregs->r5); - REG_O_ONE(scratch.r4, &ptregs->r4); - REG_O_ONE(scratch.r3, &ptregs->r3); - REG_O_ONE(scratch.r2, &ptregs->r2); - REG_O_ONE(scratch.r1, &ptregs->r1); - REG_O_ONE(scratch.r0, &ptregs->r0); - REG_O_ONE(scratch.sp, &ptregs->sp); - - REG_O_ZERO(pad2); - - REG_O_ONE(callee.r25, &cregs->r25); - REG_O_ONE(callee.r24, &cregs->r24); - REG_O_ONE(callee.r23, &cregs->r23); - REG_O_ONE(callee.r22, &cregs->r22); - REG_O_ONE(callee.r21, &cregs->r21); - REG_O_ONE(callee.r20, &cregs->r20); - REG_O_ONE(callee.r19, &cregs->r19); - REG_O_ONE(callee.r18, &cregs->r18); - REG_O_ONE(callee.r17, &cregs->r17); - REG_O_ONE(callee.r16, &cregs->r16); - REG_O_ONE(callee.r15, &cregs->r15); - REG_O_ONE(callee.r14, &cregs->r14); - REG_O_ONE(callee.r13, &cregs->r13); - - REG_O_ONE(efa, &target->thread.fault_address); - - if (!ret) { - if (in_brkpt_trap(ptregs)) { - stop_pc_val = target->thread.fault_address; - pr_debug("\t\tstop_pc (brk-pt)\n"); - } else { - stop_pc_val = ptregs->ret; - pr_debug("\t\tstop_pc (others)\n"); - } - - REG_O_ONE(stop_pc, &stop_pc_val); + membuf_zero(&to, 4); // pad + membuf_store(&to, ptregs->bta); + membuf_store(&to, ptregs->lp_start); + membuf_store(&to, ptregs->lp_end); + membuf_store(&to, ptregs->lp_count); + membuf_store(&to, ptregs->status32); + membuf_store(&to, ptregs->ret); + membuf_store(&to, ptregs->blink); + membuf_store(&to, ptregs->fp); + membuf_store(&to, ptregs->r26); // gp + membuf_store(&to, ptregs->r12); + membuf_store(&to, ptregs->r11); + membuf_store(&to, ptregs->r10); + membuf_store(&to, ptregs->r9); + membuf_store(&to, ptregs->r8); + membuf_store(&to, ptregs->r7); + membuf_store(&to, ptregs->r6); + membuf_store(&to, ptregs->r5); + membuf_store(&to, ptregs->r4); + membuf_store(&to, ptregs->r3); + membuf_store(&to, ptregs->r2); + membuf_store(&to, ptregs->r1); + membuf_store(&to, ptregs->r0); + membuf_store(&to, ptregs->sp); + membuf_zero(&to, 4); // pad2 + membuf_store(&to, cregs->r25); + membuf_store(&to, cregs->r24); + membuf_store(&to, cregs->r23); + membuf_store(&to, cregs->r22); + membuf_store(&to, cregs->r21); + membuf_store(&to, cregs->r20); + membuf_store(&to, cregs->r19); + membuf_store(&to, cregs->r18); + membuf_store(&to, cregs->r17); + membuf_store(&to, cregs->r16); + membuf_store(&to, cregs->r15); + membuf_store(&to, cregs->r14); + membuf_store(&to, cregs->r13); + membuf_store(&to, target->thread.fault_address); // efa + + if (in_brkpt_trap(ptregs)) { + stop_pc_val = target->thread.fault_address; + pr_debug("\t\tstop_pc (brk-pt)\n"); + } else { + stop_pc_val = ptregs->ret; + pr_debug("\t\tstop_pc (others)\n"); } - return ret; + return membuf_store(&to, stop_pc_val); // stop_pc } static int genregs_set(struct task_struct *target, @@ -184,25 +157,20 @@ static int genregs_set(struct task_struct *target, #ifdef CONFIG_ISA_ARCV2 static int arcv2regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_regs *regs = task_pt_regs(target); - int ret, copy_sz; if (IS_ENABLED(CONFIG_ARC_HAS_ACCL_REGS)) - copy_sz = sizeof(struct user_regs_arcv2); - else - copy_sz = 4; /* r30 only */ + /* + * itemized copy not needed like above as layout of regs (r30,r58,r59) + * is exactly same in kernel (pt_regs) and userspace (user_regs_arcv2) + */ + return membuf_write(&to, ®s->r30, sizeof(struct user_regs_arcv2)); - /* - * itemized copy not needed like above as layout of regs (r30,r58,r59) - * is exactly same in kernel (pt_regs) and userspace (user_regs_arcv2) - */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, ®s->r30, - 0, copy_sz); - return ret; + membuf_write(&to, ®s->r30, 4); /* r30 only */ + return membuf_zero(&to, sizeof(struct user_regs_arcv2) - 4); } static int arcv2regs_set(struct task_struct *target, @@ -237,7 +205,7 @@ static const struct user_regset arc_regsets[] = { .n = ELF_NGREG, .size = sizeof(unsigned long), .align = sizeof(unsigned long), - .get = genregs_get, + .regset_get = genregs_get, .set = genregs_set, }, #ifdef CONFIG_ISA_ARCV2 @@ -246,7 +214,7 @@ static const struct user_regset arc_regsets[] = { .n = ELF_ARCV2REG, .size = sizeof(unsigned long), .align = sizeof(unsigned long), - .get = arcv2regs_get, + .regset_get = arcv2regs_get, .set = arcv2regs_set, }, #endif diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index ce02f92f4ab2..6c12d9fe694e 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -91,8 +91,6 @@ __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext); static int __init parse_tag_ramdisk(const struct tag *tag) { rd_image_start = tag->u.ramdisk.start; - rd_doload = (tag->u.ramdisk.flags & 1) == 0; - rd_prompt = (tag->u.ramdisk.flags & 2) == 0; if (tag->u.ramdisk.size) rd_size = tag->u.ramdisk.size; diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index d0f7c8896c96..2771e682220b 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -569,14 +569,9 @@ out: static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - struct pt_regs *regs = task_pt_regs(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - regs, - 0, sizeof(*regs)); + return membuf_write(&to, task_pt_regs(target), sizeof(struct pt_regs)); } static int gpr_set(struct task_struct *target, @@ -602,12 +597,10 @@ static int gpr_set(struct task_struct *target, static int fpa_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &task_thread_info(target)->fpstate, - 0, sizeof(struct user_fp)); + return membuf_write(&to, &task_thread_info(target)->fpstate, + sizeof(struct user_fp)); } static int fpa_set(struct task_struct *target, @@ -642,41 +635,20 @@ static int fpa_set(struct task_struct *target, * vfp_set() ignores this chunk * * 1 word for the FPSCR - * - * The bounds-checking logic built into user_regset_copyout and friends - * means that we can make a simple sequence of calls to map the relevant data - * to/from the specified slice of the user regset structure. */ static int vfp_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; struct thread_info *thread = task_thread_info(target); struct vfp_hard_struct const *vfp = &thread->vfpstate.hard; - const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs); const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr); vfp_sync_hwstate(thread); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &vfp->fpregs, - user_fpregs_offset, - user_fpregs_offset + sizeof(vfp->fpregs)); - if (ret) - return ret; - - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - user_fpregs_offset + sizeof(vfp->fpregs), - user_fpscr_offset); - if (ret) - return ret; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &vfp->fpscr, - user_fpscr_offset, - user_fpscr_offset + sizeof(vfp->fpscr)); + membuf_write(&to, vfp->fpregs, sizeof(vfp->fpregs)); + membuf_zero(&to, user_fpscr_offset - sizeof(vfp->fpregs)); + return membuf_store(&to, vfp->fpscr); } /* @@ -739,7 +711,7 @@ static const struct user_regset arm_regsets[] = { .n = ELF_NGREG, .size = sizeof(u32), .align = sizeof(u32), - .get = gpr_get, + .regset_get = gpr_get, .set = gpr_set }, [REGSET_FPR] = { @@ -751,7 +723,7 @@ static const struct user_regset arm_regsets[] = { .n = sizeof(struct user_fp) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = fpa_get, + .regset_get = fpa_get, .set = fpa_set }, #ifdef CONFIG_VFP @@ -764,7 +736,7 @@ static const struct user_regset arm_regsets[] = { .n = ARM_VFPREGS_SIZE / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = vfp_get, + .regset_get = vfp_get, .set = vfp_set }, #endif /* CONFIG_VFP */ diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index d40e9e5fc52b..396797ffe2b1 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/cpu.h> +#include <linux/dma-direct.h> #include <linux/dma-noncoherent.h> #include <linux/gfp.h> #include <linux/highmem.h> @@ -42,15 +43,18 @@ unsigned long xen_get_swiotlb_free_pages(unsigned int order) static bool hypercall_cflush = false; /* buffers in highmem or foreign pages cannot cross page boundaries */ -static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op) +static void dma_cache_maint(struct device *dev, dma_addr_t handle, + size_t size, u32 op) { struct gnttab_cache_flush cflush; - cflush.a.dev_bus_addr = handle & XEN_PAGE_MASK; cflush.offset = xen_offset_in_page(handle); cflush.op = op; + handle &= XEN_PAGE_MASK; do { + cflush.a.dev_bus_addr = dma_to_phys(dev, handle); + if (size + cflush.offset > XEN_PAGE_SIZE) cflush.length = XEN_PAGE_SIZE - cflush.offset; else @@ -59,7 +63,7 @@ static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op) HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); cflush.offset = 0; - cflush.a.dev_bus_addr += cflush.length; + handle += cflush.length; size -= cflush.length; } while (size); } @@ -71,24 +75,20 @@ static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op) * pfn_valid returns true the pages is local and we can use the native * dma-direct functions, otherwise we call the Xen specific version. */ -void xen_dma_sync_for_cpu(dma_addr_t handle, phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) { - if (pfn_valid(PFN_DOWN(handle))) - arch_sync_dma_for_cpu(paddr, size, dir); - else if (dir != DMA_TO_DEVICE) - dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); + if (dir != DMA_TO_DEVICE) + dma_cache_maint(dev, handle, size, GNTTAB_CACHE_INVAL); } -void xen_dma_sync_for_device(dma_addr_t handle, phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) { - if (pfn_valid(PFN_DOWN(handle))) - arch_sync_dma_for_device(paddr, size, dir); - else if (dir == DMA_FROM_DEVICE) - dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); + if (dir == DMA_FROM_DEVICE) + dma_cache_maint(dev, handle, size, GNTTAB_CACHE_INVAL); else - dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN); + dma_cache_maint(dev, handle, size, GNTTAB_CACHE_CLEAN); } bool xen_arch_need_swiotlb(struct device *dev, @@ -96,7 +96,7 @@ bool xen_arch_need_swiotlb(struct device *dev, dma_addr_t dev_addr) { unsigned int xen_pfn = XEN_PFN_DOWN(phys); - unsigned int bfn = XEN_PFN_DOWN(dev_addr); + unsigned int bfn = XEN_PFN_DOWN(dma_to_phys(dev, dev_addr)); /* * The swiotlb buffer should be used if diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 9ded4237e1c1..b181e0544b79 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -84,6 +84,7 @@ u64 __init kaslr_early_init(u64 dt_phys) void *fdt; u64 seed, offset, mask, module_range; const u8 *cmdline, *str; + unsigned long raw; int size; /* @@ -122,15 +123,12 @@ u64 __init kaslr_early_init(u64 dt_phys) } /* - * Mix in any entropy obtainable architecturally, open coded - * since this runs extremely early. + * Mix in any entropy obtainable architecturally if enabled + * and supported. */ - if (__early_cpu_has_rndr()) { - unsigned long raw; - if (__arm64_rndr(&raw)) - seed ^= raw; - } + if (arch_get_random_seed_long_early(&raw)) + seed ^= raw; if (!seed) { kaslr_status = KASLR_DISABLED_NO_SEED; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1e02e98e68dd..d8ebfd813e28 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -474,11 +474,10 @@ static int ptrace_hbp_set_addr(unsigned int note_type, static int hw_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { unsigned int note_type = regset->core_note_type; - int ret, idx = 0, offset, limit; + int ret, idx = 0; u32 info, ctrl; u64 addr; @@ -487,49 +486,21 @@ static int hw_break_get(struct task_struct *target, if (ret) return ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0, - sizeof(info)); - if (ret) - return ret; - - /* Pad */ - offset = offsetof(struct user_hwdebug_state, pad); - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset, - offset + PTRACE_HBP_PAD_SZ); - if (ret) - return ret; - + membuf_write(&to, &info, sizeof(info)); + membuf_zero(&to, sizeof(u32)); /* (address, ctrl) registers */ - offset = offsetof(struct user_hwdebug_state, dbg_regs); - limit = regset->n * regset->size; - while (count && offset < limit) { + while (to.left) { ret = ptrace_hbp_get_addr(note_type, target, idx, &addr); if (ret) return ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr, - offset, offset + PTRACE_HBP_ADDR_SZ); - if (ret) - return ret; - offset += PTRACE_HBP_ADDR_SZ; - ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl); if (ret) return ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl, - offset, offset + PTRACE_HBP_CTRL_SZ); - if (ret) - return ret; - offset += PTRACE_HBP_CTRL_SZ; - - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - offset, - offset + PTRACE_HBP_PAD_SZ); - if (ret) - return ret; - offset += PTRACE_HBP_PAD_SZ; + membuf_store(&to, addr); + membuf_store(&to, ctrl); + membuf_zero(&to, sizeof(u32)); idx++; } - return 0; } @@ -589,11 +560,10 @@ static int hw_break_set(struct task_struct *target, static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); + return membuf_write(&to, uregs, sizeof(*uregs)); } static int gpr_set(struct task_struct *target, const struct user_regset *regset, @@ -626,8 +596,7 @@ static int fpr_active(struct task_struct *target, const struct user_regset *regs */ static int __fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, unsigned int start_pos) + struct membuf to) { struct user_fpsimd_state *uregs; @@ -635,13 +604,11 @@ static int __fpr_get(struct task_struct *target, uregs = &target->thread.uw.fpsimd_state; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, - start_pos, start_pos + sizeof(*uregs)); + return membuf_write(&to, uregs, sizeof(*uregs)); } static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { if (!system_supports_fpsimd()) return -EINVAL; @@ -649,7 +616,7 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, if (target == current) fpsimd_preserve_current_state(); - return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0); + return __fpr_get(target, regset, to); } static int __fpr_set(struct task_struct *target, @@ -699,15 +666,12 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, } static int tls_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned long *tls = &target->thread.uw.tp_value; - if (target == current) tls_preserve_current_state(); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); + return membuf_store(&to, target->thread.uw.tp_value); } static int tls_set(struct task_struct *target, const struct user_regset *regset, @@ -727,13 +691,9 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, static int system_call_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int syscallno = task_pt_regs(target)->syscallno; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &syscallno, 0, -1); + return membuf_store(&to, task_pt_regs(target)->syscallno); } static int system_call_set(struct task_struct *target, @@ -780,24 +740,10 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header) return ALIGN(header->size, SVE_VQ_BYTES); } -static unsigned int sve_get_size(struct task_struct *target, - const struct user_regset *regset) -{ - struct user_sve_header header; - - if (!system_supports_sve()) - return 0; - - sve_init_header_from_task(&header, target); - return sve_size_from_header(&header); -} - static int sve_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; struct user_sve_header header; unsigned int vq; unsigned long start, end; @@ -809,10 +755,7 @@ static int sve_get(struct task_struct *target, sve_init_header_from_task(&header, target); vq = sve_vq_from_vl(header.vl); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header, - 0, sizeof(header)); - if (ret) - return ret; + membuf_write(&to, &header, sizeof(header)); if (target == current) fpsimd_preserve_current_state(); @@ -821,26 +764,18 @@ static int sve_get(struct task_struct *target, BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header)); if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) - return __fpr_get(target, regset, pos, count, kbuf, ubuf, - SVE_PT_FPSIMD_OFFSET); + return __fpr_get(target, regset, to); /* Otherwise: full SVE case */ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); start = SVE_PT_SVE_OFFSET; end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.sve_state, - start, end); - if (ret) - return ret; + membuf_write(&to, target->thread.sve_state, end - start); start = end; end = SVE_PT_SVE_FPSR_OFFSET(vq); - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - start, end); - if (ret) - return ret; + membuf_zero(&to, end - start); /* * Copy fpsr, and fpcr which must follow contiguously in @@ -848,16 +783,11 @@ static int sve_get(struct task_struct *target, */ start = end; end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.uw.fpsimd_state.fpsr, - start, end); - if (ret) - return ret; + membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr, end - start); start = end; end = sve_size_from_header(&header); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - start, end); + return membuf_zero(&to, end - start); } static int sve_set(struct task_struct *target, @@ -961,8 +891,7 @@ out: #ifdef CONFIG_ARM64_PTR_AUTH static int pac_mask_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { /* * The PAC bits can differ across data and instruction pointers @@ -978,7 +907,7 @@ static int pac_mask_get(struct task_struct *target, if (!system_supports_address_auth()) return -EINVAL; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1); + return membuf_write(&to, &uregs, sizeof(uregs)); } #ifdef CONFIG_CHECKPOINT_RESTORE @@ -1017,8 +946,7 @@ static void pac_address_keys_from_user(struct ptrauth_keys_user *keys, static int pac_address_keys_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct ptrauth_keys_user *keys = &target->thread.keys_user; struct user_pac_address_keys user_keys; @@ -1028,8 +956,7 @@ static int pac_address_keys_get(struct task_struct *target, pac_address_keys_to_user(&user_keys, keys); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &user_keys, 0, -1); + return membuf_write(&to, &user_keys, sizeof(user_keys)); } static int pac_address_keys_set(struct task_struct *target, @@ -1068,8 +995,7 @@ static void pac_generic_keys_from_user(struct ptrauth_keys_user *keys, static int pac_generic_keys_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct ptrauth_keys_user *keys = &target->thread.keys_user; struct user_pac_generic_keys user_keys; @@ -1079,8 +1005,7 @@ static int pac_generic_keys_get(struct task_struct *target, pac_generic_keys_to_user(&user_keys, keys); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &user_keys, 0, -1); + return membuf_write(&to, &user_keys, sizeof(user_keys)); } static int pac_generic_keys_set(struct task_struct *target, @@ -1134,7 +1059,7 @@ static const struct user_regset aarch64_regsets[] = { .n = sizeof(struct user_pt_regs) / sizeof(u64), .size = sizeof(u64), .align = sizeof(u64), - .get = gpr_get, + .regset_get = gpr_get, .set = gpr_set }, [REGSET_FPR] = { @@ -1147,7 +1072,7 @@ static const struct user_regset aarch64_regsets[] = { .size = sizeof(u32), .align = sizeof(u32), .active = fpr_active, - .get = fpr_get, + .regset_get = fpr_get, .set = fpr_set }, [REGSET_TLS] = { @@ -1155,7 +1080,7 @@ static const struct user_regset aarch64_regsets[] = { .n = 1, .size = sizeof(void *), .align = sizeof(void *), - .get = tls_get, + .regset_get = tls_get, .set = tls_set, }, #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -1164,7 +1089,7 @@ static const struct user_regset aarch64_regsets[] = { .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, [REGSET_HW_WATCH] = { @@ -1172,7 +1097,7 @@ static const struct user_regset aarch64_regsets[] = { .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, #endif @@ -1181,7 +1106,7 @@ static const struct user_regset aarch64_regsets[] = { .n = 1, .size = sizeof(int), .align = sizeof(int), - .get = system_call_get, + .regset_get = system_call_get, .set = system_call_set, }, #ifdef CONFIG_ARM64_SVE @@ -1191,9 +1116,8 @@ static const struct user_regset aarch64_regsets[] = { SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, - .get = sve_get, + .regset_get = sve_get, .set = sve_set, - .get_size = sve_get_size, }, #endif #ifdef CONFIG_ARM64_PTR_AUTH @@ -1202,7 +1126,7 @@ static const struct user_regset aarch64_regsets[] = { .n = sizeof(struct user_pac_mask) / sizeof(u64), .size = sizeof(u64), .align = sizeof(u64), - .get = pac_mask_get, + .regset_get = pac_mask_get, /* this cannot be set dynamically */ }, #ifdef CONFIG_CHECKPOINT_RESTORE @@ -1211,7 +1135,7 @@ static const struct user_regset aarch64_regsets[] = { .n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t), .size = sizeof(__uint128_t), .align = sizeof(__uint128_t), - .get = pac_address_keys_get, + .regset_get = pac_address_keys_get, .set = pac_address_keys_set, }, [REGSET_PACG_KEYS] = { @@ -1219,7 +1143,7 @@ static const struct user_regset aarch64_regsets[] = { .n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t), .size = sizeof(__uint128_t), .align = sizeof(__uint128_t), - .get = pac_generic_keys_get, + .regset_get = pac_generic_keys_get, .set = pac_generic_keys_set, }, #endif @@ -1237,57 +1161,31 @@ enum compat_regset { REGSET_COMPAT_VFP, }; -static int compat_gpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) +static inline compat_ulong_t compat_get_user_reg(struct task_struct *task, int idx) { - int ret = 0; - unsigned int i, start, num_regs; + struct pt_regs *regs = task_pt_regs(task); - /* Calculate the number of AArch32 registers contained in count */ - num_regs = count / regset->size; - - /* Convert pos into an register number */ - start = pos / regset->size; - - if (start + num_regs > regset->n) - return -EIO; - - for (i = 0; i < num_regs; ++i) { - unsigned int idx = start + i; - compat_ulong_t reg; - - switch (idx) { - case 15: - reg = task_pt_regs(target)->pc; - break; - case 16: - reg = task_pt_regs(target)->pstate; - reg = pstate_to_compat_psr(reg); - break; - case 17: - reg = task_pt_regs(target)->orig_x0; - break; - default: - reg = task_pt_regs(target)->regs[idx]; - } - - if (kbuf) { - memcpy(kbuf, ®, sizeof(reg)); - kbuf += sizeof(reg); - } else { - ret = copy_to_user(ubuf, ®, sizeof(reg)); - if (ret) { - ret = -EFAULT; - break; - } - - ubuf += sizeof(reg); - } + switch (idx) { + case 15: + return regs->pc; + case 16: + return pstate_to_compat_psr(regs->pstate); + case 17: + return regs->orig_x0; + default: + return regs->regs[idx]; } +} - return ret; +static int compat_gpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int i = 0; + + while (to.left) + membuf_store(&to, compat_get_user_reg(target, i++)); + return 0; } static int compat_gpr_set(struct task_struct *target, @@ -1354,12 +1252,10 @@ static int compat_gpr_set(struct task_struct *target, static int compat_vfp_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct user_fpsimd_state *uregs; compat_ulong_t fpscr; - int ret, vregs_end_pos; if (!system_supports_fpsimd()) return -EINVAL; @@ -1373,19 +1269,10 @@ static int compat_vfp_get(struct task_struct *target, * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. */ - vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, - 0, vregs_end_pos); - - if (count && !ret) { - fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | - (uregs->fpcr & VFP_FPSCR_CTRL_MASK); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr, - vregs_end_pos, VFP_STATE_SIZE); - } - - return ret; + membuf_write(&to, uregs, VFP_STATE_SIZE - sizeof(compat_ulong_t)); + fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | + (uregs->fpcr & VFP_FPSCR_CTRL_MASK); + return membuf_store(&to, fpscr); } static int compat_vfp_set(struct task_struct *target, @@ -1420,11 +1307,10 @@ static int compat_vfp_set(struct task_struct *target, } static int compat_tls_get(struct task_struct *target, - const struct user_regset *regset, unsigned int pos, - unsigned int count, void *kbuf, void __user *ubuf) + const struct user_regset *regset, + struct membuf to) { - compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + return membuf_store(&to, (compat_ulong_t)target->thread.uw.tp_value); } static int compat_tls_set(struct task_struct *target, @@ -1449,7 +1335,7 @@ static const struct user_regset aarch32_regsets[] = { .n = COMPAT_ELF_NGREG, .size = sizeof(compat_elf_greg_t), .align = sizeof(compat_elf_greg_t), - .get = compat_gpr_get, + .regset_get = compat_gpr_get, .set = compat_gpr_set }, [REGSET_COMPAT_VFP] = { @@ -1458,7 +1344,7 @@ static const struct user_regset aarch32_regsets[] = { .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), .active = fpr_active, - .get = compat_vfp_get, + .regset_get = compat_vfp_get, .set = compat_vfp_set }, }; @@ -1474,7 +1360,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .n = COMPAT_ELF_NGREG, .size = sizeof(compat_elf_greg_t), .align = sizeof(compat_elf_greg_t), - .get = compat_gpr_get, + .regset_get = compat_gpr_get, .set = compat_gpr_set }, [REGSET_FPR] = { @@ -1482,7 +1368,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), - .get = compat_vfp_get, + .regset_get = compat_vfp_get, .set = compat_vfp_set }, [REGSET_TLS] = { @@ -1490,7 +1376,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .n = 1, .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), - .get = compat_tls_get, + .regset_get = compat_tls_get, .set = compat_tls_set, }, #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -1499,7 +1385,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, [REGSET_HW_WATCH] = { @@ -1507,7 +1393,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .n = sizeof(struct user_hwdebug_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = hw_break_get, + .regset_get = hw_break_get, .set = hw_break_set, }, #endif @@ -1516,7 +1402,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = { .n = 1, .size = sizeof(int), .align = sizeof(int), - .get = system_call_get, + .regset_get = system_call_get, .set = system_call_set, }, }; @@ -1541,9 +1427,7 @@ static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, else if (off == COMPAT_PT_TEXT_END_ADDR) tmp = tsk->mm->end_code; else if (off < sizeof(compat_elf_gregset_t)) - return copy_regset_to_user(tsk, &user_aarch32_view, - REGSET_COMPAT_GPR, off, - sizeof(compat_ulong_t), ret); + tmp = compat_get_user_reg(tsk, off >> 2); else if (off >= COMPAT_USER_SZ) return -EIO; else @@ -1555,8 +1439,8 @@ static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { - int ret; - mm_segment_t old_fs = get_fs(); + struct pt_regs newregs = *task_pt_regs(tsk); + unsigned int idx = off / 4; if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -1564,14 +1448,25 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; - set_fs(KERNEL_DS); - ret = copy_regset_from_user(tsk, &user_aarch32_view, - REGSET_COMPAT_GPR, off, - sizeof(compat_ulong_t), - &val); - set_fs(old_fs); + switch (idx) { + case 15: + newregs.pc = val; + break; + case 16: + newregs.pstate = compat_psr_to_pstate(val); + break; + case 17: + newregs.orig_x0 = val; + break; + default: + newregs.regs[idx] = val; + } - return ret; + if (!valid_user_regs(&newregs.user_regs, tsk)) + return -EINVAL; + + *task_pt_regs(tsk) = newregs; + return 0; } #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 67af1562da86..3cdaa8cf0ed6 100644 --- a/arch/c6x/kernel/ptrace.c +++ b/arch/c6x/kernel/ptrace.c @@ -57,14 +57,9 @@ static inline int put_reg(struct task_struct *task, static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - struct pt_regs *regs = task_pt_regs(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - regs, - 0, sizeof(*regs)); + return membuf_write(&to, task_pt_regs(target), sizeof(struct pt_regs)); } enum c6x_regset { @@ -77,7 +72,7 @@ static const struct user_regset c6x_regsets[] = { .n = ELF_NGREG, .size = sizeof(u32), .align = sizeof(u32), - .get = gpr_get, + .regset_get = gpr_get, }, }; diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index b06612c408c4..d822144906ac 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -76,17 +76,14 @@ enum csky_regset { static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - struct pt_regs *regs; - - regs = task_pt_regs(target); + struct pt_regs *regs = task_pt_regs(target); /* Abiv1 regs->tls is fake and we need sync here. */ regs->tls = task_thread_info(target)->tp_value; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs, 0, -1); + return membuf_write(&to, regs, sizeof(regs)); } static int gpr_set(struct task_struct *target, @@ -114,8 +111,7 @@ static int gpr_set(struct task_struct *target, static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct user_fp *regs = (struct user_fp *)&target->thread.user_fp; @@ -131,9 +127,9 @@ static int fpr_get(struct task_struct *target, for (i = 0; i < 32; i++) tmp.vr[64 + i] = regs->vr[32 + i]; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tmp, 0, -1); + return membuf_write(&to, &tmp, sizeof(tmp)); #else - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs, 0, -1); + return membuf_write(&to, regs, sizeof(*regs)); #endif } @@ -173,16 +169,16 @@ static const struct user_regset csky_regsets[] = { .n = sizeof(struct pt_regs) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = &gpr_get, - .set = &gpr_set, + .regset_get = gpr_get, + .set = gpr_set, }, [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_fp) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = &fpr_get, - .set = &fpr_set, + .regset_get = fpr_get, + .set = fpr_set, }, }; diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index 0dc1c8f622bc..a11db009d0ea 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -87,20 +87,15 @@ int h8300_put_reg(struct task_struct *task, int regno, unsigned long data) static int regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { int r; - struct user_regs_struct regs; - long *reg = (long *)®s; - /* build user regs in buffer */ - BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); - for (r = 0; r < sizeof(regs) / sizeof(long); r++) - *reg++ = h8300_get_reg(target, r); + BUILD_BUG_ON(sizeof(struct user_regs_struct) % sizeof(long) != 0); + for (r = 0; r < ELF_NGREG; r++) + membuf_store(&to, h8300_get_reg(target, r)); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - ®s, 0, sizeof(regs)); + return 0; } static int regs_set(struct task_struct *target, @@ -139,7 +134,7 @@ static const struct user_regset h8300_regsets[] = { .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .get = regs_get, + .regset_get = regs_get, .set = regs_set, }, }; diff --git a/arch/hexagon/kernel/ptrace.c b/arch/hexagon/kernel/ptrace.c index dcbf7ea960cc..a5a89e944257 100644 --- a/arch/hexagon/kernel/ptrace.c +++ b/arch/hexagon/kernel/ptrace.c @@ -35,58 +35,38 @@ void user_disable_single_step(struct task_struct *child) static int genregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + srtuct membuf to) { - int ret; - unsigned int dummy; struct pt_regs *regs = task_pt_regs(target); - - if (!regs) - return -EIO; - /* The general idea here is that the copyout must happen in * exactly the same order in which the userspace expects these * regs. Now, the sequence in userspace does not match the * sequence in the kernel, so everything past the 32 gprs * happens one at a time. */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - ®s->r00, 0, 32*sizeof(unsigned long)); - -#define ONEXT(KPT_REG, USR_REG) \ - if (!ret) \ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, \ - KPT_REG, offsetof(struct user_regs_struct, USR_REG), \ - offsetof(struct user_regs_struct, USR_REG) + \ - sizeof(unsigned long)); - + membuf_write(&to, ®s->r00, 32*sizeof(unsigned long)); /* Must be exactly same sequence as struct user_regs_struct */ - ONEXT(®s->sa0, sa0); - ONEXT(®s->lc0, lc0); - ONEXT(®s->sa1, sa1); - ONEXT(®s->lc1, lc1); - ONEXT(®s->m0, m0); - ONEXT(®s->m1, m1); - ONEXT(®s->usr, usr); - ONEXT(®s->preds, p3_0); - ONEXT(®s->gp, gp); - ONEXT(®s->ugp, ugp); - ONEXT(&pt_elr(regs), pc); - dummy = pt_cause(regs); - ONEXT(&dummy, cause); - ONEXT(&pt_badva(regs), badva); + membuf_store(&to, regs->sa0); + membuf_store(&to, regs->lc0); + membuf_store(&to, regs->sa1); + membuf_store(&to, regs->lc1); + membuf_store(&to, regs->m0); + membuf_store(&to, regs->m1); + membuf_store(&to, regs->usr); + membuf_store(&to, regs->p3_0); + membuf_store(&to, regs->gp); + membuf_store(&to, regs->ugp); + membuf_store(&to, pt_elr(regs)); // pc + membuf_store(&to, (unsigned long)pt_cause(regs)); // cause + membuf_store(&to, pt_badva(regs)); // badva #if CONFIG_HEXAGON_ARCH_VERSION >=4 - ONEXT(®s->cs0, cs0); - ONEXT(®s->cs1, cs1); + membuf_store(&to, regs->cs0); + membuf_store(&to, regs->cs1); + return membuf_zero(&to, sizeof(unsigned long)); +#else + return membuf_zero(&to, 3 * sizeof(unsigned long)); #endif - - /* Pad the rest with zeros, if needed */ - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - offsetof(struct user_regs_struct, pad1), -1); - return ret; } static int genregs_set(struct task_struct *target, @@ -159,7 +139,7 @@ static const struct user_regset hexagon_regsets[] = { .n = ELF_NGREG, .size = sizeof(unsigned long), .align = sizeof(unsigned long), - .get = genregs_get, + .regset_get = genregs_get, .set = genregs_set, }, }; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 82aaacf64583..33ca9fa0fbf5 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1273,52 +1273,43 @@ struct regset_getset { int ret; }; +static const ptrdiff_t pt_offsets[32] = +{ +#define R(n) offsetof(struct pt_regs, r##n) + [0] = -1, R(1), R(2), R(3), + [4] = -1, [5] = -1, [6] = -1, [7] = -1, + R(8), R(9), R(10), R(11), R(12), R(13), R(14), R(15), + R(16), R(17), R(18), R(19), R(20), R(21), R(22), R(23), + R(24), R(25), R(26), R(27), R(28), R(29), R(30), R(31), +#undef R +}; + static int access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info, unsigned long addr, unsigned long *data, int write_access) { - struct pt_regs *pt; - unsigned long *ptr = NULL; - int ret; - char nat = 0; + struct pt_regs *pt = task_pt_regs(target); + unsigned reg = addr / sizeof(unsigned long); + ptrdiff_t d = pt_offsets[reg]; - pt = task_pt_regs(target); - switch (addr) { - case ELF_GR_OFFSET(1): - ptr = &pt->r1; - break; - case ELF_GR_OFFSET(2): - case ELF_GR_OFFSET(3): - ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2)); - break; - case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7): + if (d >= 0) { + unsigned long *ptr = (void *)pt + d; + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; + } else { + char nat = 0; if (write_access) { /* read NaT bit first: */ unsigned long dummy; - - ret = unw_get_gr(info, addr/8, &dummy, &nat); + int ret = unw_get_gr(info, reg, &dummy, &nat); if (ret < 0) return ret; } - return unw_access_gr(info, addr/8, data, &nat, write_access); - case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11): - ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8); - break; - case ELF_GR_OFFSET(12): - case ELF_GR_OFFSET(13): - ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12); - break; - case ELF_GR_OFFSET(14): - ptr = &pt->r14; - break; - case ELF_GR_OFFSET(15): - ptr = &pt->r15; + return unw_access_gr(info, reg, data, &nat, write_access); } - if (write_access) - *ptr = *data; - else - *data = *ptr; - return 0; } static int @@ -1490,7 +1481,7 @@ static int access_elf_reg(struct task_struct *target, struct unw_frame_info *info, unsigned long addr, unsigned long *data, int write_access) { - if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15)) + if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(31)) return access_elf_gpreg(target, info, addr, data, write_access); else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7)) return access_elf_breg(target, info, addr, data, write_access); @@ -1498,12 +1489,17 @@ access_elf_reg(struct task_struct *target, struct unw_frame_info *info, return access_elf_areg(target, info, addr, data, write_access); } +struct regset_membuf { + struct membuf to; + int ret; +}; + void do_gpregs_get(struct unw_frame_info *info, void *arg) { - struct pt_regs *pt; - struct regset_getset *dst = arg; - elf_greg_t tmp[16]; - unsigned int i, index, min_copy; + struct regset_membuf *dst = arg; + struct membuf to = dst->to; + unsigned int n; + elf_greg_t reg; if (unw_unwind_to_user(info) < 0) return; @@ -1521,165 +1517,53 @@ void do_gpregs_get(struct unw_frame_info *info, void *arg) /* Skip r0 */ - if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { - dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, - &dst->u.get.kbuf, - &dst->u.get.ubuf, - 0, ELF_GR_OFFSET(1)); - if (dst->ret || dst->count == 0) + membuf_zero(&to, 8); + for (n = 8; to.left && n < ELF_AR_END_OFFSET; n += 8) { + if (access_elf_reg(info->task, info, n, ®, 0) < 0) { + dst->ret = -EIO; return; - } - - /* gr1 - gr15 */ - if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { - index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); - min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ? - (dst->pos + dst->count) : ELF_GR_OFFSET(16); - for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), - index++) - if (access_elf_reg(dst->target, info, i, - &tmp[index], 0) < 0) { - dst->ret = -EIO; - return; - } - dst->ret = user_regset_copyout(&dst->pos, &dst->count, - &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, - ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); - if (dst->ret || dst->count == 0) - return; - } - - /* r16-r31 */ - if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { - pt = task_pt_regs(dst->target); - dst->ret = user_regset_copyout(&dst->pos, &dst->count, - &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16, - ELF_GR_OFFSET(16), ELF_NAT_OFFSET); - if (dst->ret || dst->count == 0) - return; - } - - /* nat, pr, b0 - b7 */ - if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { - index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); - min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ? - (dst->pos + dst->count) : ELF_CR_IIP_OFFSET; - for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), - index++) - if (access_elf_reg(dst->target, info, i, - &tmp[index], 0) < 0) { - dst->ret = -EIO; - return; - } - dst->ret = user_regset_copyout(&dst->pos, &dst->count, - &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, - ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); - if (dst->ret || dst->count == 0) - return; - } - - /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat - * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd - */ - if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { - index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); - min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ? - (dst->pos + dst->count) : ELF_AR_END_OFFSET; - for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), - index++) - if (access_elf_reg(dst->target, info, i, - &tmp[index], 0) < 0) { - dst->ret = -EIO; - return; - } - dst->ret = user_regset_copyout(&dst->pos, &dst->count, - &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, - ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + } + membuf_store(&to, reg); } } void do_gpregs_set(struct unw_frame_info *info, void *arg) { - struct pt_regs *pt; struct regset_getset *dst = arg; - elf_greg_t tmp[16]; - unsigned int i, index; if (unw_unwind_to_user(info) < 0) return; + if (!dst->count) + return; /* Skip r0 */ - if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { + if (dst->pos < ELF_GR_OFFSET(1)) { dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count, &dst->u.set.kbuf, &dst->u.set.ubuf, 0, ELF_GR_OFFSET(1)); - if (dst->ret || dst->count == 0) - return; - } - - /* gr1-gr15 */ - if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { - i = dst->pos; - index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); - dst->ret = user_regset_copyin(&dst->pos, &dst->count, - &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, - ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); if (dst->ret) return; - for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) - if (access_elf_reg(dst->target, info, i, - &tmp[index], 1) < 0) { - dst->ret = -EIO; - return; - } - if (dst->count == 0) - return; - } - - /* gr16-gr31 */ - if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { - pt = task_pt_regs(dst->target); - dst->ret = user_regset_copyin(&dst->pos, &dst->count, - &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16, - ELF_GR_OFFSET(16), ELF_NAT_OFFSET); - if (dst->ret || dst->count == 0) - return; } - /* nat, pr, b0 - b7 */ - if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { - i = dst->pos; - index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); - dst->ret = user_regset_copyin(&dst->pos, &dst->count, - &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, - ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); - if (dst->ret) - return; - for (; i < dst->pos; i += sizeof(elf_greg_t), index++) - if (access_elf_reg(dst->target, info, i, - &tmp[index], 1) < 0) { - dst->ret = -EIO; - return; - } - if (dst->count == 0) - return; - } + while (dst->count && dst->pos < ELF_AR_END_OFFSET) { + unsigned int n, from, to; + elf_greg_t tmp[16]; - /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat - * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd - */ - if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { - i = dst->pos; - index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); + from = dst->pos; + to = from + sizeof(tmp); + if (to > ELF_AR_END_OFFSET) + to = ELF_AR_END_OFFSET; + /* get up to 16 values */ dst->ret = user_regset_copyin(&dst->pos, &dst->count, &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, - ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + from, to); if (dst->ret) return; - for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) - if (access_elf_reg(dst->target, info, i, - &tmp[index], 1) < 0) { + /* now copy them into registers */ + for (n = 0; from < dst->pos; from += sizeof(elf_greg_t), n++) + if (access_elf_reg(dst->target, info, from, + &tmp[n], 1) < 0) { dst->ret = -EIO; return; } @@ -1690,60 +1574,36 @@ void do_gpregs_set(struct unw_frame_info *info, void *arg) void do_fpregs_get(struct unw_frame_info *info, void *arg) { - struct regset_getset *dst = arg; - struct task_struct *task = dst->target; - elf_fpreg_t tmp[30]; - int index, min_copy, i; + struct task_struct *task = info->task; + struct regset_membuf *dst = arg; + struct membuf to = dst->to; + elf_fpreg_t reg; + unsigned int n; if (unw_unwind_to_user(info) < 0) return; /* Skip pos 0 and 1 */ - if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { - dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, - &dst->u.get.kbuf, - &dst->u.get.ubuf, - 0, ELF_FP_OFFSET(2)); - if (dst->count == 0 || dst->ret) - return; - } + membuf_zero(&to, 2 * sizeof(elf_fpreg_t)); /* fr2-fr31 */ - if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { - index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t); - - min_copy = min(((unsigned int)ELF_FP_OFFSET(32)), - dst->pos + dst->count); - for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t), - index++) - if (unw_get_fr(info, i / sizeof(elf_fpreg_t), - &tmp[index])) { - dst->ret = -EIO; - return; - } - dst->ret = user_regset_copyout(&dst->pos, &dst->count, - &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, - ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); - if (dst->count == 0 || dst->ret) + for (n = 2; to.left && n < 32; n++) { + if (unw_get_fr(info, n, ®)) { + dst->ret = -EIO; return; + } + membuf_write(&to, ®, sizeof(reg)); } /* fph */ - if (dst->count > 0) { - ia64_flush_fph(dst->target); - if (task->thread.flags & IA64_THREAD_FPH_VALID) - dst->ret = user_regset_copyout( - &dst->pos, &dst->count, - &dst->u.get.kbuf, &dst->u.get.ubuf, - &dst->target->thread.fph, - ELF_FP_OFFSET(32), -1); - else - /* Zero fill instead. */ - dst->ret = user_regset_copyout_zero( - &dst->pos, &dst->count, - &dst->u.get.kbuf, &dst->u.get.ubuf, - ELF_FP_OFFSET(32), -1); - } + if (!to.left) + return; + + ia64_flush_fph(task); + if (task->thread.flags & IA64_THREAD_FPH_VALID) + membuf_write(&to, &task->thread.fph, 96 * sizeof(reg)); + else + membuf_zero(&to, 96 * sizeof(reg)); } void do_fpregs_set(struct unw_frame_info *info, void *arg) @@ -1819,6 +1679,20 @@ void do_fpregs_set(struct unw_frame_info *info, void *arg) } } +static void +unwind_and_call(void (*call)(struct unw_frame_info *, void *), + struct task_struct *target, void *data) +{ + if (target == current) + unw_init_running(call, data); + else { + struct unw_frame_info info; + memset(&info, 0, sizeof(info)); + unw_init_from_blocked_task(&info, target); + (*call)(&info, data); + } +} + static int do_regset_call(void (*call)(struct unw_frame_info *, void *), struct task_struct *target, @@ -1830,27 +1704,18 @@ do_regset_call(void (*call)(struct unw_frame_info *, void *), .pos = pos, .count = count, .u.set = { .kbuf = kbuf, .ubuf = ubuf }, .ret = 0 }; - - if (target == current) - unw_init_running(call, &info); - else { - struct unw_frame_info ufi; - memset(&ufi, 0, sizeof(ufi)); - unw_init_from_blocked_task(&ufi, target); - (*call)(&ufi, &info); - } - + unwind_and_call(call, target, &info); return info.ret; } static int gpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - return do_regset_call(do_gpregs_get, target, regset, pos, count, - kbuf, ubuf); + struct regset_membuf info = {.to = to}; + unwind_and_call(do_gpregs_get, target, &info); + return info.ret; } static int gpregs_set(struct task_struct *target, @@ -1892,11 +1757,11 @@ fpregs_active(struct task_struct *target, const struct user_regset *regset) static int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - return do_regset_call(do_fpregs_get, target, regset, pos, count, - kbuf, ubuf); + struct regset_membuf info = {.to = to}; + unwind_and_call(do_fpregs_get, target, &info); + return info.ret; } static int fpregs_set(struct task_struct *target, @@ -1913,7 +1778,6 @@ access_uarea(struct task_struct *child, unsigned long addr, unsigned long *data, int write_access) { unsigned int pos = -1; /* an invalid value */ - int ret; unsigned long *ptr, regnum; if ((addr & 0x7) != 0) { @@ -1945,14 +1809,39 @@ access_uarea(struct task_struct *child, unsigned long addr, } if (pos != -1) { - if (write_access) - ret = fpregs_set(child, NULL, pos, - sizeof(unsigned long), data, NULL); - else - ret = fpregs_get(child, NULL, pos, - sizeof(unsigned long), data, NULL); - if (ret != 0) - return -1; + unsigned reg = pos / sizeof(elf_fpreg_t); + int which_half = (pos / sizeof(unsigned long)) & 1; + + if (reg < 32) { /* fr2-fr31 */ + struct unw_frame_info info; + elf_fpreg_t fpreg; + + memset(&info, 0, sizeof(info)); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) + return 0; + + if (unw_get_fr(&info, reg, &fpreg)) + return -1; + if (write_access) { + fpreg.u.bits[which_half] = *data; + if (unw_set_fr(&info, reg, fpreg)) + return -1; + } else { + *data = fpreg.u.bits[which_half]; + } + } else { /* fph */ + elf_fpreg_t *p = &child->thread.fph[reg - 32]; + unsigned long *bits = &p->u.bits[which_half]; + + ia64_sync_fph(child); + if (write_access) + *bits = *data; + else if (child->thread.flags & IA64_THREAD_FPH_VALID) + *data = *bits; + else + *data = 0; + } return 0; } @@ -2038,15 +1927,14 @@ access_uarea(struct task_struct *child, unsigned long addr, } if (pos != -1) { - if (write_access) - ret = gpregs_set(child, NULL, pos, - sizeof(unsigned long), data, NULL); - else - ret = gpregs_get(child, NULL, pos, - sizeof(unsigned long), data, NULL); - if (ret != 0) - return -1; - return 0; + struct unw_frame_info info; + + memset(&info, 0, sizeof(info)); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) + return 0; + + return access_elf_reg(child, &info, pos, data, write_access); } /* access debug registers */ @@ -2112,14 +2000,14 @@ static const struct user_regset native_regsets[] = { .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), - .get = gpregs_get, .set = gpregs_set, + .regset_get = gpregs_get, .set = gpregs_set, .writeback = gpregs_writeback }, { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), - .get = fpregs_get, .set = fpregs_set, .active = fpregs_active + .regset_get = fpregs_get, .set = fpregs_set, .active = fpregs_active }, }; diff --git a/arch/m68k/coldfire/stmark2.c b/arch/m68k/coldfire/stmark2.c index a8d2b3d172f9..8b5af9c83244 100644 --- a/arch/m68k/coldfire/stmark2.c +++ b/arch/m68k/coldfire/stmark2.c @@ -13,6 +13,7 @@ #include <linux/spi/spi.h> #include <linux/spi/spi-fsl-dspi.h> #include <linux/spi/flash.h> +#include <linux/dma-mapping.h> #include <asm/mcfsim.h> /* @@ -78,6 +79,8 @@ static struct resource dspi_spi0_resource[] = { }, }; +static u64 stmark2_dspi_mask = DMA_BIT_MASK(32); + /* SPI controller, id = bus number */ static struct platform_device dspi_spi0_device = { .name = "fsl-dspi", @@ -86,6 +89,8 @@ static struct platform_device dspi_spi0_device = { .resource = dspi_spi0_resource, .dev = { .platform_data = &dspi_spi0_info, + .dma_mask = &stmark2_dspi_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 2b746f55f419..d92306472fce 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -1,38 +1,33 @@ CONFIG_LOCALVERSION="stmark2-001" CONFIG_DEFAULT_HOSTNAME="stmark2" CONFIG_SYSVIPC=y -# CONFIG_FHANDLE is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_RD_BZIP2 is not set -# CONFIG_RD_LZMA is not set -# CONFIG_RD_XZ is not set -# CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_FHANDLE is not set # CONFIG_AIO is not set # CONFIG_ADVISE_SYSCALLS is not set # CONFIG_MEMBARRIER is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -# CONFIG_BLK_DEV_BSG is not set -CONFIG_BLK_CMDLINE_PARSER=y -# CONFIG_MMU is not set +CONFIG_COLDFIRE=y CONFIG_M5441x=y CONFIG_CLOCK_FREQ=240000000 CONFIG_STMARK2=y +CONFIG_UBOOT=y CONFIG_RAMBASE=0x40000000 CONFIG_RAMSIZE=0x8000000 CONFIG_VECTORBASE=0x40000000 CONFIG_KERNELBASE=0x40001000 +# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_CMDLINE_PARSER=y CONFIG_BINFMT_FLAT=y +CONFIG_BINFMT_ZFLAT=y CONFIG_BINFMT_MISC=y -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y # CONFIG_ALLOW_DEV_COREDUMP is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -65,24 +60,36 @@ CONFIG_DEBUG_GPIO=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_GENERIC_PLATFORM=y # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +CONFIG_MMC_DEBUG=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_ESDHC_MCF=y +CONFIG_DMADEVICES=y +CONFIG_MCF_EDMA=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set +CONFIG_OVERLAY_FS=y CONFIG_FSCACHE=y # CONFIG_PROC_SYSCTL is not set +CONFIG_CRAMFS=y +CONFIG_SQUASHFS=y +CONFIG_ROMFS_FS=y +CONFIG_CRYPTO_ANSI_CPRNG=y +# CONFIG_CRYPTO_HW is not set CONFIG_PRINTK_TIME=y +# CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_SLUB_DEBUG_ON=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_BOOTPARAM=y -CONFIG_BOOTPARAM_STRING="console=ttyS0,115200 root=/dev/ram0 rw rootfstype=ramfs rdinit=/bin/init devtmpfs.mount=1" -CONFIG_CRYPTO=y -# CONFIG_CRYPTO_ECHAINIV is not set -CONFIG_CRYPTO_ANSI_CPRNG=y -# CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y diff --git a/arch/m68k/include/asm/adb_iop.h b/arch/m68k/include/asm/adb_iop.h index 195d7fb1268c..6aecd020e2fc 100644 --- a/arch/m68k/include/asm/adb_iop.h +++ b/arch/m68k/include/asm/adb_iop.h @@ -29,6 +29,7 @@ #define ADB_IOP_EXPLICIT 0x80 /* nonzero if explicit command */ #define ADB_IOP_AUTOPOLL 0x40 /* auto/SRQ polling enabled */ +#define ADB_IOP_SET_AUTOPOLL 0x20 /* set autopoll device list */ #define ADB_IOP_SRQ 0x04 /* SRQ detected */ #define ADB_IOP_TIMEOUT 0x02 /* nonzero if timeout */ diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h index 38e1d7acc44d..3a3bdcfcd375 100644 --- a/arch/m68k/include/asm/cmpxchg.h +++ b/arch/m68k/include/asm/cmpxchg.h @@ -129,14 +129,6 @@ static inline unsigned long __cmpxchg(volatile void *p, unsigned long old, #else -/* - * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make - * them available. - */ -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\ - (unsigned long)(n), sizeof(*(ptr)))) - #include <asm-generic/cmpxchg.h> #endif diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h index 0498192e1d98..2c96e8480173 100644 --- a/arch/m68k/include/asm/io_no.h +++ b/arch/m68k/include/asm/io_no.h @@ -14,15 +14,15 @@ * that behavior here first before we include asm-generic/io.h. */ #define __raw_readb(addr) \ - ({ unsigned char __v = (*(volatile unsigned char *) (addr)); __v; }) + ({ u8 __v = (*(__force volatile u8 *) (addr)); __v; }) #define __raw_readw(addr) \ - ({ unsigned short __v = (*(volatile unsigned short *) (addr)); __v; }) + ({ u16 __v = (*(__force volatile u16 *) (addr)); __v; }) #define __raw_readl(addr) \ - ({ unsigned int __v = (*(volatile unsigned int *) (addr)); __v; }) + ({ u32 __v = (*(__force volatile u32 *) (addr)); __v; }) -#define __raw_writeb(b, addr) (void)((*(volatile unsigned char *) (addr)) = (b)) -#define __raw_writew(b, addr) (void)((*(volatile unsigned short *) (addr)) = (b)) -#define __raw_writel(b, addr) (void)((*(volatile unsigned int *) (addr)) = (b)) +#define __raw_writeb(b, addr) (void)((*(__force volatile u8 *) (addr)) = (b)) +#define __raw_writew(b, addr) (void)((*(__force volatile u16 *) (addr)) = (b)) +#define __raw_writel(b, addr) (void)((*(__force volatile u32 *) (addr)) = (b)) #if defined(CONFIG_COLDFIRE) /* @@ -67,7 +67,7 @@ static inline u16 readw(const volatile void __iomem *addr) { if (cf_internalio(addr)) return __raw_readw(addr); - return __le16_to_cpu(__raw_readw(addr)); + return swab16(__raw_readw(addr)); } #define readl readl @@ -75,7 +75,7 @@ static inline u32 readl(const volatile void __iomem *addr) { if (cf_internalio(addr)) return __raw_readl(addr); - return __le32_to_cpu(__raw_readl(addr)); + return swab32(__raw_readl(addr)); } #define writew writew @@ -84,7 +84,7 @@ static inline void writew(u16 value, volatile void __iomem *addr) if (cf_internalio(addr)) __raw_writew(value, addr); else - __raw_writew(__cpu_to_le16(value), addr); + __raw_writew(swab16(value), addr); } #define writel writel @@ -93,7 +93,7 @@ static inline void writel(u32 value, volatile void __iomem *addr) if (cf_internalio(addr)) __raw_writel(value, addr); else - __raw_writel(__cpu_to_le32(value), addr); + __raw_writel(swab32(value), addr); } #else diff --git a/arch/m68k/include/asm/m53xxacr.h b/arch/m68k/include/asm/m53xxacr.h index 9138a624c5c8..692f90e7fecc 100644 --- a/arch/m68k/include/asm/m53xxacr.h +++ b/arch/m68k/include/asm/m53xxacr.h @@ -89,9 +89,9 @@ * coherency though in all cases. And for copyback caches we will need * to push cached data as well. */ -#define CACHE_INIT CACR_CINVA -#define CACHE_INVALIDATE CACR_CINVA -#define CACHE_INVALIDATED CACR_CINVA +#define CACHE_INIT (CACHE_MODE + CACR_CINVA - CACR_EC) +#define CACHE_INVALIDATE (CACHE_MODE + CACR_CINVA) +#define CACHE_INVALIDATED (CACHE_MODE + CACR_CINVA) #define ACR0_MODE ((CONFIG_RAMBASE & 0xff000000) + \ (0x000f0000) + \ diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 7068126cedc1..2b9cb4a62281 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -40,7 +40,6 @@ void __init paging_init(void) unsigned long address, size; unsigned long next_pgtable, bootmem_end; unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; - enum zone_type zone; int i; empty_zero_page = (void *) memblock_alloc(PAGE_SIZE, PAGE_SIZE); @@ -215,11 +214,6 @@ void __init cf_mmu_context_init(void) /* * Steal a context from a task that has one at the moment. - * This is only used on 8xx and 4xx and we presently assume that - * they don't do SMP. If they do then thicfpgalloc.hs will have to check - * whether the MM we steal is in use. - * We also assume that this is only used on systems that don't - * use an MMU hash table - this is true for 8xx and 4xx. * This isn't an LRU system, it just frees up each context in * turn (sort-of pseudo-random replacement :). This would be the * place to implement an LRU scheme if anyone was motivated to do it. diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 2a61641c680b..db7c5be1d4a3 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -210,15 +210,13 @@ int ptrace_set_watch_regs(struct task_struct *child, static int gpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); u32 uregs[ELF_NGREG] = {}; mips_dump_regs32(uregs, regs); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, - sizeof(uregs)); + return membuf_write(&to, uregs, sizeof(uregs)); } static int gpr32_set(struct task_struct *target, @@ -277,15 +275,13 @@ static int gpr32_set(struct task_struct *target, static int gpr64_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); u64 uregs[ELF_NGREG] = {}; mips_dump_regs64(uregs, regs); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, - sizeof(uregs)); + return membuf_write(&to, uregs, sizeof(uregs)); } static int gpr64_set(struct task_struct *target, @@ -408,13 +404,11 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots * correspond 1:1 to buffer slots. Only general registers are copied. */ -static int fpr_get_fpa(struct task_struct *target, - unsigned int *pos, unsigned int *count, - void **kbuf, void __user **ubuf) +static void fpr_get_fpa(struct task_struct *target, + struct membuf *to) { - return user_regset_copyout(pos, count, kbuf, ubuf, - &target->thread.fpu, - 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); + membuf_write(to, &target->thread.fpu, + NUM_FPU_REGS * sizeof(elf_fpreg_t)); } /* @@ -423,25 +417,13 @@ static int fpr_get_fpa(struct task_struct *target, * general register slots are copied to buffer slots. Only general * registers are copied. */ -static int fpr_get_msa(struct task_struct *target, - unsigned int *pos, unsigned int *count, - void **kbuf, void __user **ubuf) +static void fpr_get_msa(struct task_struct *target, struct membuf *to) { unsigned int i; - u64 fpr_val; - int err; - - BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS; i++) { - fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); - err = user_regset_copyout(pos, count, kbuf, ubuf, - &fpr_val, i * sizeof(elf_fpreg_t), - (i + 1) * sizeof(elf_fpreg_t)); - if (err) - return err; - } - return 0; + BUILD_BUG_ON(sizeof(u64) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS; i++) + membuf_store(to, get_fpr64(&target->thread.fpu.fpr[i], 0)); } /* @@ -451,31 +433,16 @@ static int fpr_get_msa(struct task_struct *target, */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); - const int fir_pos = fcr31_pos + sizeof(u32); - int err; - if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) - err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); + fpr_get_fpa(target, &to); else - err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); - if (err) - return err; - - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.fcr31, - fcr31_pos, fcr31_pos + sizeof(u32)); - if (err) - return err; + fpr_get_msa(target, &to); - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &boot_cpu_data.fpu_id, - fir_pos, fir_pos + sizeof(u32)); - - return err; + membuf_write(&to, &target->thread.fpu.fcr31, sizeof(u32)); + membuf_write(&to, &boot_cpu_data.fpu_id, sizeof(u32)); + return 0; } /* @@ -576,14 +543,9 @@ static int fpr_set(struct task_struct *target, /* Copy the FP mode setting to the supplied NT_MIPS_FP_MODE buffer. */ static int fp_mode_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int fp_mode; - - fp_mode = mips_get_process_fp_mode(target); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fp_mode, 0, - sizeof(fp_mode)); + return membuf_store(&to, (int)mips_get_process_fp_mode(target)); } /* @@ -630,13 +592,12 @@ struct msa_control_regs { unsigned int msacsr; }; -static int copy_pad_fprs(struct task_struct *target, +static void copy_pad_fprs(struct task_struct *target, const struct user_regset *regset, - unsigned int *ppos, unsigned int *pcount, - void **pkbuf, void __user **pubuf, + struct membuf *to, unsigned int live_sz) { - int i, j, start, start_pad, err; + int i, j; unsigned long long fill = ~0ull; unsigned int cp_sz, pad_sz; @@ -644,28 +605,16 @@ static int copy_pad_fprs(struct task_struct *target, pad_sz = regset->size - cp_sz; WARN_ON(pad_sz % sizeof(fill)); - i = start = err = 0; - for (; i < NUM_FPU_REGS; i++, start += regset->size) { - err |= user_regset_copyout(ppos, pcount, pkbuf, pubuf, - &target->thread.fpu.fpr[i], - start, start + cp_sz); - - start_pad = start + cp_sz; - for (j = 0; j < (pad_sz / sizeof(fill)); j++) { - err |= user_regset_copyout(ppos, pcount, pkbuf, pubuf, - &fill, start_pad, - start_pad + sizeof(fill)); - start_pad += sizeof(fill); - } + for (i = 0; i < NUM_FPU_REGS; i++) { + membuf_write(to, &target->thread.fpu.fpr[i], cp_sz); + for (j = 0; j < (pad_sz / sizeof(fill)); j++) + membuf_store(to, fill); } - - return err; } static int msa_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const unsigned int wr_size = NUM_FPU_REGS * regset->size; const struct msa_control_regs ctrl_regs = { @@ -674,32 +623,23 @@ static int msa_get(struct task_struct *target, .msair = boot_cpu_data.msa_id, .msacsr = target->thread.fpu.msacsr, }; - int err; if (!tsk_used_math(target)) { /* The task hasn't used FP or MSA, fill with 0xff */ - err = copy_pad_fprs(target, regset, &pos, &count, - &kbuf, &ubuf, 0); + copy_pad_fprs(target, regset, &to, 0); } else if (!test_tsk_thread_flag(target, TIF_MSA_CTX_LIVE)) { /* Copy scalar FP context, fill the rest with 0xff */ - err = copy_pad_fprs(target, regset, &pos, &count, - &kbuf, &ubuf, 8); + copy_pad_fprs(target, regset, &to, 8); } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { /* Trivially copy the vector registers */ - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.fpr, - 0, wr_size); + membuf_write(&to, &target->thread.fpu.fpr, wr_size); } else { /* Copy as much context as possible, fill the rest with 0xff */ - err = copy_pad_fprs(target, regset, &pos, &count, - &kbuf, &ubuf, - sizeof(target->thread.fpu.fpr[0])); + copy_pad_fprs(target, regset, &to, + sizeof(target->thread.fpu.fpr[0])); } - err |= user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &ctrl_regs, wr_size, - wr_size + sizeof(ctrl_regs)); - return err; + return membuf_write(&to, &ctrl_regs, sizeof(ctrl_regs)); } static int msa_set(struct task_struct *target, @@ -752,34 +692,20 @@ static int msa_set(struct task_struct *target, */ static int dsp32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned int start, num_regs, i; u32 dspregs[NUM_DSP_REGS + 1]; + unsigned int i; - BUG_ON(count % sizeof(u32)); + BUG_ON(to.left % sizeof(u32)); if (!cpu_has_dsp) return -EIO; - start = pos / sizeof(u32); - num_regs = count / sizeof(u32); - - if (start + num_regs > NUM_DSP_REGS + 1) - return -EIO; - - for (i = start; i < num_regs; i++) - switch (i) { - case 0 ... NUM_DSP_REGS - 1: - dspregs[i] = target->thread.dsp.dspr[i]; - break; - case NUM_DSP_REGS: - dspregs[i] = target->thread.dsp.dspcontrol; - break; - } - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, dspregs, 0, - sizeof(dspregs)); + for (i = 0; i < NUM_DSP_REGS; i++) + dspregs[i] = target->thread.dsp.dspr[i]; + dspregs[NUM_DSP_REGS] = target->thread.dsp.dspcontrol; + return membuf_write(&to, dspregs, sizeof(dspregs)); } /* @@ -832,34 +758,20 @@ static int dsp32_set(struct task_struct *target, */ static int dsp64_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned int start, num_regs, i; u64 dspregs[NUM_DSP_REGS + 1]; + unsigned int i; - BUG_ON(count % sizeof(u64)); + BUG_ON(to.left % sizeof(u64)); if (!cpu_has_dsp) return -EIO; - start = pos / sizeof(u64); - num_regs = count / sizeof(u64); - - if (start + num_regs > NUM_DSP_REGS + 1) - return -EIO; - - for (i = start; i < num_regs; i++) - switch (i) { - case 0 ... NUM_DSP_REGS - 1: - dspregs[i] = target->thread.dsp.dspr[i]; - break; - case NUM_DSP_REGS: - dspregs[i] = target->thread.dsp.dspcontrol; - break; - } - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, dspregs, 0, - sizeof(dspregs)); + for (i = 0; i < NUM_DSP_REGS; i++) + dspregs[i] = target->thread.dsp.dspr[i]; + dspregs[NUM_DSP_REGS] = target->thread.dsp.dspcontrol; + return membuf_write(&to, dspregs, sizeof(dspregs)); } /* @@ -1018,7 +930,7 @@ static const struct user_regset mips_regsets[] = { .n = ELF_NGREG, .size = sizeof(unsigned int), .align = sizeof(unsigned int), - .get = gpr32_get, + .regset_get = gpr32_get, .set = gpr32_set, }, [REGSET_DSP] = { @@ -1026,7 +938,7 @@ static const struct user_regset mips_regsets[] = { .n = NUM_DSP_REGS + 1, .size = sizeof(u32), .align = sizeof(u32), - .get = dsp32_get, + .regset_get = dsp32_get, .set = dsp32_set, .active = dsp_active, }, @@ -1036,7 +948,7 @@ static const struct user_regset mips_regsets[] = { .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), - .get = fpr_get, + .regset_get = fpr_get, .set = fpr_set, }, [REGSET_FP_MODE] = { @@ -1044,7 +956,7 @@ static const struct user_regset mips_regsets[] = { .n = 1, .size = sizeof(int), .align = sizeof(int), - .get = fp_mode_get, + .regset_get = fp_mode_get, .set = fp_mode_set, }, #endif @@ -1054,7 +966,7 @@ static const struct user_regset mips_regsets[] = { .n = NUM_FPU_REGS + 1, .size = 16, .align = 16, - .get = msa_get, + .regset_get = msa_get, .set = msa_set, }, #endif @@ -1078,7 +990,7 @@ static const struct user_regset mips64_regsets[] = { .n = ELF_NGREG, .size = sizeof(unsigned long), .align = sizeof(unsigned long), - .get = gpr64_get, + .regset_get = gpr64_get, .set = gpr64_set, }, [REGSET_DSP] = { @@ -1086,7 +998,7 @@ static const struct user_regset mips64_regsets[] = { .n = NUM_DSP_REGS + 1, .size = sizeof(u64), .align = sizeof(u64), - .get = dsp64_get, + .regset_get = dsp64_get, .set = dsp64_set, .active = dsp_active, }, @@ -1096,7 +1008,7 @@ static const struct user_regset mips64_regsets[] = { .n = 1, .size = sizeof(int), .align = sizeof(int), - .get = fp_mode_get, + .regset_get = fp_mode_get, .set = fp_mode_set, }, [REGSET_FPR] = { @@ -1104,7 +1016,7 @@ static const struct user_regset mips64_regsets[] = { .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), - .get = fpr_get, + .regset_get = fpr_get, .set = fpr_set, }, #endif @@ -1114,7 +1026,7 @@ static const struct user_regset mips64_regsets[] = { .n = NUM_FPU_REGS + 1, .size = 16, .align = 16, - .get = msa_get, + .regset_get = msa_get, .set = msa_set, }, #endif diff --git a/arch/nds32/kernel/ptrace.c b/arch/nds32/kernel/ptrace.c index eaaf7a999b20..d0eda870fbc2 100644 --- a/arch/nds32/kernel/ptrace.c +++ b/arch/nds32/kernel/ptrace.c @@ -13,11 +13,10 @@ enum nds32_regset { static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user * ubuf) + struct membuf to) { - struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); + return membuf_write(&to, &task_pt_regs(target)->user_regs, + sizeof(struct user_pt_regs)); } static int gpr_set(struct task_struct *target, const struct user_regset *regset, @@ -41,7 +40,7 @@ static const struct user_regset nds32_regsets[] = { .n = sizeof(struct user_pt_regs) / sizeof(u32), .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), - .get = gpr_get, + .regset_get = gpr_get, .set = gpr_set} }; diff --git a/arch/nios2/kernel/ptrace.c b/arch/nios2/kernel/ptrace.c index de97bcb7dd44..a6ea9e1b4f61 100644 --- a/arch/nios2/kernel/ptrace.c +++ b/arch/nios2/kernel/ptrace.c @@ -21,45 +21,24 @@ static int genregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_regs *regs = task_pt_regs(target); const struct switch_stack *sw = (struct switch_stack *)regs - 1; - int ret = 0; - -#define REG_O_ZERO_RANGE(START, END) \ - if (!ret) \ - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, \ - START * 4, (END * 4) + 4); - -#define REG_O_ONE(PTR, LOC) \ - if (!ret) \ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \ - LOC * 4, (LOC * 4) + 4); -#define REG_O_RANGE(PTR, START, END) \ - if (!ret) \ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, PTR, \ - START * 4, (END * 4) + 4); - - REG_O_ZERO_RANGE(PTR_R0, PTR_R0); - REG_O_RANGE(®s->r1, PTR_R1, PTR_R7); - REG_O_RANGE(®s->r8, PTR_R8, PTR_R15); - REG_O_RANGE(sw, PTR_R16, PTR_R23); - REG_O_ZERO_RANGE(PTR_R24, PTR_R25); /* et and bt */ - REG_O_ONE(®s->gp, PTR_GP); - REG_O_ONE(®s->sp, PTR_SP); - REG_O_ONE(®s->fp, PTR_FP); - REG_O_ONE(®s->ea, PTR_EA); - REG_O_ZERO_RANGE(PTR_BA, PTR_BA); - REG_O_ONE(®s->ra, PTR_RA); - REG_O_ONE(®s->ea, PTR_PC); /* use ea for PC */ - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - PTR_STATUS * 4, -1); - - return ret; + membuf_zero(&to, 4); // R0 + membuf_write(&to, ®s->r1, 7 * 4); // R1..R7 + membuf_write(&to, ®s->r8, 8 * 4); // R8..R15 + membuf_write(&to, sw, 8 * 4); // R16..R23 + membuf_zero(&to, 2 * 4); /* et and bt */ + membuf_store(&to, regs->gp); + membuf_store(&to, regs->sp); + membuf_store(&to, regs->fp); + membuf_store(&to, regs->ea); + membuf_zero(&to, 4); // PTR_BA + membuf_store(&to, regs->ra); + membuf_store(&to, regs->ea); /* use ea for PC */ + return membuf_zero(&to, (NUM_PTRACE_REG - PTR_PC) * 4); } /* @@ -121,7 +100,7 @@ static const struct user_regset nios2_regsets[] = { .n = NUM_PTRACE_REG, .size = sizeof(unsigned long), .align = sizeof(unsigned long), - .get = genregs_get, + .regset_get = genregs_get, .set = genregs_set, } }; diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index c8f47a623754..4d60ae2a12fa 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -44,29 +44,15 @@ */ static int genregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user * ubuf) + struct membuf to) { const struct pt_regs *regs = task_pt_regs(target); - int ret; /* r0 */ - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, 0, 4); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - regs->gpr+1, 4, 4*32); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - ®s->pc, 4*32, 4*33); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - ®s->sr, 4*33, 4*34); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 4*34, -1); - - return ret; + membuf_zero(&to, 4); + membuf_write(&to, regs->gpr + 1, 31 * 4); + membuf_store(&to, regs->pc); + return membuf_store(&to, regs->sr); } /* @@ -114,7 +100,7 @@ static const struct user_regset or1k_regsets[] = { .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .get = genregs_get, + .regset_get = genregs_get, .set = genregs_set, }, }; diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index b51418ad8655..2127974982df 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -391,31 +391,11 @@ void do_syscall_trace_exit(struct pt_regs *regs) static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_regs(target); - __u64 *k = kbuf; - __u64 __user *u = ubuf; - __u64 reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < ELF_NFPREG; --count) - *k++ = regs->fr[pos++]; - else - for (; count > 0 && pos < ELF_NFPREG; --count) - if (__put_user(regs->fr[pos++], u++)) - return -EFAULT; - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - ELF_NFPREG * sizeof(reg), -1); + return membuf_write(&to, regs->fr, ELF_NFPREG * sizeof(__u64)); } static int fpr_set(struct task_struct *target, @@ -527,30 +507,14 @@ static void set_reg(struct pt_regs *regs, int num, unsigned long val) static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_regs(target); - unsigned long *k = kbuf; - unsigned long __user *u = ubuf; - unsigned long reg; + unsigned int pos; - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < ELF_NGREG; --count) - *k++ = get_reg(regs, pos++); - else - for (; count > 0 && pos < ELF_NGREG; --count) - if (__put_user(get_reg(regs, pos++), u++)) - return -EFAULT; - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - ELF_NGREG * sizeof(reg), -1); + for (pos = 0; pos < ELF_NGREG; pos++) + membuf_store(&to, get_reg(regs, pos)); + return 0; } static int gpr_set(struct task_struct *target, @@ -588,12 +552,12 @@ static const struct user_regset native_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .get = gpr_get, .set = gpr_set + .regset_get = gpr_get, .set = gpr_set }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(__u64), .align = sizeof(__u64), - .get = fpr_get, .set = fpr_set + .regset_get = fpr_get, .set = fpr_set } }; @@ -607,31 +571,15 @@ static const struct user_regset_view user_parisc_native_view = { static int gpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_regs(target); - compat_ulong_t *k = kbuf; - compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; + unsigned int pos; - pos /= sizeof(reg); - count /= sizeof(reg); + for (pos = 0; pos < ELF_NGREG; pos++) + membuf_store(&to, (compat_ulong_t)get_reg(regs, pos)); - if (kbuf) - for (; count > 0 && pos < ELF_NGREG; --count) - *k++ = get_reg(regs, pos++); - else - for (; count > 0 && pos < ELF_NGREG; --count) - if (__put_user((compat_ulong_t) get_reg(regs, pos++), u++)) - return -EFAULT; - - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - ELF_NGREG * sizeof(reg), -1); + return 0; } static int gpr32_set(struct task_struct *target, @@ -672,12 +620,12 @@ static const struct user_regset compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = gpr32_get, .set = gpr32_set + .regset_get = gpr32_get, .set = gpr32_set }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(__u64), .align = sizeof(__u64), - .get = fpr_get, .set = fpr_set + .regset_get = fpr_get, .set = fpr_set } }; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 29a057908173..1f48bbfb3ce9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -131,6 +131,7 @@ config PPC select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST @@ -145,6 +146,8 @@ config PPC select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS + select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF @@ -491,6 +494,19 @@ config HOTPLUG_CPU Say N if you are unsure. +config PPC_QUEUED_SPINLOCKS + bool "Queued spinlocks" + depends on SMP + help + Say Y here to use queued spinlocks which give better scalability and + fairness on large SMP and NUMA systems without harming single threaded + performance. + + This option is currently experimental, the code is more complex and + less tested so it defaults to "N" for the moment. + + If unsure, say "N". + config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU @@ -834,13 +850,16 @@ config FORCE_MAX_ZONEORDER this in mind when choosing a value for this option. config PPC_SUBPAGE_PROT - bool "Support setting protections for 4k subpages" + bool "Support setting protections for 4k subpages (subpage_prot syscall)" + default n depends on PPC_BOOK3S_64 && PPC_64K_PAGES help - This option adds support for a system call to allow user programs + This option adds support for system call to allow user programs to set access permissions (read/write, readonly, or no access) on the 4k subpages of each 64k page. + If unsure, say N here. + config PPC_COPRO_BASE bool @@ -860,12 +879,8 @@ config PPC_DENORMALISATION Add support for handling denormalisation of single precision values. Useful for bare metal only. If unsure say Y here. -config CMDLINE_BOOL - bool "Default bootloader kernel arguments" - config CMDLINE - string "Initial kernel command string" if CMDLINE_BOOL - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL + string "Initial kernel command string" default "" help On some platforms, there is currently no way for the boot loader to @@ -1199,6 +1214,7 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx + default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX default "0xc0000000" endmenu diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f310c32e88a4..3e8da9cf2eb9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -246,7 +246,8 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -cpu-as-$(CONFIG_4xx) += -Wa,-m405 +cpu-as-$(CONFIG_40x) += -Wa,-m405 +cpu-as-$(CONFIG_44x) += -Wa,-m440 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 63d7456b9518..44af71543380 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,10 +70,10 @@ BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) DTC_FLAGS ?= -p 1024 $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405 -$(obj)/ebony.o: BOOTCFLAGS += -mcpu=405 +$(obj)/ebony.o: BOOTCFLAGS += -mcpu=440 $(obj)/cuboot-hotfoot.o: BOOTCFLAGS += -mcpu=405 -$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 -$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 +$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=440 +$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=440 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-iss4xx.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405 @@ -117,7 +117,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c -src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c +src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts index cd9d66041a3f..df18f8dc4642 100644 --- a/arch/powerpc/boot/dts/akebono.dts +++ b/arch/powerpc/boot/dts/akebono.dts @@ -248,7 +248,7 @@ }; }; - PCIE0: pciex@10100000000 { + PCIE0: pcie@10100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -288,7 +288,7 @@ 0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>; }; - PCIE1: pciex@20100000000 { + PCIE1: pcie@20100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -328,7 +328,7 @@ 0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>; }; - PCIE2: pciex@18100000000 { + PCIE2: pcie@18100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -368,7 +368,7 @@ 0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>; }; - PCIE3: pciex@28100000000 { + PCIE3: pcie@28100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts index cc965a1816b6..aa1ae94cd776 100644 --- a/arch/powerpc/boot/dts/bluestone.dts +++ b/arch/powerpc/boot/dts/bluestone.dts @@ -325,7 +325,7 @@ }; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 0d6ac92d0f5e..c5fbb08e0a6e 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -461,7 +461,7 @@ interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -503,7 +503,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index b6d87b9c2cef..aea8af810106 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -122,7 +122,7 @@ }; }; - PCIE0: pciex@10100000000 { // 4xGBIF1 + PCIE0: pcie@10100000000 { // 4xGBIF1 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -160,7 +160,7 @@ 0x0 0x0 0x0 0x4 &MPIC 49 0x2 /* int D */>; }; - PCIE1: pciex@30100000000 { // 4xGBIF0 + PCIE1: pcie@30100000000 { // 4xGBIF0 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -197,7 +197,7 @@ 0x0 0x0 0x0 0x4 &MPIC 41 0x2 /* int D */>; }; - PCIE2: pciex@38100000000 { // 2xGBIF0 + PCIE2: pcie@38100000000 { // 2xGBIF0 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts index 65e20152e22f..969b32c4f2d5 100644 --- a/arch/powerpc/boot/dts/fsl/p4080ds.dts +++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts @@ -125,11 +125,11 @@ i2c@118100 { eeprom@51 { - compatible = "atmel,24c256"; + compatible = "atmel,spd"; reg = <0x51>; }; eeprom@52 { - compatible = "atmel,24c256"; + compatible = "atmel,spd"; reg = <0x52>; }; rtc@68 { @@ -143,6 +143,45 @@ }; }; + i2c@118000 { + zl2006@21 { + compatible = "zl2006"; + reg = <0x21>; + }; + zl2006@22 { + compatible = "zl2006"; + reg = <0x22>; + }; + zl2006@23 { + compatible = "zl2006"; + reg = <0x23>; + }; + zl2006@24 { + compatible = "zl2006"; + reg = <0x24>; + }; + eeprom@50 { + compatible = "atmel,24c64"; + reg = <0x50>; + }; + eeprom@55 { + compatible = "atmel,24c64"; + reg = <0x55>; + }; + eeprom@56 { + compatible = "atmel,24c64"; + reg = <0x56>; + }; + eeprom@57 { + compatible = "atmel,24c02"; + reg = <0x57>; + }; + }; + + i2c@119100 { + /* 0x6E: ICS9FG108 */ + }; + usb0: usb@210000 { phy_type = "ulpi"; }; diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts index a7a802f4ffdd..e84ff1afb58c 100644 --- a/arch/powerpc/boot/dts/glacier.dts +++ b/arch/powerpc/boot/dts/glacier.dts @@ -489,7 +489,7 @@ interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -531,7 +531,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts index cb16dad43c92..f81ce8786d59 100644 --- a/arch/powerpc/boot/dts/haleakala.dts +++ b/arch/powerpc/boot/dts/haleakala.dts @@ -237,7 +237,7 @@ }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts index 2e6e3a7b2604..fbaa60b8f87a 100644 --- a/arch/powerpc/boot/dts/icon.dts +++ b/arch/powerpc/boot/dts/icon.dts @@ -315,7 +315,7 @@ interrupt-map = <0x0 0x0 0x0 0x0 &UIC1 19 0x8>; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -356,7 +356,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts index 02629e119b87..a8f353229fb7 100644 --- a/arch/powerpc/boot/dts/katmai.dts +++ b/arch/powerpc/boot/dts/katmai.dts @@ -319,7 +319,7 @@ >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -360,7 +360,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -401,7 +401,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>; }; - PCIE2: pciex@d40000000 { + PCIE2: pcie@d40000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 2a3413221cc1..a709fb47a180 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -322,7 +322,7 @@ }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -363,7 +363,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@c0000000 { + PCIE1: pcie@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts index bf8fe1629392..c473cd911bca 100644 --- a/arch/powerpc/boot/dts/makalu.dts +++ b/arch/powerpc/boot/dts/makalu.dts @@ -268,7 +268,7 @@ }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -309,7 +309,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@c0000000 { + PCIE1: pcie@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts index f3e046fb49e2..f38035a1f4a1 100644 --- a/arch/powerpc/boot/dts/redwood.dts +++ b/arch/powerpc/boot/dts/redwood.dts @@ -235,7 +235,7 @@ has-new-stacr-staopc; }; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -276,7 +276,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -317,7 +317,7 @@ 0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>; }; - PCIE2: pciex@d40000000 { + PCIE2: pcie@d40000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index a9d209135975..cae31a6e8f02 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -104,7 +104,7 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen, { /* If we have an image attached to us, it overrides anything * supplied by the loader. */ - if (_initrd_end > _initrd_start) { + if (&_initrd_end > &_initrd_start) { printf("Attached initrd image at 0x%p-0x%p\n\r", _initrd_start, _initrd_end); initrd_addr = (unsigned long)_initrd_start; @@ -152,7 +152,7 @@ static void prep_esm_blob(struct addr_range vmlinux, void *chosen) unsigned long esm_blob_addr, esm_blob_size; /* Do we have an ESM (Enter Secure Mode) blob? */ - if (_esm_blob_end <= _esm_blob_start) + if (&_esm_blob_end <= &_esm_blob_start) return; printf("Attached ESM blob at 0x%p-0x%p\n\r", diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index c52552a681c5..6e4efbdb6b7c 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -127,7 +127,7 @@ void platform_init(void) ps3_repository_read_rm_size(&rm_size); dt_fixup_memory(0, rm_size); - if (_initrd_end > _initrd_start) { + if (&_initrd_end > &_initrd_start) { setprop_val(chosen, "linux,initrd-start", (u32)(_initrd_start)); setprop_val(chosen, "linux,initrd-end", (u32)(_initrd_end)); } diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 0bfa7e87e546..9a19e5905485 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -128,7 +128,7 @@ int serial_console_init(void) dt_is_compatible(devp, "fsl,cpm2-smc-uart")) rc = cpm_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_PPC_MPC52XX +#ifdef CONFIG_PPC_MPC52xx else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 7705a5c3f4ea..3894ba8f8ffc 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -19,8 +19,6 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y CONFIG_IRQ_ALL_CPUS=y # CONFIG_COMPACTION is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" # CONFIG_SUSPEND is not set CONFIG_NET=y CONFIG_PACKET=y @@ -58,7 +56,6 @@ CONFIG_BLK_DEV_SD=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_EXAR is not set CONFIG_IBM_EMAC=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 82c6f49b8dcb..41d04e70d4fb 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_ARCHES=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index 679213214a75..acbce718eaa8 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_BAMBOO=y # CONFIG_EBONY is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig index 8006a5728afd..37088f250c9e 100644 --- a/arch/powerpc/configs/44x/bluestone_defconfig +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -11,8 +11,6 @@ CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_BLUESTONE=y # CONFIG_EBONY is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index ccc14eb7a2f1..61776ade572b 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_CANYONLANDS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index be76e066df01..34c86b3abecb 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -17,8 +17,6 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" # CONFIG_SUSPEND is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index 1abaa63e067f..509300f400e2 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_EIGER=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index e67fc041ca3e..30845ce0885a 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -28,7 +28,6 @@ CONFIG_476FPE_ERR46=y CONFIG_SWIOTLB=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="ip=on rw" # CONFIG_SUSPEND is not set # CONFIG_PCI is not set diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index 7d7ff84c8200..930948a1da76 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_ICON=y CONFIG_HIGHMEM=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index fb5c73a29bf4..2c3834eebca3 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -17,7 +17,6 @@ CONFIG_ISS4xx=y CONFIG_HZ_100=y CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="root=/dev/issblk0" # CONFIG_PCI is not set CONFIG_ADVANCED_OPTIONS=y diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index c6dc1445fc04..1a0f1c3e0ee9 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_KATMAI=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index c83ad03182df..6dd67de06a0b 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_RAINIER=y CONFIG_MATH_EMULATION=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index 640fe1d5af28..e28d76416537 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_REDWOOD=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 22dc0dadf576..ef09786d49b9 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -12,8 +12,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y # CONFIG_EBONY is not set CONFIG_SAM440EP=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index 2c0973db8837..b4984eab43eb 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_SEQUOIA=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index a2d355ca62b2..3ea5932ab852 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_TAISHAN=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index af66c69c49fe..47252c2d7669 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_WARP=y CONFIG_PPC4xx_GPIO=y CONFIG_HZ_1000=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="ip=on" # CONFIG_PCI is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index b507df6ac69f..524db76f47b7 100644 --- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -67,7 +67,6 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_M25P80=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_RAW_NAND=y diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index d50aca608736..3a6381aa9fdc 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -51,9 +51,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config index 151164cf8cb3..0cb24b33c88e 100644 --- a/arch/powerpc/configs/86xx-hw.config +++ b/arch/powerpc/configs/86xx-hw.config @@ -32,8 +32,6 @@ CONFIG_HW_RANDOM=y CONFIG_HZ_1000=y CONFIG_I2C_MPC=y CONFIG_I2C=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSEDEV is not set diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index 3c7dad19a691..df37efed0aec 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -56,7 +56,6 @@ CONFIG_IKCONFIG=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MROUTE=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index a68c7f3af10e..1c674c4c1d86 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -51,7 +51,6 @@ CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig index 067f433c8f5e..271daff47d1d 100644 --- a/arch/powerpc/configs/holly_defconfig +++ b/arch/powerpc/configs/holly_defconfig @@ -13,7 +13,6 @@ CONFIG_EMBEDDED6xx=y CONFIG_PPC_HOLLY=y CONFIG_GEN_RTC=y CONFIG_BINFMT_MISC=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_SECCOMP is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index ea59f3d146df..d4be64f190ff 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -37,7 +37,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index e39346b3dc3b..e75d3f3060c9 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -47,7 +47,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=1 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_RAM_DAX=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_AT25=y CONFIG_SCSI=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index be125729635c..95d43f8a3869 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -19,7 +19,6 @@ CONFIG_MPC836x_MDS=y CONFIG_MPC836x_RDK=y CONFIG_MPC837x_MDS=y CONFIG_MPC837x_RDB=y -CONFIG_SBC834x=y CONFIG_ASP834x=y CONFIG_QE_GPIO=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index 0a0d046fc445..1fed6be95d53 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -20,10 +20,9 @@ CONFIG_EMBEDDED6xx=y CONFIG_MVME5100=y CONFIG_KVM_GUEST=y CONFIG_HZ_100=y +CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -46,7 +45,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 08b7f4cef243..af9af03059e4 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -58,7 +58,6 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y -CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 05e325ca3fbd..665a8d7cded0 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -75,7 +75,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -90,13 +89,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_IP_DCCP=m -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -CONFIG_IRTTY_SIR=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 2de9aadf0f50..cf30fc24413b 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -64,7 +64,6 @@ CONFIG_HWPOISON_INJECT=m CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y CONFIG_PM=y CONFIG_HOTPLUG_PCI=y @@ -246,7 +245,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 25f6c91e843a..7e48693775f4 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 8d7e3e98856d..48759656a067 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -268,7 +268,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index feb5d47d8d1e..5e6f92ba3210 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -53,7 +53,6 @@ CONFIG_MPC836x_MDS=y CONFIG_MPC836x_RDK=y CONFIG_MPC837x_MDS=y CONFIG_MPC837x_RDB=y -CONFIG_SBC834x=y CONFIG_ASP834x=y CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y @@ -187,7 +186,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -203,7 +201,6 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -241,7 +238,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_DCCP=m -CONFIG_NET_DCCPPROBE=m CONFIG_TIPC=m CONFIG_ATM=m CONFIG_ATM_CLIP=m @@ -251,7 +247,6 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_DECNET=m CONFIG_DECNET_ROUTER=y -CONFIG_IPX=m CONFIG_ATALK=m CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m @@ -297,26 +292,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -CONFIG_IRTTY_SIR=m -CONFIG_KINGSUN_DONGLE=m -CONFIG_KSDAZZLE_DONGLE=m -CONFIG_KS959_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_SIGMATEL_FIR=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m -CONFIG_VIA_FIR=m -CONFIG_MCS_FIR=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y @@ -332,7 +307,6 @@ CONFIG_BT_HCIBFUSB=m CONFIG_BT_HCIDTL1=m CONFIG_BT_HCIBT3C=m CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBTUART=m CONFIG_BT_HCIVHCI=m CONFIG_CFG80211=m CONFIG_MAC80211=m @@ -366,7 +340,6 @@ CONFIG_EEPROM_93CX6=m CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m @@ -663,7 +636,6 @@ CONFIG_I2C_MPC=m CONFIG_I2C_PCA_PLATFORM=m CONFIG_I2C_SIMTEC=m CONFIG_I2C_PARPORT=m -CONFIG_I2C_PARPORT_LIGHT=m CONFIG_I2C_TINY_USB=m CONFIG_I2C_PCA_ISA=m CONFIG_I2C_STUB=m @@ -676,7 +648,6 @@ CONFIG_W1_SLAVE_THERM=m CONFIG_W1_SLAVE_SMEM=m CONFIG_W1_SLAVE_DS2433=m CONFIG_W1_SLAVE_DS2433_CRC=y -CONFIG_W1_SLAVE_DS2760=m CONFIG_APM_POWER=m CONFIG_BATTERY_PMU=m CONFIG_HWMON=m @@ -1065,15 +1036,6 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y CONFIG_CIFS_DFS_UPCALL=y -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y CONFIG_CODA_FS=m CONFIG_9P_FS=m CONFIG_NLS_DEFAULT="utf8" @@ -1117,7 +1079,6 @@ CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y -CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_OBJECTS=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 81b55c880fc3..142f1321fa58 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -34,8 +34,6 @@ CONFIG_KEXEC=y # CONFIG_SPARSEMEM_VMEMMAP is not set # CONFIG_COMPACTION is not set CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PM=y CONFIG_PM_DEBUG=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index dfa4a726333b..d5dece981c02 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -57,7 +57,6 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m @@ -94,6 +93,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m +CONFIG_BLK_DEV_NVME=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -224,7 +224,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index ad6739ac63dc..b806a5d3a695 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -45,7 +45,6 @@ CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet" # CONFIG_SECCOMP is not set # CONFIG_PPC_MEM_KEYS is not set diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index b964084e4056..47dcfaddc1ac 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -12,7 +12,6 @@ CONFIG_EMBEDDED6xx=y CONFIG_STORCENTER=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_SECCOMP is not set CONFIG_NET=y diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S index c3524eba4d0d..a16a717c809c 100644 --- a/arch/powerpc/crypto/crc32-vpmsum_core.S +++ b/arch/powerpc/crypto/crc32-vpmsum_core.S @@ -19,7 +19,7 @@ * We then use fixed point Barrett reduction to compute a mod n over GF(2) * for n = CRC using POWER8 instructions. We use x = 32. * - * http://en.wikipedia.org/wiki/Barrett_reduction + * https://en.wikipedia.org/wiki/Barrett_reduction * * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM */ diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 2d444d09b553..90cd5c53af66 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -7,5 +7,6 @@ generic-y += export.h generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += qrwlock.h generic-y += vtime.h generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 7d81e86a1e5d..de14b1a34d56 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -98,7 +98,7 @@ unsigned long __init early_init(unsigned long dt_ptr); void __init machine_init(u64 dt_ptr); #endif long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); -notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr); @@ -144,13 +144,13 @@ void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ -extern s32 patch__call_flush_count_cache; +extern s32 patch__call_flush_branch_caches; extern s32 patch__flush_count_cache_return; extern s32 patch__flush_link_stack_return; extern s32 patch__call_kvm_flush_link_stack; extern s32 patch__memset_nocache, patch__memcpy_nocache; -extern long flush_count_cache; +extern long flush_branch_caches; extern long kvm_flush_link_stack; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 0311c3c42960..8a55eb8cc97b 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -191,6 +191,34 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) +/* + * Don't want to override the generic atomic_try_cmpxchg_acquire, because + * we add a lock hint to the lwarx, which may not be wanted for the + * _acquire case (and is not used by the other _acquire variants so it + * would be a surprise). + */ +static __always_inline bool +atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new) +{ + int r, o = *old; + + __asm__ __volatile__ ( +"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_acquire \n" +" cmpw 0,%0,%3 \n" +" bne- 2f \n" +" stwcx. %4,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (r), "+m" (v->counter) + : "r" (&v->counter), "r" (o), "r" (new) + : "cr0", "memory"); + + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} + /** * atomic_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic_t diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 123adcefd40f..f53c42380832 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -7,6 +7,10 @@ #include <asm/asm-const.h> +#ifndef __ASSEMBLY__ +#include <asm/ppc-opcode.h> +#endif + /* * Memory barrier. * The sync instruction guarantees that all memory accesses initiated @@ -76,6 +80,22 @@ do { \ ___p1; \ }) +#ifdef CONFIG_PPC64 +#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + VAL = READ_ONCE(*__PTR); \ + if (unlikely(!(cond_expr))) { \ + spin_begin(); \ + do { \ + VAL = READ_ONCE(*__PTR); \ + } while (!(cond_expr)); \ + spin_end(); \ + } \ + (typeof(*ptr))VAL; \ +}) +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #define NOSPEC_BARRIER_SLOT nop #elif defined(CONFIG_PPC_FSL_BOOK3E) @@ -97,6 +117,15 @@ do { \ #define barrier_nospec() #endif /* CONFIG_PPC_BARRIER_NOSPEC */ +/* + * pmem_wmb() ensures that all stores for which the modification + * are written to persistent storage by preceding dcbfps/dcbstps + * instructions have updated persistent storage before any data + * access or data transfer caused by subsequent instructions is + * initiated. + */ +#define pmem_wmb() __asm__ __volatile__(PPC_PHWSYNC ::: "memory") + #include <asm-generic/barrier.h> #endif /* _ASM_POWERPC_BARRIER_H */ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 224912432821..36443cda8dcf 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -184,17 +184,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #define VMALLOC_OFFSET (0x1000000) /* 16M */ -/* - * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules - * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear - * memory shall not share segments. - */ -#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES) -#define VMALLOC_START ((ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \ - ~(VMALLOC_OFFSET - 1)) -#else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) -#endif #ifdef CONFIG_KASAN_VMALLOC #define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) @@ -202,6 +192,11 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define VMALLOC_END ioremap_bot #endif +#ifdef CONFIG_STRICT_KERNEL_RWX +#define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) +#define MODULES_VADDR (MODULES_END - SZ_256M) +#endif + #ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/threads.h> diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 3f9ae3585ab9..082b98808701 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -34,11 +34,11 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) -#define H_PAGE_F_GIX_SHIFT 53 -#define H_PAGE_F_SECOND _RPAGE_RPN44 /* HPTE is in 2ndary HPTEG */ -#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) -#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ -#define H_PAGE_HASHPTE _RPAGE_RSV2 /* software: PTE & hash are busy */ +#define H_PAGE_F_GIX_SHIFT _PAGE_PA_MAX +#define H_PAGE_F_SECOND _RPAGE_PKEY_BIT0 /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) +#define H_PAGE_BUSY _RPAGE_RSV1 +#define H_PAGE_HASHPTE _RPAGE_PKEY_BIT4 /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ @@ -57,11 +57,12 @@ #define H_PMD_FRAG_NR (PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT) /* memory key bits, only 8 keys supported */ -#define H_PTE_PKEY_BIT0 0 -#define H_PTE_PKEY_BIT1 0 -#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 -#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 -#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#define H_PTE_PKEY_BIT4 0 +#define H_PTE_PKEY_BIT3 0 +#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT3 +#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT2 +#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT1 + /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 0729c034e56f..f20de1149ebe 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -32,15 +32,15 @@ */ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ -#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ +#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ /* memory key bits. */ -#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 -#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 -#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 -#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 -#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#define H_PTE_PKEY_BIT4 _RPAGE_PKEY_BIT4 +#define H_PTE_PKEY_BIT3 _RPAGE_PKEY_BIT3 +#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT2 +#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT1 +#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT0 /* * We need to differentiate between explicit huge page and THP huge diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h new file mode 100644 index 000000000000..795010897e5d --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H +#define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H + +static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT1 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT3 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT4 : 0x0UL)); +} + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); +} + +static inline u16 hash__pte_to_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT4) ? 0x10 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? 0x8 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? 0x2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? 0x1 : 0x0UL)); +} + +#endif diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h new file mode 100644 index 000000000000..6b5c3a248ba2 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kexec.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_ +#define _ASM_POWERPC_BOOK3S_64_KEXEC_H_ + + +#define reset_sprs reset_sprs +static inline void reset_sprs(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + mtspr(SPRN_AMR, 0); + mtspr(SPRN_UAMOR, 0); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_IAMR, 0); + } + + /* Do we need isync()? We are going via a kexec reset */ + isync(); +} + +#endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 3fa1b962dc27..93d18da5e7ec 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -86,8 +86,8 @@ #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) -#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000) -#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000) +#define HPTE_R_KEY_BIT4 ASM_CONST(0x2000000000000000) +#define HPTE_R_KEY_BIT3 ASM_CONST(0x1000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) @@ -103,8 +103,8 @@ #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) #define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800) -#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400) -#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200) +#define HPTE_R_KEY_BIT1 ASM_CONST(0x0000000000000400) +#define HPTE_R_KEY_BIT0 ASM_CONST(0x0000000000000200) #define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) @@ -793,7 +793,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, } /* - * For kernel space, we use context ids as below + * For kernel space, we use context ids as * below. Range is 512TB per context. * * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff] diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 5393a535240c..55442d45c597 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -82,6 +82,11 @@ extern unsigned int mmu_pid_bits; /* Base PID to allocate from */ extern unsigned int mmu_base_pid; +/* + * memory block size used with radix translation. + */ +extern unsigned int __ro_after_init radix_mem_block_size; + #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) @@ -209,6 +214,12 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_early_init_devtree(void); +#else +static inline void pkey_early_init_devtree(void) {} +#endif + extern void hash__early_init_mmu(void); extern void radix__early_init_mmu(void); static inline void __init early_init_mmu(void) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 69c5b051734f..e1af0b394ceb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -107,9 +107,23 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) return pud; } +static inline void __pud_free(pud_t *pud) +{ + struct page *page = virt_to_page(pud); + + /* + * Early pud pages allocated via memblock allocator + * can't be directly freed to slab + */ + if (PageReserved(page)) + free_reserved_page(page); + else + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); +} + static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); + return __pud_free(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 25c3cb8272c0..6de56c3b33c4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -20,9 +20,13 @@ #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ -#define _PAGE_SAO 0x00010 /* Strong access order */ + +#define _PAGE_CACHE_CTL 0x00030 /* Bits for the folowing cache modes */ + /* No bits set is normal cacheable memory */ + /* 0x00010 unused, is SAO bit on radix POWER9 */ #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ #define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ + #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ /* @@ -32,11 +36,13 @@ #define _RPAGE_SW1 0x00800 #define _RPAGE_SW2 0x00400 #define _RPAGE_SW3 0x00200 -#define _RPAGE_RSV1 0x1000000000000000UL -#define _RPAGE_RSV2 0x0800000000000000UL -#define _RPAGE_RSV3 0x0400000000000000UL -#define _RPAGE_RSV4 0x0200000000000000UL -#define _RPAGE_RSV5 0x00040UL +#define _RPAGE_RSV1 0x00040UL + +#define _RPAGE_PKEY_BIT4 0x1000000000000000UL +#define _RPAGE_PKEY_BIT3 0x0800000000000000UL +#define _RPAGE_PKEY_BIT2 0x0400000000000000UL +#define _RPAGE_PKEY_BIT1 0x0200000000000000UL +#define _RPAGE_PKEY_BIT0 0x0100000000000000UL #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */ @@ -58,13 +64,12 @@ */ #define _RPAGE_RPN0 0x01000 #define _RPAGE_RPN1 0x02000 -#define _RPAGE_RPN44 0x0100000000000000UL #define _RPAGE_RPN43 0x0080000000000000UL #define _RPAGE_RPN42 0x0040000000000000UL #define _RPAGE_RPN41 0x0020000000000000UL /* Max physical address bit as per radix table */ -#define _RPAGE_PA_MAX 57 +#define _RPAGE_PA_MAX 56 /* * Max physical address bit we will use for now. @@ -125,8 +130,6 @@ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) -#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ - H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable @@ -825,8 +828,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, return hash__set_pte_at(mm, addr, ptep, pte, percpu); } -#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) - #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t prot) { diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h new file mode 100644 index 000000000000..b7d9f4267bcd --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _ASM_POWERPC_BOOK3S_64_PKEYS_H +#define _ASM_POWERPC_BOOK3S_64_PKEYS_H + +#include <asm/book3s/64/hash-pkey.h> + +extern u64 __ro_after_init default_uamor; + +static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + if (!mmu_has_feature(MMU_FTR_PKEY)) + return 0x0UL; + + if (radix_enabled()) + BUG(); + return hash__vmflag_to_pte_pkey_bits(vm_flags); +} + +static inline u16 pte_to_pkey_bits(u64 pteflags) +{ + if (radix_enabled()) + BUG(); + return hash__pte_to_pkey_bits(pteflags); +} + +#endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h index d5f5ab73dc7f..035ceecd6d67 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-4k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h @@ -11,7 +11,7 @@ #define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB /* - * One fragment per per page + * One fragment per page */ #define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3) #define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index ca8db193ae38..94439e0cefc9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -2,10 +2,25 @@ #ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H #define _ASM_POWERPC_TLBFLUSH_RADIX_H +#include <asm/hvcall.h> + struct vm_area_struct; struct mm_struct; struct mmu_gather; +static inline u64 psize_to_rpti_pgsize(unsigned long psize) +{ + if (psize == MMU_PAGE_4K) + return H_RPTI_PAGE_4K; + if (psize == MMU_PAGE_64K) + return H_RPTI_PAGE_64K; + if (psize == MMU_PAGE_2M) + return H_RPTI_PAGE_2M; + if (psize == MMU_PAGE_1G) + return H_RPTI_PAGE_1G; + return H_RPTI_PAGE_ALL; +} + static inline int mmu_get_ap(int psize) { return mmu_psize_defs[psize].ap; diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index de600b915a3c..54764c6e922d 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -6,6 +6,7 @@ #include <linux/mm.h> #include <asm/cputable.h> +#include <asm/cpu_has_feature.h> #ifdef CONFIG_PPC_BOOK3S_64 /* diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index bac2252c839e..fdddb822d564 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -191,14 +191,14 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000) #define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000) #define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000) -#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000) +// Free LONG_ASM_CONST(0x0000000008000000) #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000) #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000) -#define CPU_FTR_PKEY LONG_ASM_CONST(0x0000000400000000) +/* LONG_ASM_CONST(0x0000000400000000) Free */ #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000) #define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000) @@ -214,6 +214,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) #define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000) #define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000) +#define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000) #ifndef __ASSEMBLY__ @@ -435,32 +436,32 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ + CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ - CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) + CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX ) #define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_DSCR | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP ) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_DSCR | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ - CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ - CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_P9_TLBIE_STQ_BUG | \ + CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) #define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG) #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \ CPU_FTR_P9_RADIX_PREFETCH_BUG | \ @@ -473,12 +474,12 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_DSCR | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ - CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ - CPU_FTR_ARCH_31) + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \ + CPU_FTR_DAWR | CPU_FTR_DAWR1) #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -628,9 +629,10 @@ enum { /* * Maximum number of hw breakpoint supported on powerpc. Number of - * breakpoints supported by actual hw might be less than this. + * breakpoints supported by actual hw might be less than this, which + * is decided at run time in nr_wp_slots(). */ -#define HBP_NUM_MAX 1 +#define HBP_NUM_MAX 2 #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 0fccd5ea1e9a..ed75d1c318e3 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -36,6 +36,8 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct) return mulhdu((__force u64) ct, __cputime_usec_factor); } +#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime) + /* * PPC64 uses PACA which is task independent for storing accounting data while * PPC32 uses struct thread_info, therefore at task switch the accounting data @@ -65,7 +67,7 @@ static inline void arch_vtime_task_switch(struct task_struct *prev) /* * account_cpu_user_entry/exit runs "unreconciled", so can't trace, - * can't use use get_paca() + * can't use get_paca() */ static notrace inline void account_cpu_user_entry(void) { diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h new file mode 100644 index 000000000000..68d9717cc5ee --- /dev/null +++ b/arch/powerpc/include/asm/crashdump-ppc64.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H +#define _ASM_POWERPC_CRASHDUMP_PPC64_H + +/* + * Backup region - first 64KB of System RAM + * + * If ever the below macros are to be changed, please be judicious. + * The implicit assumptions are: + * - start, end & size are less than UINT32_MAX. + * - start & size are at least 8 byte aligned. + * + * For implementation details: arch/powerpc/purgatory/trampoline_64.S + */ +#define BACKUP_SRC_START 0 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 4ce6808deed3..3e9da22a2779 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -11,8 +11,10 @@ #include <linux/smp.h> #include <linux/threads.h> +#include <asm/cputhreads.h> #include <asm/ppc-opcode.h> #include <asm/feature-fixups.h> +#include <asm/kvm_ppc.h> #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) @@ -87,9 +89,6 @@ static inline void ppc_msgsync(void) #endif /* CONFIG_PPC_BOOK3S */ -extern void doorbell_global_ipi(int cpu); -extern void doorbell_core_ipi(int cpu); -extern int doorbell_try_core_ipi(int cpu); extern void doorbell_exception(struct pt_regs *regs); static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) @@ -100,4 +99,63 @@ static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) _ppc_msgsnd(msg); } +#ifdef CONFIG_SMP + +/* + * Doorbells must only be used if CPU_FTR_DBELL is available. + * msgsnd is used in HV, and msgsndp is used in !HV. + * + * These should be used by platform code that is aware of restrictions. + * Other arch code should use ->cause_ipi. + * + * doorbell_global_ipi() sends a dbell to any target CPU. + * Must be used only by architectures that address msgsnd target + * by PIR/get_hard_smp_processor_id. + */ +static inline void doorbell_global_ipi(int cpu) +{ + u32 tag = get_hard_smp_processor_id(cpu); + + kvmppc_set_host_ipi(cpu); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * doorbell_core_ipi() sends a dbell to a target CPU in the same core. + * Must be used only by architectures that address msgsnd target + * by TIR/cpu_thread_in_core. + */ +static inline void doorbell_core_ipi(int cpu) +{ + u32 tag = cpu_thread_in_core(cpu); + + kvmppc_set_host_ipi(cpu); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * Attempt to cause a core doorbell if destination is on the same core. + * Returns 1 on success, 0 on failure. + */ +static inline int doorbell_try_core_ipi(int cpu) +{ + int this_cpu = get_cpu(); + int ret = 0; + + if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { + doorbell_core_ipi(cpu); + ret = 1; + } + + put_cpu(); + + return ret; +} + +#endif /* CONFIG_SMP */ + #endif /* _ASM_POWERPC_DBELL_H */ diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 452402215e12..d8a0729cf754 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -44,6 +44,9 @@ struct dev_archdata { #ifdef CONFIG_CXL_BASE struct cxl_context *cxl_ctx; #endif +#ifdef CONFIG_PCI_IOV + void *iov_data; +#endif }; struct pdev_archdata { diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 414d209f45bb..17ccc6474ab6 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -90,13 +90,14 @@ static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb) } u64 drmem_lmb_memory_max(void); -void __init walk_drmem_lmbs(struct device_node *dn, - void (*func)(struct drmem_lmb *, const __be32 **)); +int walk_drmem_lmbs(struct device_node *dn, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)); int drmem_update_dt(void); #ifdef CONFIG_PPC_PSERIES -void __init walk_drmem_lmbs_early(unsigned long node, - void (*func)(struct drmem_lmb *, const __be32 **)); +int __init +walk_drmem_lmbs_early(unsigned long node, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)); #endif static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 964a54292b36..d5f369bcd130 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -133,7 +133,6 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe) struct eeh_dev { int mode; /* EEH mode */ - int class_code; /* Class code of the device */ int bdfn; /* bdfn of device (for cfg ops) */ struct pci_controller *controller; int pe_config_addr; /* PE config address */ @@ -148,7 +147,10 @@ struct eeh_dev { struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ bool in_error; /* Error flag for edev */ + + /* VF specific properties */ struct pci_dev *physfn; /* Associated SRIOV PF */ + int vf_index; /* Index of this VF */ }; /* "fmt" must be a simple literal string */ @@ -217,18 +219,17 @@ struct eeh_ops { int (*init)(void); struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); - int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *delay); int (*reset)(struct eeh_pe *pe, int option); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); int (*err_inject)(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); - int (*read_config)(struct pci_dn *pdn, int where, int size, u32 *val); - int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val); + int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val); + int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); - int (*restore_config)(struct pci_dn *pdn); - int (*notify_resume)(struct pci_dn *pdn); + int (*restore_config)(struct eeh_dev *edev); + int (*notify_resume)(struct eeh_dev *edev); }; extern int eeh_subsystem_flags; @@ -282,8 +283,8 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no, int config_addr); -int eeh_add_to_parent_pe(struct eeh_dev *edev); -int eeh_rmv_from_parent_pe(struct eeh_dev *edev); +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent); +int eeh_pe_tree_remove(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); void *eeh_pe_traverse(struct eeh_pe *root, eeh_pe_traverse_func fn, void *flag); @@ -293,8 +294,6 @@ void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); -struct eeh_dev *eeh_dev_init(struct pci_dn *pdn); -void eeh_dev_phb_init_dynamic(struct pci_controller *phb); void eeh_show_enabled(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); @@ -314,7 +313,6 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); -int eeh_restore_vf_config(struct pci_dn *pdn); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. @@ -340,11 +338,6 @@ static inline bool eeh_enabled(void) static inline void eeh_show_enabled(void) { } -static inline void *eeh_dev_init(struct pci_dn *pdn, void *data) -{ - return NULL; -} - static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } static inline int eeh_check_failure(const volatile void __iomem *token) @@ -362,6 +355,7 @@ static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) +static inline int eeh_phb_pe_create(struct pci_controller *phb) { return 0; } #endif /* CONFIG_EEH */ #if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index d3a7e36f1402..c99ba08a408d 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -37,7 +37,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* A "hypercall" is an "sc 1" instruction. This header file file provides C +/* A "hypercall" is an "sc 1" instruction. This header file provides C * wrapper functions for the ePAPR hypervisor interface. It is inteded * for use by Linux device drivers and other operating systems. * @@ -246,7 +246,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt, * ev_int_eoi - signal the end of interrupt processing * @interrupt: the interrupt number * - * This function signals the end of processing for the the specified + * This function signals the end of processing for the specified * interrupt, which must be the interrupt currently in service. By * definition, this is also the highest-priority interrupt. * diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 54a98ef7d7fe..40cdcb2fb057 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -66,14 +66,7 @@ #define EX_TLB_SRR0 (10 * 8) #define EX_TLB_SRR1 (11 * 8) #define EX_TLB_R7 (12 * 8) -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -#define EX_TLB_R8 (13 * 8) -#define EX_TLB_R9 (14 * 8) -#define EX_TLB_LR (15 * 8) -#define EX_TLB_SIZE (16 * 8) -#else #define EX_TLB_SIZE (13 * 8) -#endif #define START_EXCEPTION(label) \ .globl exc_##label##_book3e; \ @@ -110,8 +103,7 @@ exc_##label##_book3e: std r11,EX_TLB_R12(r12); \ mtspr SPRN_SPRG_TLB_EXFRAME,r14; \ std r15,EX_TLB_SRR1(r12); \ - std r16,EX_TLB_SRR0(r12); \ - TLB_MISS_PROLOG_STATS + std r16,EX_TLB_SRR0(r12); /* And these are the matching epilogs that restores things * @@ -143,7 +135,6 @@ exc_##label##_book3e: mtspr SPRN_SRR0,r15; \ ld r15,EX_TLB_R15(r12); \ mtspr SPRN_SRR1,r16; \ - TLB_MISS_RESTORE_STATS \ ld r16,EX_TLB_R16(r12); \ ld r12,EX_TLB_R12(r12); \ @@ -158,53 +149,15 @@ exc_##label##_book3e: addi r11,r13,PACA_EXTLB; \ TLB_MISS_RESTORE(r11) -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -#define TLB_MISS_PROLOG_STATS \ - mflr r10; \ - std r8,EX_TLB_R8(r12); \ - std r9,EX_TLB_R9(r12); \ - std r10,EX_TLB_LR(r12); -#define TLB_MISS_RESTORE_STATS \ - ld r16,EX_TLB_LR(r12); \ - ld r9,EX_TLB_R9(r12); \ - ld r8,EX_TLB_R8(r12); \ - mtlr r16; -#define TLB_MISS_STATS_D(name) \ - addi r9,r13,MMSTAT_DSTATS+name; \ - bl tlb_stat_inc; -#define TLB_MISS_STATS_I(name) \ - addi r9,r13,MMSTAT_ISTATS+name; \ - bl tlb_stat_inc; -#define TLB_MISS_STATS_X(name) \ - ld r8,PACA_EXTLB+EX_TLB_ESR(r13); \ - cmpdi cr2,r8,-1; \ - beq cr2,61f; \ - addi r9,r13,MMSTAT_DSTATS+name; \ - b 62f; \ -61: addi r9,r13,MMSTAT_ISTATS+name; \ -62: bl tlb_stat_inc; -#define TLB_MISS_STATS_SAVE_INFO \ - std r14,EX_TLB_ESR(r12); /* save ESR */ -#define TLB_MISS_STATS_SAVE_INFO_BOLTED \ - std r14,PACA_EXTLB+EX_TLB_ESR(r13); /* save ESR */ -#else -#define TLB_MISS_PROLOG_STATS -#define TLB_MISS_RESTORE_STATS -#define TLB_MISS_PROLOG_STATS_BOLTED -#define TLB_MISS_RESTORE_STATS_BOLTED -#define TLB_MISS_STATS_D(name) -#define TLB_MISS_STATS_I(name) -#define TLB_MISS_STATS_X(name) -#define TLB_MISS_STATS_Y(name) -#define TLB_MISS_STATS_SAVE_INFO -#define TLB_MISS_STATS_SAVE_INFO_BOLTED -#endif - #define SET_IVOR(vector_number, vector_offset) \ LOAD_REG_ADDR(r3,interrupt_base_book3e);\ ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; - +/* + * powerpc relies on return from interrupt/syscall being context synchronising + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional + * synchronisation instructions. + */ #define RFI_TO_KERNEL \ rfi diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 47bd4ea0837d..ebe95aa04d53 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -68,6 +68,14 @@ * * The nop instructions allow us to insert one or more instructions to flush the * L1-D cache when returning to userspace or a guest. + * + * powerpc relies on return from interrupt/syscall being context synchronising + * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE + * without additional synchronisation instructions. + * + * soft-masked interrupt replay does not include a context-synchronising rfid, + * but those always return to kernel, the sync is only required when returning + * to user. */ #define RFI_FLUSH_SLOT \ RFI_FLUSH_FIXUP_SECTION; \ @@ -123,6 +131,12 @@ hrfid; \ b hrfi_flush_fallback +#define RFSCV_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ + RFI_FLUSH_SLOT; \ + RFSCV; \ + b rfscv_flush_fallback + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 6003c2e533a0..0b295bdb201e 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000) #define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000) #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) +#define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #ifndef __ASSEMBLY__ @@ -71,7 +72,8 @@ enum { FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | - FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR, + FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | + FW_FEATURE_RPT_INVALIDATE, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, @@ -132,6 +134,12 @@ extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +#endif + #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 29188810ba30..925cf89cbf4b 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -52,7 +52,7 @@ enum fixed_addresses { FIX_HOLE, /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, - FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+((128*1024)/PAGE_SIZE)-1, + FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index f1e9067bd5ac..f133b5930ae1 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -13,7 +13,6 @@ typedef struct { unsigned int pmu_irqs; unsigned int mce_exceptions; unsigned int spurious_irqs; - unsigned int hmi_exceptions; unsigned int sreset_irqs; #ifdef CONFIG_PPC_WATCHDOG unsigned int soft_nmi_irqs; diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 2dabcf668292..4cb9efa2eb21 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -128,7 +128,7 @@ name: .if ((start) % (size) != 0); \ .error "Fixed section exception vector misalignment"; \ .endif; \ - .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100); \ + .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100) && ((size) != 0x1000); \ .error "Fixed section exception vector bad size"; \ .endif; \ .if (start) < sname##_start; \ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 551a9d4d3958..013165e62618 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -57,6 +57,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); +void gigantic_hugetlb_cma_reserve(void) __init; #include <asm-generic/hugetlb.h> #else /* ! CONFIG_HUGETLB_PAGE */ @@ -71,6 +72,12 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, { return NULL; } + + +static inline void __init gigantic_hugetlb_cma_reserve(void) +{ +} + #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e90c073e437e..fbb377055471 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -237,7 +237,7 @@ #define H_CREATE_RPT 0x1A4 #define H_REMOVE_RPT 0x1A8 #define H_REGISTER_RPAGES 0x1AC -#define H_DISABLE_AND_GETC 0x1B0 +#define H_DISABLE_AND_GET 0x1B0 #define H_ERROR_DATA 0x1B4 #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC @@ -305,7 +305,8 @@ #define H_SCM_UNBIND_ALL 0x3FC #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 -#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS +#define H_RPT_INVALIDATE 0x448 +#define MAX_HCALL_OPCODE H_RPT_INVALIDATE /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -354,9 +355,10 @@ /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 -#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_SET_DAWR0 2 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +#define H_SET_MODE_RESOURCE_SET_DAWR1 5 /* Values for argument to H_SIGNAL_SYS_RESET */ #define H_SIGNAL_SYS_RESET_ALL -1 @@ -389,6 +391,37 @@ #define PROC_TABLE_RADIX 0x04 #define PROC_TABLE_GTSE 0x01 +/* + * Defines for + * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information. + */ + +/* Type of translation to invalidate (type) */ +#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */ +#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */ +#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */ +/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */ +#define H_RPTI_TYPE_PRT 0x0008 +/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */ +#define H_RPTI_TYPE_PAT 0x0008 +#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \ + H_RPTI_TYPE_PRT) +#define H_RPTI_TYPE_NESTED_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \ + H_RPTI_TYPE_PAT) + +/* Invalidation targets (target) */ +#define H_RPTI_TARGET_CMMU 0x01 /* All virtual processors in the partition */ +#define H_RPTI_TARGET_CMMU_LOCAL 0x02 /* Current virtual processor */ +/* All nest/accelerator agents in use by the partition */ +#define H_RPTI_TARGET_NMMU 0x04 + +/* Page size mask (page sizes) */ +#define H_RPTI_PAGE_4K 0x01 +#define H_RPTI_PAGE_64K 0x02 +#define H_RPTI_PAGE_2M 0x04 +#define H_RPTI_PAGE_1G 0x08 +#define H_RPTI_PAGE_ALL (-1UL) + #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index cb424799da0d..db206a7f38e2 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -9,6 +9,8 @@ #ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H #define _PPC_BOOK3S_64_HW_BREAKPOINT_H +#include <asm/cpu_has_feature.h> + #ifdef __KERNEL__ struct arch_hw_breakpoint { unsigned long address; @@ -17,7 +19,7 @@ struct arch_hw_breakpoint { u16 hw_len; /* length programmed in hw */ }; -/* Note: Don't change the the first 6 bits below as they are in the same order +/* Note: Don't change the first 6 bits below as they are in the same order * as the dabr and dabrx. */ #define HW_BRK_TYPE_READ 0x01 @@ -46,7 +48,7 @@ struct arch_hw_breakpoint { static inline int nr_wp_slots(void) { - return HBP_NUM_MAX; + return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1; } #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index b3b0f2d020f0..ae02eb53d6ef 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -10,7 +10,7 @@ * * © Copyright 1995 Apple Computer, Inc. All rights reserved. * - * It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf + * It's available online from https://www.cpu.lu/~mlan/ftp/MacTech.pdf * You can obtain paper copies of this book from computer bookstores or by * writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San * Francisco, CA 94104. Reference ISBN 1-55860-393-X. diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 4da4fcba0684..4f897993b710 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -99,6 +99,11 @@ struct trace_imc_data { */ #define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL +/* + * Bit 0:1 in third DW of IMC trace record + * specifies the MSR[HV PR] values. + */ +#define IMC_TRACE_RECORD_VAL_HVPR(x) ((x) >> 62) /* * Device tree parser code detects IMC pmu support and diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 45f3ec868258..cc73c1267572 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -122,6 +122,25 @@ static inline u64 ppc_inst_as_u64(struct ppc_inst x) #endif } +#define PPC_INST_STR_LEN sizeof("00000000 00000000") + +static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x) +{ + if (ppc_inst_prefixed(x)) + sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); + else + sprintf(str, "%08x", ppc_inst_val(x)); + + return str; +} + +#define ppc_inst_as_str(x) \ +({ \ + char __str[PPC_INST_STR_LEN]; \ + __ppc_inst_as_str(__str, x); \ + __str; \ +}) + int probe_user_read_inst(struct ppc_inst *inst, struct ppc_inst __user *nip); diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index be85c7005fb1..d635b96c7ea6 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -27,10 +27,12 @@ #ifdef CONFIG_KASAN void kasan_early_init(void); +void kasan_mmu_init(void); void kasan_init(void); void kasan_late_init(void); #else static inline void kasan_init(void) { } +static inline void kasan_mmu_init(void) { } static inline void kasan_late_init(void) { } #endif diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index c68476818753..55d6ede30c19 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -100,15 +100,26 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; -#ifdef CONFIG_IMA_KEXEC #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + struct crash_mem *exclude_ranges; + + unsigned long backup_start; + void *backup_buf; + + unsigned long elfcorehdr_addr; + unsigned long elf_headers_sz; + void *elf_headers; + +#ifdef CONFIG_IMA_KEXEC phys_addr_t ima_buffer_addr; size_t ima_buffer_size; -}; #endif +}; +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len); int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); @@ -116,6 +127,20 @@ int setup_new_fdt(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size); + +#ifdef CONFIG_PPC64 +struct kexec_buf; + +int load_crashdump_segments_ppc64(struct kimage *image, + struct kexec_buf *kbuf); +int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr); +int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline); +#endif /* CONFIG_PPC64 */ + #endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ @@ -150,6 +175,18 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_PPC_BOOK3S_64 +#include <asm/book3s/64/kexec.h> +#endif + +#ifndef reset_sprs +#define reset_sprs reset_sprs +static inline void reset_sprs(void) +{ +} +#endif + #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KEXEC_H */ diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h new file mode 100644 index 000000000000..7a90000f8d15 --- /dev/null +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_KEXEC_RANGES_H +#define _ASM_POWERPC_KEXEC_RANGES_H + +#define MEM_RANGE_CHUNK_SZ 2048 /* Memory ranges size chunk */ + +void sort_memory_ranges(struct crash_mem *mrngs, bool merge); +struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); +int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); +int add_tce_mem_ranges(struct crash_mem **mem_ranges); +int add_initrd_mem_range(struct crash_mem **mem_ranges); +#ifdef CONFIG_PPC_BOOK3S_64 +int add_htab_mem_range(struct crash_mem **mem_ranges); +#else +static inline int add_htab_mem_range(struct crash_mem **mem_ranges) +{ + return 0; +} +#endif +int add_kernel_mem_range(struct crash_mem **mem_ranges); +int add_rtas_mem_range(struct crash_mem **mem_ranges); +int add_opal_mem_range(struct crash_mem **mem_ranges); +int add_reserved_mem_ranges(struct crash_mem **mem_ranges); + +#endif /* _ASM_POWERPC_KEXEC_RANGES_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 45704f2716e2..078f4648ea27 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -119,7 +119,7 @@ struct kvmppc_host_state { void __iomem *xive_tima_virt; u32 saved_xirr; u64 dabr; - u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ + u64 host_mmcr[10]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */ u32 host_pmc[8]; u64 host_purr; u64 host_spurr; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index 310ba48d13f0..0c3401b2e19e 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -89,10 +89,12 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) return vcpu->arch.regs.nip; } +#ifdef CONFIG_BOOKE static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } +#endif static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7e2d061d0445..e020d269416d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -637,12 +637,14 @@ struct kvm_vcpu_arch { u32 ccr1; u32 dbsr; - u64 mmcr[5]; + u64 mmcr[4]; /* MMCR0, MMCR1, MMCR2, MMCR3 */ + u64 mmcra; + u64 mmcrs; u32 pmc[8]; u32 spmc[2]; u64 siar; u64 sdar; - u64 sier; + u64 sier[3]; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM u64 tfhar; u64 texasr; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 9c1f6b4b9bbf..744612054c94 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,35 +8,15 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ -#include <uapi/asm/kvm_para.h> - -#ifdef CONFIG_KVM_GUEST - -#include <linux/of.h> - -static inline int kvm_para_available(void) -{ - struct device_node *hyper_node; - - hyper_node = of_find_node_by_path("/hypervisor"); - if (!hyper_node) - return 0; +#include <asm/firmware.h> - if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; - - return 1; -} - -#else +#include <uapi/asm/kvm_para.h> static inline int kvm_para_available(void) { - return 0; + return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest(); } -#endif - static inline unsigned int kvm_arch_para_features(void) { unsigned long r; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 7bcb64444a39..a90b892f0bfe 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -131,7 +131,7 @@ struct machdep_calls { unsigned long dabrx); /* Set DAWR for this platform, leave empty for default implementation */ - int (*set_dawr)(unsigned long dawr, + int (*set_dawr)(int nr, unsigned long dawr, unsigned long dawrx); #ifdef CONFIG_PPC32 /* XXX for now */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 376a395daf32..adf2cda67f9a 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -86,6 +86,7 @@ enum MCE_TlbErrorType { enum MCE_UserErrorType { MCE_USER_ERROR_INDETERMINATE = 0, MCE_USER_ERROR_TLBIE = 1, + MCE_USER_ERROR_SCV = 2, }; enum MCE_RaErrorType { @@ -220,6 +221,8 @@ extern void machine_check_print_event_info(struct machine_check_event *evt, unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); extern void mce_common_process_ue(struct pt_regs *regs, struct mce_error_info *mce_err); +int mce_register_notifier(struct notifier_block *nb); +int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index d610c2e07b28..7c07728af300 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,42 +13,20 @@ #include <linux/pkeys.h> #include <asm/cpu_has_feature.h> -/* - * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits() - * here. How important is the optimization? - */ +#ifdef CONFIG_PPC_MEM_KEYS static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { -#ifdef CONFIG_PPC_MEM_KEYS - return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey)); -#else - return ((prot & PROT_SAO) ? VM_SAO : 0); -#endif + return pkey_to_vmflag_bits(pkey); } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { -#ifdef CONFIG_PPC_MEM_KEYS - return (vm_flags & VM_SAO) ? - __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : - __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); -#else - return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); -#endif + return __pgprot(vmflag_to_pte_pkey_bits(vm_flags)); } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) - -static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) -{ - if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) - return false; - if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) - return false; - return true; -} -#define arch_validate_prot arch_validate_prot +#endif #endif /* CONFIG_PPC64 */ #endif /* _ASM_POWERPC_MMAN_H */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index f4ac25d4df05..255a1837e9f7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -29,6 +29,19 @@ */ /* + * Support for KUEP feature. + */ +#define MMU_FTR_KUEP ASM_CONST(0x00000400) + +/* + * Support for memory protection keys. + */ +#define MMU_FTR_PKEY ASM_CONST(0x00000800) + +/* Guest Translation Shootdown Enable */ +#define MMU_FTR_GTSE ASM_CONST(0x00001000) + +/* * Support for 68 bit VA space. We added that from ISA 2.05 */ #define MMU_FTR_68_BIT_VA ASM_CONST(0x00002000) @@ -173,10 +186,18 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | + MMU_FTR_GTSE | #ifdef CONFIG_PPC_KUAP MMU_FTR_RADIX_KUAP | #endif /* CONFIG_PPC_KUAP */ #endif /* CONFIG_PPC_RADIX_MMU */ +#ifdef CONFIG_PPC_MEM_KEYS + MMU_FTR_PKEY | +#endif +#ifdef CONFIG_PPC_KUEP + MMU_FTR_KUEP | +#endif /* CONFIG_PPC_KUAP */ + 0, }; @@ -356,6 +377,8 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); static inline void mmu_early_init_devtree(void) { } +static inline void pkey_early_init_devtree(void) {} + extern void *abatron_pteptrs[2]; #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 1a474f6b1992..7f3658a97384 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -218,8 +218,6 @@ static inline void inc_mm_active_cpus(struct mm_struct *mm) { } static inline void dec_mm_active_cpus(struct mm_struct *mm) { } static inline void mm_context_add_copro(struct mm_struct *mm) { } static inline void mm_context_remove_copro(struct mm_struct *mm) { } -static inline void mm_context_add_vas_windows(struct mm_struct *mm) { } -static inline void mm_context_remove_vas_windows(struct mm_struct *mm) { } #endif diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b0afbdd07740..b9e134d0f03a 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -249,6 +249,18 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p return old; } + +#ifdef CONFIG_PPC_16K_PAGES +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_basic_t val = READ_ONCE(ptep->pte); + pte_t pte = {val, val, val, val}; + + return pte; +} +#endif /* CONFIG_PPC_16K_PAGES */ + #else static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) @@ -284,16 +296,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) -#define __HAVE_ARCH_PTEP_GET -static inline pte_t ptep_get(pte_t *ptep) -{ - pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0}; - - return pte; -} -#endif - #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 6cb8aa357191..59ee9fa4ae09 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -82,8 +82,6 @@ */ #include <asm/nohash/pte-book3e.h> -#define _PAGE_SAO 0 - #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) /* diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 84b2564cf5a4..9454d29ff4b4 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -225,6 +225,7 @@ struct paca_struct { u16 in_mce; u8 hmi_event_available; /* HMI event is available */ u8 hmi_p9_special_emu; /* HMI P9 special emulation */ + u32 hmi_irqs; /* HMI irq stat */ #endif u8 ftrace_enabled; /* Hard disable ftrace */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index a63fe6f3a0ff..254687258f42 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -255,8 +255,10 @@ static inline bool pfn_valid(unsigned long pfn) */ #ifdef CONFIG_PPC_BOOK3E_64 #define is_kernel_addr(x) ((x) >= 0x8000000000000000ul) -#else +#elif defined(CONFIG_PPC_BOOK3S_64) #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) +#else +#define is_kernel_addr(x) ((x) >= TASK_SIZE) #endif #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h new file mode 100644 index 000000000000..9362c94fe3aa --- /dev/null +++ b/arch/powerpc/include/asm/paravirt.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_PARAVIRT_H +#define _ASM_POWERPC_PARAVIRT_H + +#include <linux/jump_label.h> +#include <asm/smp.h> +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/hvcall.h> +#endif + +#ifdef CONFIG_PPC_SPLPAR +DECLARE_STATIC_KEY_FALSE(shared_processor); + +static inline bool is_shared_processor(void) +{ + return static_branch_unlikely(&shared_processor); +} + +/* If bit 0 is set, the cpu has been preempted */ +static inline u32 yield_count_of(int cpu) +{ + __be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count); + return be32_to_cpu(yield_count); +} + +static inline void yield_to_preempted(int cpu, u32 yield_count) +{ + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); +} + +static inline void prod_cpu(int cpu) +{ + plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +} + +static inline void yield_to_any(void) +{ + plpar_hcall_norets(H_CONFER, -1, 0); +} +#else +static inline bool is_shared_processor(void) +{ + return false; +} + +static inline u32 yield_count_of(int cpu) +{ + return 0; +} + +extern void ___bad_yield_to_preempted(void); +static inline void yield_to_preempted(int cpu, u32 yield_count) +{ + ___bad_yield_to_preempted(); /* This would be a bug */ +} + +extern void ___bad_yield_to_any(void); +static inline void yield_to_any(void) +{ + ___bad_yield_to_any(); /* This would be a bug */ +} + +extern void ___bad_prod_cpu(void); +static inline void prod_cpu(int cpu) +{ + ___bad_prod_cpu(); /* This would be a bug */ +} + +#endif + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + if (!is_shared_processor()) + return false; + if (yield_count_of(cpu) & 1) + return true; + return false; +} + +static inline bool pv_is_native_spin_unlock(void) +{ + return !is_shared_processor(); +} + +#endif /* _ASM_POWERPC_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b92e81b256e5..d2a2a14e56f9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -202,7 +202,6 @@ struct pci_dn { #define IODA_INVALID_PE 0xFFFFFFFF unsigned int pe_number; #ifdef CONFIG_PCI_IOV - int vf_index; /* VF index in the PF */ u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ unsigned int *pe_num_map; /* PE# for the first VF PE or array */ diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index dce863a7635c..8e5b7d0b851c 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -10,8 +10,6 @@ #ifdef CONFIG_SMP -#include <asm/paca.h> - #define __my_cpu_offset local_paca->data_offset #endif /* CONFIG_SMP */ @@ -19,4 +17,6 @@ #include <asm-generic/percpu.h> +#include <asm/paca.h> + #endif /* _ASM_POWERPC_PERCPU_H_ */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index eed3954082fa..1e8b2e1ec1db 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -12,6 +12,8 @@ #ifdef CONFIG_PPC_PERF_CTRS #include <asm/perf_event_server.h> +#else +static inline bool is_sier_available(void) { return false; } #endif #ifdef CONFIG_FSL_EMB_PERF_EVENT diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3e9703f44c7c..86c9eb064b22 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -17,6 +17,13 @@ struct perf_event; +struct mmcr_regs { + unsigned long mmcr0; + unsigned long mmcr1; + unsigned long mmcr2; + unsigned long mmcra; + unsigned long mmcr3; +}; /* * This struct provides the constants and functions needed to * describe the PMU on a particular POWER-family CPU. @@ -28,7 +35,7 @@ struct power_pmu { unsigned long add_fields; unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]); int (*get_constraint)(u64 event_id, unsigned long *mskp, unsigned long *valp); @@ -41,13 +48,13 @@ struct power_pmu { unsigned long group_constraint_val; u64 (*bhrb_filter_map)(u64 branch_sample_type); void (*config_bhrb)(u64 pmu_bhrb_filter); - void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + void (*disable_pmc)(unsigned int pmc, struct mmcr_regs *mmcr); int (*limited_pmc_event)(u64 event_id); u32 flags; const struct attribute_group **attr_groups; int n_generic; int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + u64 (*cache_events)[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; @@ -69,6 +76,7 @@ struct power_pmu { #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ #define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ +#define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 2fe6cae14d10..a7951049e129 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -11,9 +11,7 @@ #include <linux/jump_label.h> #include <asm/firmware.h> -DECLARE_STATIC_KEY_TRUE(pkey_disabled); -extern int pkeys_total; /* total pkeys as per device tree */ -extern u32 initial_allocation_mask; /* bits set for the initially allocated keys */ +extern int num_pkey; extern u32 reserved_allocation_mask; /* bits set for reserved keys */ #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ @@ -25,48 +23,28 @@ extern u32 reserved_allocation_mask; /* bits set for reserved keys */ PKEY_DISABLE_WRITE | \ PKEY_DISABLE_EXECUTE) +#ifdef CONFIG_PPC_BOOK3S_64 +#include <asm/book3s/64/pkeys.h> +#else +#error "Not supported" +#endif + + static inline u64 pkey_to_vmflag_bits(u16 pkey) { return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); } -static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) -{ - if (static_branch_likely(&pkey_disabled)) - return 0x0UL; - - return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL)); -} - static inline int vma_pkey(struct vm_area_struct *vma) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return 0; return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; } -#define arch_max_pkey() pkeys_total - -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline int arch_max_pkey(void) { - return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); -} - -static inline u16 pte_to_pkey_bits(u64 pteflags) -{ - return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL)); + return num_pkey; } #define pkey_alloc_mask(pkey) (0x1 << pkey) @@ -114,9 +92,8 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) u32 all_pkeys_mask = (u32)(~(0x0)); int ret; - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -1; - /* * Are we out of pkeys? We must handle this specially because ffz() * behavior is undefined if there are no zeros. @@ -132,7 +109,7 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -1; if (!mm_pkey_is_allocated(mm, pkey)) @@ -147,21 +124,13 @@ static inline int mm_pkey_free(struct mm_struct *mm, int pkey) * Try to dedicate one of the protection keys to be used as an * execute-only protection key. */ -extern int __execute_only_pkey(struct mm_struct *mm); -static inline int execute_only_pkey(struct mm_struct *mm) -{ - if (static_branch_likely(&pkey_disabled)) - return -1; - - return __execute_only_pkey(mm); -} - +extern int execute_only_pkey(struct mm_struct *mm); extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey); static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return 0; /* @@ -179,7 +148,7 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -EINVAL; /* @@ -196,7 +165,7 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool arch_pkeys_enabled(void) { - return !static_branch_likely(&pkey_disabled); + return mmu_has_feature(MMU_FTR_PKEY); } extern void pkey_mm_init(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 4497c8afb573..ece84a430701 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -312,7 +312,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr) static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { - return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0); +} + +static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1) +{ + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1); } static inline long plpar_signal_sys_reset(long cpu) @@ -334,6 +339,51 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) return rc; } +/* + * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately + * + * - Returns H_SUCCESS on success + * - For H_BUSY return value, we retry the hcall. + * - For any other hcall failures, attempt a full flush once before + * resorting to BUG(). + * + * Note: This hcall is expected to fail only very rarely. The correct + * error recovery of killing the process/guest will be eventually + * needed. + */ +static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, + u64 page_sizes, u64 start, u64 end) +{ + long rc; + unsigned long all; + + while (true) { + rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type, + page_sizes, start, end); + if (rc == H_BUSY) { + cpu_relax(); + continue; + } else if (rc == H_SUCCESS) + return rc; + + /* Flush request failed, try with a full flush once */ + if (type & H_RPTI_TYPE_NESTED) + all = H_RPTI_TYPE_NESTED | H_RPTI_TYPE_NESTED_ALL; + else + all = H_RPTI_TYPE_ALL; +retry: + rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, + all, page_sizes, 0, -1UL); + if (rc == H_BUSY) { + cpu_relax(); + goto retry; + } else if (rc == H_SUCCESS) + return rc; + + BUG(); + } +} + #else /* !CONFIG_PPC_PSERIES */ static inline long plpar_set_ciabr(unsigned long ciabr) @@ -346,6 +396,13 @@ static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex, { return 0; } + +static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, + u64 page_sizes, u64 start, u64 end) +{ + return 0; +} + #endif /* CONFIG_PPC_PSERIES */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 7de82647e761..ee79d2cd9fb6 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -9,28 +9,26 @@ #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) -extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, - u16 *supported); -extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); -extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, +int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, char *rate_buf, int rate_buf_size); -extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, - uint64_t rate_buf_phys, int rate_buf_size); - -extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); -extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, - void __iomem *tfc, void __iomem *pe_handle); -extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, - void __iomem **dar, void __iomem **tfc, - void __iomem **pe_handle); - -extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, - void **platform_data); -extern void pnv_ocxl_spa_release(void *platform_data); -extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); - -extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); -extern void pnv_ocxl_free_xive_irq(u32 irq); +int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size); + +int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); +void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle); +int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle); + +int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **platform_data); +void pnv_ocxl_spa_release(void *platform_data); +int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); + +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); +void pnv_ocxl_free_xive_irq(u32 irq); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 2a39c716c343..a6e3700c4566 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -76,6 +76,19 @@ #define __REGA0_R30 30 #define __REGA0_R31 31 +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) +#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) + +/* + * 16-bit immediate helper macros: HA() is for use with sign-extending instrs + * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the + * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000). + */ +#define IMM_H(i) ((uintptr_t)(i)>>16) +#define IMM_HA(i) (((uintptr_t)(i)>>16) + \ + (((uintptr_t)(i) & 0x8000) >> 15)) + + /* opcode and xopcode for instructions */ #define OP_TRAP 3 #define OP_TRAP_64 2 @@ -195,56 +208,28 @@ #define OP_LQ 56 /* sorted alphabetically */ -#define PPC_INST_BHRBE 0x7c00025c -#define PPC_INST_CLRBHRB 0x7c00035c +#define PPC_INST_BCCTR_FLUSH 0x4c400420 #define PPC_INST_COPY 0x7c20060c -#define PPC_INST_CP_ABORT 0x7c00068c -#define PPC_INST_DARN 0x7c0005e6 #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe -#define PPC_INST_DCBAL 0x7c2005ec -#define PPC_INST_DCBZL 0x7c2007ec -#define PPC_INST_ICBT 0x7c00002c -#define PPC_INST_ICSWX 0x7c00032d -#define PPC_INST_ICSWEPX 0x7c00076d #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e -#define PPC_INST_LDARX 0x7c0000a8 -#define PPC_INST_STDCX 0x7c0001ad -#define PPC_INST_LQARX 0x7c000228 -#define PPC_INST_STQCX 0x7c00016d #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a -#define PPC_INST_LWARX 0x7c000028 -#define PPC_INST_STWCX 0x7c00012d #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_SYNC 0x7c0004ac #define PPC_INST_SYNC_MASK 0xfc0007fe #define PPC_INST_ISYNC 0x4c00012c -#define PPC_INST_LXVD2X 0x7c000698 #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe -#define PPC_INST_MFTMR 0x7c0002dc -#define PPC_INST_MSGSND 0x7c00019c -#define PPC_INST_MSGCLR 0x7c0001dc -#define PPC_INST_MSGSYNC 0x7c0006ec -#define PPC_INST_MSGSNDP 0x7c00011c -#define PPC_INST_MSGCLRP 0x7c00015c #define PPC_INST_MTMSRD 0x7c000164 -#define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 -#define PPC_INST_PASTE 0x7c20070d #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe -#define PPC_INST_POPCNTD 0x7c0003f4 -#define PPC_INST_POPCNTW 0x7c0002f4 #define PPC_INST_RFEBB 0x4c000124 -#define PPC_INST_RFCI 0x4c000066 -#define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFID 0x4c000024 -#define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe @@ -254,131 +239,43 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe -#define PPC_INST_MFVSRD 0x7c000066 -#define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SC 0x44000002 -#define PPC_INST_SLBFEE 0x7c0007a7 -#define PPC_INST_SLBIA 0x7c0003e4 - #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e - #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a -#define PPC_INST_STXVD2X 0x7c000798 -#define PPC_INST_TLBIE 0x7c000264 -#define PPC_INST_TLBIEL 0x7c000224 -#define PPC_INST_TLBILX 0x7c000024 -#define PPC_INST_WAIT 0x7c00007c -#define PPC_INST_TLBIVAX 0x7c000624 -#define PPC_INST_TLBSRX_DOT 0x7c0006a5 -#define PPC_INST_VPMSUMW 0x10000488 -#define PPC_INST_VPMSUMD 0x100004c8 -#define PPC_INST_VPERMXOR 0x1000002d -#define PPC_INST_XXLOR 0xf0000490 -#define PPC_INST_XXSWAPD 0xf0000250 -#define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_TRECHKPT 0x7c0007dd #define PPC_INST_TRECLAIM 0x7c00075d -#define PPC_INST_TABORT 0x7c00071d #define PPC_INST_TSR 0x7c0005dd - -#define PPC_INST_NAP 0x4c000364 -#define PPC_INST_SLEEP 0x4c0003a4 -#define PPC_INST_WINKLE 0x4c0003e4 - -#define PPC_INST_STOP 0x4c0002e4 - -/* A2 specific instructions */ -#define PPC_INST_ERATWE 0x7c0001a6 -#define PPC_INST_ERATRE 0x7c000166 -#define PPC_INST_ERATILX 0x7c000066 -#define PPC_INST_ERATIVAX 0x7c000666 -#define PPC_INST_ERATSX 0x7c000126 -#define PPC_INST_ERATSX_DOT 0x7c000127 - -/* Misc instructions for BPF compiler */ -#define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 -#define PPC_INST_LDX 0x7c00002a -#define PPC_INST_LHZ 0xa0000000 -#define PPC_INST_LWZ 0x80000000 -#define PPC_INST_LHBRX 0x7c00062c -#define PPC_INST_LDBRX 0x7c000428 -#define PPC_INST_STB 0x98000000 -#define PPC_INST_STH 0xb0000000 #define PPC_INST_STD 0xf8000000 -#define PPC_INST_STDX 0x7c00012a -#define PPC_INST_STDU 0xf8000001 -#define PPC_INST_STW 0x90000000 -#define PPC_INST_STWU 0x94000000 #define PPC_INST_MFLR 0x7c0802a6 -#define PPC_INST_MTLR 0x7c0803a6 #define PPC_INST_MTCTR 0x7c0903a6 -#define PPC_INST_CMPWI 0x2c000000 -#define PPC_INST_CMPDI 0x2c200000 -#define PPC_INST_CMPW 0x7c000000 -#define PPC_INST_CMPD 0x7c200000 -#define PPC_INST_CMPLW 0x7c000040 -#define PPC_INST_CMPLD 0x7c200040 -#define PPC_INST_CMPLWI 0x28000000 -#define PPC_INST_CMPLDI 0x28200000 #define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 -#define PPC_INST_ADDC 0x7c000014 -#define PPC_INST_SUB 0x7c000050 #define PPC_INST_BLR 0x4e800020 -#define PPC_INST_BLRL 0x4e800021 #define PPC_INST_BCTR 0x4e800420 -#define PPC_INST_MULLD 0x7c0001d2 -#define PPC_INST_MULLW 0x7c0001d6 -#define PPC_INST_MULHWU 0x7c000016 -#define PPC_INST_MULLI 0x1c000000 -#define PPC_INST_MADDHD 0x10000030 -#define PPC_INST_MADDHDU 0x10000031 -#define PPC_INST_MADDLD 0x10000033 -#define PPC_INST_DIVWU 0x7c000396 +#define PPC_INST_BCTRL 0x4e800421 #define PPC_INST_DIVD 0x7c0003d2 -#define PPC_INST_DIVDU 0x7c000392 -#define PPC_INST_RLWINM 0x54000000 -#define PPC_INST_RLWINM_DOT 0x54000001 -#define PPC_INST_RLWIMI 0x50000000 -#define PPC_INST_RLDICL 0x78000000 #define PPC_INST_RLDICR 0x78000004 -#define PPC_INST_SLW 0x7c000030 -#define PPC_INST_SLD 0x7c000036 -#define PPC_INST_SRW 0x7c000430 -#define PPC_INST_SRAW 0x7c000630 -#define PPC_INST_SRAWI 0x7c000670 -#define PPC_INST_SRD 0x7c000436 -#define PPC_INST_SRAD 0x7c000634 -#define PPC_INST_SRADI 0x7c000674 -#define PPC_INST_AND 0x7c000038 -#define PPC_INST_ANDDOT 0x7c000039 -#define PPC_INST_OR 0x7c000378 -#define PPC_INST_XOR 0x7c000278 -#define PPC_INST_ANDI 0x70000000 #define PPC_INST_ORI 0x60000000 #define PPC_INST_ORIS 0x64000000 -#define PPC_INST_XORI 0x68000000 -#define PPC_INST_XORIS 0x6c000000 -#define PPC_INST_NEG 0x7c0000d0 -#define PPC_INST_EXTSW 0x7c0007b4 #define PPC_INST_BRANCH 0x48000000 #define PPC_INST_BRANCH_COND 0x40800000 -#define PPC_INST_LBZCIX 0x7c0006aa -#define PPC_INST_STBCIX 0x7c0007aa -#define PPC_INST_LWZX 0x7c00002e -#define PPC_INST_LFSX 0x7c00042e -#define PPC_INST_STFSX 0x7c00052e -#define PPC_INST_LFDX 0x7c0004ae -#define PPC_INST_STFDX 0x7c0005ae -#define PPC_INST_LVX 0x7c0000ce -#define PPC_INST_STVX 0x7c0001ce -#define PPC_INST_VCMPEQUD 0x100000c7 -#define PPC_INST_VCMPEQUB 0x10000006 + +/* Prefixes */ +#define PPC_INST_LFS 0xc0000000 +#define PPC_INST_STFS 0xd0000000 +#define PPC_INST_LFD 0xc8000000 +#define PPC_INST_STFD 0xd8000000 +#define PPC_PREFIX_MLS 0x06000000 +#define PPC_PREFIX_8LS 0x04000000 + +/* Prefixed instructions */ +#define PPC_INST_PLD 0xe4000000 +#define PPC_INST_PSTD 0xf4000000 /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) @@ -411,6 +308,7 @@ #define __PPC_CT(t) (((t) & 0x0f) << 21) #define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11)) #define __PPC_RC21 (0x1 << 10) +#define __PPC_PRFX_R(r) (((r) & 0x1) << 20) /* * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits @@ -431,175 +329,278 @@ #define __PPC_EH(eh) 0 #endif +/* Base instruction encoding */ +#define PPC_RAW_CP_ABORT (0x7c00068c) +#define PPC_RAW_COPY(a, b) (PPC_INST_COPY | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DARN(t, l) (0x7c0005e6 | ___PPC_RT(t) | (((l) & 0x3) << 16)) +#define PPC_RAW_DCBAL(a, b) (0x7c2005ec | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_DCBZL(a, b) (0x7c2007ec | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_LQARX(t, a, b, eh) (0x7c000228 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_LDARX(t, a, b, eh) (0x7c0000a8 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_LWARX(t, a, b, eh) (0x7c000028 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_PHWSYNC (0x7c8004ac) +#define PPC_RAW_PLWSYNC (0x7ca004ac) +#define PPC_RAW_STQCX(t, a, b) (0x7c00016d | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MADDHD(t, a, b, c) (0x10000030 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MADDHDU(t, a, b, c) (0x10000031 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MADDLD(t, a, b, c) (0x10000033 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MSGSND(b) (0x7c00019c | ___PPC_RB(b)) +#define PPC_RAW_MSGSYNC (0x7c0006ec) +#define PPC_RAW_MSGCLR(b) (0x7c0001dc | ___PPC_RB(b)) +#define PPC_RAW_MSGSNDP(b) (0x7c00011c | ___PPC_RB(b)) +#define PPC_RAW_MSGCLRP(b) (0x7c00015c | ___PPC_RB(b)) +#define PPC_RAW_PASTE(a, b) (0x7c20070d | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_POPCNTB(a, s) (PPC_INST_POPCNTB | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_POPCNTD(a, s) (0x7c0003f4 | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_POPCNTW(a, s) (0x7c0002f4 | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_RFCI (0x4c000066) +#define PPC_RAW_RFDI (0x4c00004e) +#define PPC_RAW_RFMCI (0x4c00004c) +#define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_WAIT(w) (0x7c00007c | __PPC_WC(w)) +#define PPC_RAW_TLBIE(lp, a) (0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp)) +#define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \ + (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \ + (0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_RAW_ERATRE(s, a, w) (0x7c000166 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_RAW_ERATILX(t, a, b) (0x7c000066 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATIVAX(s, a, b) (0x7c000666 | __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATSX(t, a, w) (0x7c000126 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATSX_DOT(t, a, w) (0x7c000127 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | __PPC_RT(t) | __PPC_RB(b)) +#define __PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | ___PPC_RT(t) | ___PPC_RB(b)) +#define PPC_RAW_ICBT(c, a, b) (0x7c00002c | __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_LBZCIX(t, a, b) (0x7c0006aa | __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21)) +#define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21)) +/* + * Define what the VSX XX1 form instructions will look like, then add + * the 128 bit load store instructions based on that. + */ +#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) +#define PPC_RAW_STXVD2X(s, a, b) (0x7c000798 | VSX_XX1((s), a, b)) +#define PPC_RAW_LXVD2X(s, a, b) (0x7c000698 | VSX_XX1((s), a, b)) +#define PPC_RAW_MFVRD(a, t) (0x7c000066 | VSX_XX1((t) + 32, a, R0)) +#define PPC_RAW_MTVRD(t, a) (0x7c000166 | VSX_XX1((t) + 32, a, R0)) +#define PPC_RAW_VPMSUMW(t, a, b) (0x10000488 | VSX_XX3((t), a, b)) +#define PPC_RAW_VPMSUMD(t, a, b) (0x100004c8 | VSX_XX3((t), a, b)) +#define PPC_RAW_XXLOR(t, a, b) (0xf0000490 | VSX_XX3((t), a, b)) +#define PPC_RAW_XXSWAPD(t, a) (0xf0000250 | VSX_XX3((t), a, a)) +#define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf0000780 | VSX_XX3((t), (a), (b)))) +#define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \ + ((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) +#define PPC_RAW_NAP (0x4c000364) +#define PPC_RAW_SLEEP (0x4c0003a4) +#define PPC_RAW_WINKLE (0x4c0003e4) +#define PPC_RAW_STOP (0x4c0002e4) +#define PPC_RAW_CLRBHRB (0x7c00035c) +#define PPC_RAW_MFBHRBE(r, n) (0x7c00025c | __PPC_RT(r) | (((n) & 0x3ff) << 11)) +#define PPC_RAW_TRECHKPT (PPC_INST_TRECHKPT) +#define PPC_RAW_TRECLAIM(r) (PPC_INST_TRECLAIM | __PPC_RA(r)) +#define PPC_RAW_TABORT(r) (0x7c00071d | __PPC_RA(r)) +#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6)) +#define PPC_RAW_MTTMR(tmr, r) (0x7c0003dc | TMRN(tmr) | ___PPC_RS(r)) +#define PPC_RAW_MFTMR(tmr, r) (0x7c0002dc | TMRN(tmr) | ___PPC_RT(r)) +#define PPC_RAW_ICSWX(s, a, b) (0x7c00032d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ICSWEPX(s, a, b) (0x7c00076d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SLBIA(IH) (0x7c0003e4 | (((IH) & 0x7) << 21)) +#define PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb) \ + (0x100000c7 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21) +#define PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb) \ + (0x10000006 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21) +#define PPC_RAW_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_DS(i)) +#define PPC_RAW_LWZ(r, base, i) (0x80000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LWZX(t, a, b) (0x7c00002e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | ___PPC_RA(base) | IMM_DS(i)) +#define PPC_RAW_STDCX(s, a, b) (0x7c0001ad | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LFSX(t, a, b) (0x7c00042e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STFSX(s, a, b) (0x7c00052e | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LFDX(t, a, b) (0x7c0004ae | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LVX(t, a, b) (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STVX(s, a, b) (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_NOP() (PPC_INST_NOP) +#define PPC_RAW_BLR() (PPC_INST_BLR) +#define PPC_RAW_BLRL() (0x4e800021) +#define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) +#define PPC_RAW_BCTR() (PPC_INST_BCTR) +#define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) +#define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) +#define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) +#define PPC_RAW_STDX(r, base, b) (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_STDU(r, base, i) (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_RAW_STW(r, base, i) (0x90000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STWU(r, base, i) (0x94000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STH(r, base, i) (0xb0000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STB(r, base, i) (0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LBZ(r, base, i) (0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPDI(a, i) (0x2c200000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPW(a, b) (0x7c000000 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPD(a, b) (0x7c200000 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPLWI(a, i) (0x28000000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPLDI(a, i) (0x28200000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPLW(a, b) (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPLD(a, b) (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUB(d, a, b) (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b)) +#define PPC_RAW_MULD(d, a, b) (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULI(d, a, i) (0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_DIVWU(d, a, b) (0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDU(d, a, b) (0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDE(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDE_DOT(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_DIVDEU(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDEU_DOT(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_AND(d, a, b) (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_ANDI(d, a, i) (0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) +#define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_EXTSW(d, a) (0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a)) +#define PPC_RAW_SLW(d, a, s) (0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SLD(d, a, s) (0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRW(d, a, s) (0x7c000430 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAW(d, a, s) (0x7c000630 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAWI(d, a, i) (0x7c000670 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i)) +#define PPC_RAW_SRD(d, a, s) (0x7c000436 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAD(d, a, s) (0x7c000634 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRADI(d, a, i) (0x7c000674 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i)) +#define PPC_RAW_RLWINM(d, a, i, mb, me) (0x54000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLWINM_DOT(d, a, i, mb, me) \ + (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb)) +#define PPC_RAW_RLDICR(d, a, i, me) (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) + +/* slwi = rlwinm Rx, Ry, n, 0, 31-n */ +#define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i)) +/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ +#define PPC_RAW_SRWI(d, a, i) PPC_RAW_RLWINM(d, a, 32-(i), i, 31) +/* sldi = rldicr Rx, Ry, n, 63-n */ +#define PPC_RAW_SLDI(d, a, i) PPC_RAW_RLDICR(d, a, i, 63-(i)) +/* sldi = rldicl Rx, Ry, 64-n, n */ +#define PPC_RAW_SRDI(d, a, i) PPC_RAW_RLDICL(d, a, 64-(i), i) + +#define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) + /* Deal with instructions that older assemblers aren't aware of */ -#define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT) -#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \ - ___PPC_RT(t) | \ - (((l) & 0x3) << 16)) -#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ - __PPC_RA(a) | __PPC_RB(b)) -#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ - __PPC_RA(a) | __PPC_RB(b)) -#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHD | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHDU | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDLD | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ - ___PPC_RB(b)) -#define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC) -#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ - ___PPC_RB(b)) -#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ - ___PPC_RB(b)) -#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \ - ___PPC_RB(b)) -#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_INST_POPCNTW | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_RFCI stringify_in_c(.long PPC_INST_RFCI) -#define PPC_RFDI stringify_in_c(.long PPC_INST_RFDI) -#define PPC_RFMCI stringify_in_c(.long PPC_INST_RFMCI) -#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_INST_TLBILX | \ - __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) +#define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) +#define PPC_COPY(a, b) stringify_in_c(.long PPC_RAW_COPY(a, b)) +#define PPC_DARN(t, l) stringify_in_c(.long PPC_RAW_DARN(t, l)) +#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_RAW_DCBAL(a, b)) +#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b)) +#define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) +#define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) +#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) +#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh)) +#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh)) +#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) +#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) +#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c)) +#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDLD(t, a, b, c)) +#define PPC_MSGSND(b) stringify_in_c(.long PPC_RAW_MSGSND(b)) +#define PPC_MSGSYNC stringify_in_c(.long PPC_RAW_MSGSYNC) +#define PPC_MSGCLR(b) stringify_in_c(.long PPC_RAW_MSGCLR(b)) +#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_RAW_MSGSNDP(b)) +#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_RAW_MSGCLRP(b)) +#define PPC_PASTE(a, b) stringify_in_c(.long PPC_RAW_PASTE(a, b)) +#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_RAW_POPCNTB(a, s)) +#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_RAW_POPCNTD(a, s)) +#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_RAW_POPCNTW(a, s)) +#define PPC_RFCI stringify_in_c(.long PPC_RAW_RFCI) +#define PPC_RFDI stringify_in_c(.long PPC_RAW_RFDI) +#define PPC_RFMCI stringify_in_c(.long PPC_RAW_RFMCI) +#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_RAW_TLBILX(t, a, b)) #define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) #define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) #define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) -#define PPC_WAIT(w) stringify_in_c(.long PPC_INST_WAIT | \ - __PPC_WC(w)) -#define PPC_TLBIE(lp,a) stringify_in_c(.long PPC_INST_TLBIE | \ - ___PPC_RB(a) | ___PPC_RS(lp)) -#define PPC_TLBIE_5(rb,rs,ric,prs,r) \ - stringify_in_c(.long PPC_INST_TLBIE | \ - ___PPC_RB(rb) | ___PPC_RS(rs) | \ - ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ - ___PPC_R(r)) +#define PPC_WAIT(w) stringify_in_c(.long PPC_RAW_WAIT(w)) +#define PPC_TLBIE(lp, a) stringify_in_c(.long PPC_RAW_TLBIE(lp, a)) +#define PPC_TLBIE_5(rb, rs, ric, prs, r) \ + stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r)) #define PPC_TLBIEL(rb,rs,ric,prs,r) \ - stringify_in_c(.long PPC_INST_TLBIEL | \ - ___PPC_RB(rb) | ___PPC_RS(rs) | \ - ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ - ___PPC_R(r)) -#define PPC_TLBSRX_DOT(a,b) stringify_in_c(.long PPC_INST_TLBSRX_DOT | \ - __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \ - __PPC_RA0(a) | __PPC_RB(b)) - -#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_INST_ERATWE | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) -#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_INST_ERATRE | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) -#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_INST_ERATILX | \ - __PPC_T_TLB(t) | __PPC_RA0(a) | \ - __PPC_RB(b)) -#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_INST_ERATIVAX | \ - __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_INST_ERATSX | \ - __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \ - __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ - __PPC_RT(t) | __PPC_RB(b)) -#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ - ___PPC_RT(t) | ___PPC_RB(b)) -#define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \ - __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) + stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r)) +#define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b)) +#define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b)) + +#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_RAW_ERATWE(s, a, w)) +#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_RAW_ERATRE(a, a, w)) +#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_RAW_ERATILX(t, a, b)) +#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_RAW_ERATIVAX(s, a, b)) +#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX(t, a, w)) +#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX_DOT(t, a, w)) +#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_RAW_SLBFEE_DOT(t, b)) +#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long __PPC_RAW_SLBFEE_DOT(t, b)) +#define PPC_ICBT(c, a, b) stringify_in_c(.long PPC_RAW_ICBT(c, a, b)) /* PASemi instructions */ -#define LBZCIX(t,a,b) stringify_in_c(.long PPC_INST_LBZCIX | \ - __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)) -#define STBCIX(s,a,b) stringify_in_c(.long PPC_INST_STBCIX | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) - -/* - * Define what the VSX XX1 form instructions will look like, then add - * the 128 bit load store instructions based on that. - */ -#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) -#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) -#define STXVD2X(s, a, b) stringify_in_c(.long PPC_INST_STXVD2X | \ - VSX_XX1((s), a, b)) -#define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \ - VSX_XX1((s), a, b)) -#define MFVRD(a, t) stringify_in_c(.long PPC_INST_MFVSRD | \ - VSX_XX1((t)+32, a, R0)) -#define MTVRD(t, a) stringify_in_c(.long PPC_INST_MTVSRD | \ - VSX_XX1((t)+32, a, R0)) -#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMW | \ - VSX_XX3((t), a, b)) -#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMD | \ - VSX_XX3((t), a, b)) -#define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ - VSX_XX3((t), a, b)) -#define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \ - VSX_XX3((t), a, a)) -#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \ - VSX_XX3((t), (a), (b)))) +#define LBZCIX(t, a, b) stringify_in_c(.long PPC_RAW_LBZCIX(t, a, b)) +#define STBCIX(s, a, b) stringify_in_c(.long PPC_RAW_STBCIX(s, a, b)) +#define PPC_DCBFPS(a, b) stringify_in_c(.long PPC_RAW_DCBFPS(a, b)) +#define PPC_DCBSTPS(a, b) stringify_in_c(.long PPC_RAW_DCBSTPS(a, b)) +#define PPC_PHWSYNC stringify_in_c(.long PPC_RAW_PHWSYNC) +#define PPC_PLWSYNC stringify_in_c(.long PPC_RAW_PLWSYNC) +#define STXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_STXVD2X(s, a, b)) +#define LXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_LXVD2X(s, a, b)) +#define MFVRD(a, t) stringify_in_c(.long PPC_RAW_MFVRD(a, t)) +#define MTVRD(t, a) stringify_in_c(.long PPC_RAW_MTVRD(t, a)) +#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMW(t, a, b)) +#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMD(t, a, b)) +#define XXLOR(t, a, b) stringify_in_c(.long PPC_RAW_XXLOR(t, a, b)) +#define XXSWAPD(t, a) stringify_in_c(.long PPC_RAW_XXSWAPD(t, a)) +#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_RAW_XVCPSGNDP(t, a, b))) #define VPERMXOR(vrt, vra, vrb, vrc) \ - stringify_in_c(.long (PPC_INST_VPERMXOR | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) + stringify_in_c(.long (PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc))) -#define PPC_NAP stringify_in_c(.long PPC_INST_NAP) -#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) -#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) +#define PPC_NAP stringify_in_c(.long PPC_RAW_NAP) +#define PPC_SLEEP stringify_in_c(.long PPC_RAW_SLEEP) +#define PPC_WINKLE stringify_in_c(.long PPC_RAW_WINKLE) -#define PPC_STOP stringify_in_c(.long PPC_INST_STOP) +#define PPC_STOP stringify_in_c(.long PPC_RAW_STOP) /* BHRB instructions */ -#define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB) -#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \ - __PPC_RT(r) | \ - (((n) & 0x3ff) << 11)) +#define PPC_CLRBHRB stringify_in_c(.long PPC_RAW_CLRBHRB) +#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_RAW_MFBHRBE(r, n)) /* Transactional memory instructions */ -#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT) -#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \ - | __PPC_RA(r)) -#define TABORT(r) stringify_in_c(.long PPC_INST_TABORT \ - | __PPC_RA(r)) +#define TRECHKPT stringify_in_c(.long PPC_RAW_TRECHKPT) +#define TRECLAIM(r) stringify_in_c(.long PPC_RAW_TRECLAIM(r)) +#define TABORT(r) stringify_in_c(.long PPC_RAW_TABORT(r)) /* book3e thread control instructions */ -#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6)) -#define MTTMR(tmr, r) stringify_in_c(.long PPC_INST_MTTMR | \ - TMRN(tmr) | ___PPC_RS(r)) -#define MFTMR(tmr, r) stringify_in_c(.long PPC_INST_MFTMR | \ - TMRN(tmr) | ___PPC_RT(r)) +#define MTTMR(tmr, r) stringify_in_c(.long PPC_RAW_MTTMR(tmr, r)) +#define MFTMR(tmr, r) stringify_in_c(.long PPC_RAW_MFTMR(tmr, r)) /* Coprocessor instructions */ -#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_INST_ICSWX | \ - ___PPC_RS(s) | \ - ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_INST_ICSWEPX | \ - ___PPC_RS(s) | \ - ___PPC_RA(a) | \ - ___PPC_RB(b)) - -#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ - ((IH & 0x7) << 21)) +#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWX(s, a, b)) +#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWEPX(s, a, b)) + +#define PPC_SLBIA(IH) stringify_in_c(.long PPC_RAW_SLBIA(IH)) /* * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix @@ -611,12 +612,8 @@ #define PPC_RADIX_INVALIDATE_ERAT_USER PPC_SLBIA(3) #define PPC_RADIX_INVALIDATE_ERAT_GUEST PPC_SLBIA(6) -#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | __PPC_RC21) +#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb)) -#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUB | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | __PPC_RC21) +#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb)) #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6b03dff61a05..b4cc6608131c 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -755,6 +755,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define N_SLINE 68 #define N_SO 100 +#define RFSCV .long 0x4c0000a4 + /* * Create an endian fixup trampoline * @@ -774,7 +776,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define FIXUP_ENDIAN #else /* - * This version may be used in in HV or non-HV context. + * This version may be used in HV or non-HV context. * MSR[EE] must be disabled. */ #define FIXUP_ENDIAN \ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 52a67835057a..ed0d633ab5aa 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -237,7 +237,6 @@ struct thread_struct { #ifdef CONFIG_PPC_MEM_KEYS unsigned long amr; unsigned long iamr; - unsigned long uamor; #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ @@ -272,6 +271,10 @@ struct thread_struct { unsigned mmcr0; unsigned used_ebb; + unsigned long mmcr3; + unsigned long sier2; + unsigned long sier3; + #endif }; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index ac3970fff0d5..155a197c0aa1 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -222,9 +222,14 @@ static inline void set_trap(struct pt_regs *regs, unsigned long val) regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK); } +static inline bool trap_is_scv(struct pt_regs *regs) +{ + return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000); +} + static inline bool trap_is_syscall(struct pt_regs *regs) { - return TRAP(regs) == 0xc00; + return (trap_is_scv(regs) || TRAP(regs) == 0xc00); } static inline bool trap_norestart(struct pt_regs *regs) @@ -238,7 +243,7 @@ static inline void set_trap_norestart(struct pt_regs *regs) } #define arch_has_single_step() (1) -#ifndef CONFIG_BOOK3S_601 +#ifndef CONFIG_PPC_BOOK3S_601 #define arch_has_block_step() (true) #else #define arch_has_block_step() (false) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h new file mode 100644 index 000000000000..b752d34517b3 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_QSPINLOCK_H +#define _ASM_POWERPC_QSPINLOCK_H + +#include <asm-generic/qspinlock_types.h> +#include <asm/paravirt.h> + +#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + if (!is_shared_processor()) + native_queued_spin_lock_slowpath(lock, val); + else + __pv_queued_spin_lock_slowpath(lock, val); +} + +#define queued_spin_unlock queued_spin_unlock +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + if (!is_shared_processor()) + smp_store_release(&lock->locked, 0); + else + __pv_queued_spin_unlock(lock); +} + +#else +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +#endif + +static __always_inline void queued_spin_lock(struct qspinlock *lock) +{ + u32 val = 0; + + if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + queued_spin_lock_slowpath(lock, val); +} +#define queued_spin_lock queued_spin_lock + +#define smp_mb__after_spinlock() smp_mb() + +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) +{ + /* + * This barrier was added to simple spinlocks by commit 51d7d5205d338, + * but it should now be possible to remove it, asm arm64 has done with + * commit c6f5d02b6a0f. + */ + smp_mb(); + return atomic_read(&lock->val); +} +#define queued_spin_is_locked queued_spin_is_locked + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define SPIN_THRESHOLD (1<<15) /* not tuned */ + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + if (*ptr != val) + return; + yield_to_any(); + /* + * We could pass in a CPU here if waiting in the queue and yield to + * the previous CPU in the queue. + */ +} + +static __always_inline void pv_kick(int cpu) +{ + prod_cpu(cpu); +} + +extern void __pv_init_lock_hash(void); + +static inline void pv_spinlocks_init(void) +{ + __pv_init_lock_hash(); +} + +#endif + +#include <asm-generic/qspinlock.h> + +#endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000000..6b60e7736a47 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_paravirt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H +#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H + +EXPORT_SYMBOL(__pv_queued_spin_unlock); + +#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 88e6c78100d9..41419f1fc00f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -12,6 +12,7 @@ #ifdef __KERNEL__ #include <linux/stringify.h> +#include <linux/const.h> #include <asm/cputable.h> #include <asm/asm-const.h> #include <asm/feature-fixups.h> @@ -876,7 +877,9 @@ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 #define SPRN_MMCR2 785 +#define SPRN_MMCR3 754 #define SPRN_UMMCR2 769 +#define SPRN_UMMCR3 738 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL @@ -886,6 +889,7 @@ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ #define MMCRA_SLOT_SHIFT 24 #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ +#define MMCRA_BHRB_DISABLE _UL(0x2000000000) // BHRB disable bit for ISA v3.1 #define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */ #define POWER6_MMCRA_SIHV 0x0000040000000000ULL #define POWER6_MMCRA_SIPR 0x0000020000000000ULL @@ -918,6 +922,10 @@ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +#define SPRN_SIER2 752 +#define SPRN_SIER3 753 +#define SPRN_USIER2 736 +#define SPRN_USIER3 737 #define SPRN_SIAR 796 #define SPRN_SDAR 797 #define SPRN_TACR 888 @@ -1472,7 +1480,7 @@ static inline void update_power8_hid0(unsigned long hid0) { /* * The HID0 update on Power8 should at the very least be - * preceded by a a SYNC instruction followed by an ISYNC + * preceded by a SYNC instruction followed by an ISYNC * instruction */ asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 014968f25f7e..55f9a154c95d 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -215,7 +215,6 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) #define PSERIES_HP_ELOG_ACTION_ADD 1 #define PSERIES_HP_ELOG_ACTION_REMOVE 2 -#define PSERIES_HP_ELOG_ACTION_READD 3 #define PSERIES_HP_ELOG_ID_DRC_NAME 1 #define PSERIES_HP_ELOG_ID_DRC_INDEX 2 @@ -253,8 +252,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_online_cpus_mask(cpumask_var_t cpus); -extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(u64 handle); struct rtc_time; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 7c05e95a5c44..fbb8fa32150f 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -63,6 +63,8 @@ static inline bool security_ftr_enabled(u64 feature) // bcctr 2,0,0 triggers a hardware assisted count cache flush #define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0000000000000800ull +// bcctr 2,0,0 triggers a hardware assisted link stack flush +#define SEC_FTR_BCCTR_LINK_FLUSH_ASSIST 0x0000000000002000ull // Features indicating need for Spectre/Meltdown mitigations diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b8..9efbddee2bca 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -30,12 +30,12 @@ void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES -extern void pseries_enable_reloc_on_exc(void); +extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); extern void pseries_little_endian_exceptions(void); #else -static inline void pseries_enable_reloc_on_exc(void) {} +static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h new file mode 100644 index 000000000000..9c3c30534333 --- /dev/null +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -0,0 +1,288 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_H +#define _ASM_POWERPC_SIMPLE_SPINLOCK_H + +/* + * Simple spin lock operations. + * + * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM + * Rework to support virtual processors + * + * Type of int is used as a full 64b word is not necessary. + * + * (the type definitions are in asm/simple_spinlock_types.h) + */ +#include <linux/irqflags.h> +#include <asm/paravirt.h> +#include <asm/paca.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> + +#ifdef CONFIG_PPC64 +/* use 0x800000yy when locked, where yy == CPU number */ +#ifdef __BIG_ENDIAN__ +#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) +#else +#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) +#endif +#else +#define LOCK_TOKEN 1 +#endif + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.slock == 0; +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + smp_mb(); + return !arch_spin_value_unlocked(*lock); +} + +/* + * This returns the old value in the lock, so we succeeded + * in getting the lock if the return value is 0. + */ +static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned long tmp, token; + + token = LOCK_TOKEN; + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%2,1) "\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n\ + stwcx. %1,0,%2\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (tmp) + : "r" (token), "r" (&lock->slock) + : "cr0", "memory"); + + return tmp; +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return __arch_spin_trylock(lock) == 0; +} + +/* + * On a system with shared processors (that is, where a physical + * processor is multiplexed between several virtual processors), + * there is no point spinning on a lock if the holder of the lock + * isn't currently scheduled on a physical processor. Instead + * we detect this situation and ask the hypervisor to give the + * rest of our timeslice to the lock holder. + * + * So that we can tell which virtual processor is holding a lock, + * we put 0x80000000 | smp_processor_id() in the lock when it is + * held. Conveniently, we have a word in the paca that holds this + * value. + */ + +#if defined(CONFIG_PPC_SPLPAR) +/* We only yield to the hypervisor if we are in shared processor mode */ +void splpar_spin_yield(arch_spinlock_t *lock); +void splpar_rw_yield(arch_rwlock_t *lock); +#else /* SPLPAR */ +static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; +static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; +#endif + +static inline void spin_yield(arch_spinlock_t *lock) +{ + if (is_shared_processor()) + splpar_spin_yield(lock); + else + barrier(); +} + +static inline void rw_yield(arch_rwlock_t *lock) +{ + if (is_shared_processor()) + splpar_rw_yield(lock); + else + barrier(); +} + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + while (1) { + if (likely(__arch_spin_trylock(lock) == 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + } +} + +static inline +void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +{ + unsigned long flags_dis; + + while (1) { + if (likely(__arch_spin_trylock(lock) == 0)) + break; + local_save_flags(flags_dis); + local_irq_restore(flags); + do { + HMT_low(); + if (is_shared_processor()) + splpar_spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + local_irq_restore(flags_dis); + } +} +#define arch_spin_lock_flags arch_spin_lock_flags + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + __asm__ __volatile__("# arch_spin_unlock\n\t" + PPC_RELEASE_BARRIER: : :"memory"); + lock->slock = 0; +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +#ifdef CONFIG_PPC64 +#define __DO_SIGN_EXTEND "extsw %0,%0\n" +#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ +#else +#define __DO_SIGN_EXTEND +#define WRLOCK_TOKEN (-1) +#endif + +/* + * This returns the old value in the lock + 1, + * so we got a read lock if the return value is > 0. + */ +static inline long __arch_read_trylock(arch_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%1,1) "\n" + __DO_SIGN_EXTEND +" addic. %0,%0,1\n\ + ble- 2f\n" +" stwcx. %0,0,%1\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" : "=&r" (tmp) + : "r" (&rw->lock) + : "cr0", "xer", "memory"); + + return tmp; +} + +/* + * This returns the old value in the lock, + * so we got the write lock if the return value is 0. + */ +static inline long __arch_write_trylock(arch_rwlock_t *rw) +{ + long tmp, token; + + token = WRLOCK_TOKEN; + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%2,1) "\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n" +" stwcx. %1,0,%2\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" : "=&r" (tmp) + : "r" (token), "r" (&rw->lock) + : "cr0", "memory"); + + return tmp; +} + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + while (1) { + if (likely(__arch_read_trylock(rw) > 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_rw_yield(rw); + } while (unlikely(rw->lock < 0)); + HMT_medium(); + } +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + while (1) { + if (likely(__arch_write_trylock(rw) == 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_rw_yield(rw); + } while (unlikely(rw->lock != 0)); + HMT_medium(); + } +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + return __arch_read_trylock(rw) > 0; +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + return __arch_write_trylock(rw) == 0; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( + "# read_unlock\n\t" + PPC_RELEASE_BARRIER +"1: lwarx %0,0,%1\n\ + addic %0,%0,-1\n" +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r"(tmp) + : "r"(&rw->lock) + : "cr0", "xer", "memory"); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + __asm__ __volatile__("# write_unlock\n\t" + PPC_RELEASE_BARRIER: : :"memory"); + rw->lock = 0; +} + +#define arch_spin_relax(lock) spin_yield(lock) +#define arch_read_relax(lock) rw_yield(lock) +#define arch_write_relax(lock) rw_yield(lock) + +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + +#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h new file mode 100644 index 000000000000..0f3cdd8faa95 --- /dev/null +++ b/arch/powerpc/include/asm/simple_spinlock_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H +#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int slock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile signed int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H */ diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 8dff086c0cab..4b30a0205c93 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -108,7 +108,7 @@ /* * i2c commands * - * To issue an i2c command, first is to send a parameter block to the + * To issue an i2c command, first is to send a parameter block to * the SMU. This is a command of type 0x9a with 9 bytes of header * eventually followed by data for a write: * diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index c89b32443cff..1e6fa371cc38 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,12 +17,6 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); -#ifdef CONFIG_PPC_BOOK3S_64 -extern int resize_hpt_for_hotplug(unsigned long new_mem_size); -#else -static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; } -#endif - #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); #else diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 2d620896cdae..6ec72282888d 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -3,312 +3,16 @@ #define __ASM_SPINLOCK_H #ifdef __KERNEL__ -/* - * Simple spin lock operations. - * - * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM - * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM - * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM - * Rework to support virtual processors - * - * Type of int is used as a full 64b word is not necessary. - * - * (the type definitions are in asm/spinlock_types.h) - */ -#include <linux/jump_label.h> -#include <linux/irqflags.h> -#ifdef CONFIG_PPC64 -#include <asm/paca.h> -#include <asm/hvcall.h> -#endif -#include <asm/synch.h> -#include <asm/ppc-opcode.h> - -#ifdef CONFIG_PPC64 -/* use 0x800000yy when locked, where yy == CPU number */ -#ifdef __BIG_ENDIAN__ -#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) -#else -#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) -#endif +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include <asm/qspinlock.h> +#include <asm/qrwlock.h> #else -#define LOCK_TOKEN 1 +#include <asm/simple_spinlock.h> #endif -#ifdef CONFIG_PPC_PSERIES -DECLARE_STATIC_KEY_FALSE(shared_processor); - -#define vcpu_is_preempted vcpu_is_preempted -static inline bool vcpu_is_preempted(int cpu) -{ - if (!static_branch_unlikely(&shared_processor)) - return false; - return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); -} -#endif - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - return lock.slock == 0; -} - -static inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - smp_mb(); - return !arch_spin_value_unlocked(*lock); -} - -/* - * This returns the old value in the lock, so we succeeded - * in getting the lock if the return value is 0. - */ -static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned long tmp, token; - - token = LOCK_TOKEN; - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ - cmpwi 0,%0,0\n\ - bne- 2f\n\ - stwcx. %1,0,%2\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" - : "=&r" (tmp) - : "r" (token), "r" (&lock->slock) - : "cr0", "memory"); - - return tmp; -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - return __arch_spin_trylock(lock) == 0; -} - -/* - * On a system with shared processors (that is, where a physical - * processor is multiplexed between several virtual processors), - * there is no point spinning on a lock if the holder of the lock - * isn't currently scheduled on a physical processor. Instead - * we detect this situation and ask the hypervisor to give the - * rest of our timeslice to the lock holder. - * - * So that we can tell which virtual processor is holding a lock, - * we put 0x80000000 | smp_processor_id() in the lock when it is - * held. Conveniently, we have a word in the paca that holds this - * value. - */ - -#if defined(CONFIG_PPC_SPLPAR) -/* We only yield to the hypervisor if we are in shared processor mode */ -void splpar_spin_yield(arch_spinlock_t *lock); -void splpar_rw_yield(arch_rwlock_t *lock); -#else /* SPLPAR */ -static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; -static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; -#endif - -static inline bool is_shared_processor(void) -{ -#ifdef CONFIG_PPC_SPLPAR - return static_branch_unlikely(&shared_processor); -#else - return false; -#endif -} - -static inline void spin_yield(arch_spinlock_t *lock) -{ - if (is_shared_processor()) - splpar_spin_yield(lock); - else - barrier(); -} - -static inline void rw_yield(arch_rwlock_t *lock) -{ - if (is_shared_processor()) - splpar_rw_yield(lock); - else - barrier(); -} - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - while (1) { - if (likely(__arch_spin_trylock(lock) == 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_spin_yield(lock); - } while (unlikely(lock->slock != 0)); - HMT_medium(); - } -} - -static inline -void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) -{ - unsigned long flags_dis; - - while (1) { - if (likely(__arch_spin_trylock(lock) == 0)) - break; - local_save_flags(flags_dis); - local_irq_restore(flags); - do { - HMT_low(); - if (is_shared_processor()) - splpar_spin_yield(lock); - } while (unlikely(lock->slock != 0)); - HMT_medium(); - local_irq_restore(flags_dis); - } -} -#define arch_spin_lock_flags arch_spin_lock_flags - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - __asm__ __volatile__("# arch_spin_unlock\n\t" - PPC_RELEASE_BARRIER: : :"memory"); - lock->slock = 0; -} - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - */ - -#ifdef CONFIG_PPC64 -#define __DO_SIGN_EXTEND "extsw %0,%0\n" -#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ -#else -#define __DO_SIGN_EXTEND -#define WRLOCK_TOKEN (-1) +#ifndef CONFIG_PARAVIRT_SPINLOCKS +static inline void pv_spinlocks_init(void) { } #endif -/* - * This returns the old value in the lock + 1, - * so we got a read lock if the return value is > 0. - */ -static inline long __arch_read_trylock(arch_rwlock_t *rw) -{ - long tmp; - - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%1,1) "\n" - __DO_SIGN_EXTEND -" addic. %0,%0,1\n\ - ble- 2f\n" -" stwcx. %0,0,%1\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" : "=&r" (tmp) - : "r" (&rw->lock) - : "cr0", "xer", "memory"); - - return tmp; -} - -/* - * This returns the old value in the lock, - * so we got the write lock if the return value is 0. - */ -static inline long __arch_write_trylock(arch_rwlock_t *rw) -{ - long tmp, token; - - token = WRLOCK_TOKEN; - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ - cmpwi 0,%0,0\n\ - bne- 2f\n" -" stwcx. %1,0,%2\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" : "=&r" (tmp) - : "r" (token), "r" (&rw->lock) - : "cr0", "memory"); - - return tmp; -} - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - while (1) { - if (likely(__arch_read_trylock(rw) > 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_rw_yield(rw); - } while (unlikely(rw->lock < 0)); - HMT_medium(); - } -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - while (1) { - if (likely(__arch_write_trylock(rw) == 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_rw_yield(rw); - } while (unlikely(rw->lock != 0)); - HMT_medium(); - } -} - -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - return __arch_read_trylock(rw) > 0; -} - -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - return __arch_write_trylock(rw) == 0; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - long tmp; - - __asm__ __volatile__( - "# read_unlock\n\t" - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1\n\ - addic %0,%0,-1\n" -" stwcx. %0,0,%1\n\ - bne- 1b" - : "=&r"(tmp) - : "r"(&rw->lock) - : "cr0", "xer", "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - __asm__ __volatile__("# write_unlock\n\t" - PPC_RELEASE_BARRIER: : :"memory"); - rw->lock = 0; -} - -#define arch_spin_relax(lock) spin_yield(lock) -#define arch_read_relax(lock) rw_yield(lock) -#define arch_write_relax(lock) rw_yield(lock) - -/* See include/linux/spinlock.h */ -#define smp_mb__after_spinlock() smp_mb() - #endif /* __KERNEL__ */ #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 87adaf13b7e8..c5d742f18021 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -6,16 +6,11 @@ # error "please don't include this file directly" #endif -typedef struct { - volatile unsigned int slock; -} arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } - -typedef struct { - volatile signed int lock; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { 0 } +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> +#else +#include <asm/simple_spinlock_types.h> +#endif #endif diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 3b01c69a44aa..972ed0df154d 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -40,6 +40,7 @@ enum instruction_type { CACHEOP, BARRIER, SYSCALL, + SYSCALL_VECTORED_0, MFMSR, MTMSR, RFI, @@ -104,6 +105,12 @@ enum instruction_type { #define MKOP(t, f, s) ((t) | (f) | SIZE(s)) +/* Prefix instruction operands */ +#define GET_PREFIX_RA(i) (((i) >> 16) & 0x1f) +#define GET_PREFIX_R(i) ((i) & (1ul << 20)) + +extern s32 patch__exec_instr; + struct instruction_op { int type; int reg; diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index b72692702f35..283552cd0e58 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -28,7 +28,7 @@ extern void * memcpy(void *,const void *,__kernel_size_t); extern void * memmove(void *,const void *,__kernel_size_t); extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); -extern void * memcpy_flushcache(void *,const void *,__kernel_size_t); +void memcpy_flushcache(void *dest, const void *src, size_t size); void *__memset(void *s, int c, __kernel_size_t count); void *__memcpy(void *to, const void *from, __kernel_size_t n); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index b287cfc2dd85..cb326720a8a1 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -39,7 +39,6 @@ struct div_result { }; /* Accessor functions for the timebase (RTC on 601) registers. */ -/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */ #define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index d2d2c4bd8435..6047402b0a4d 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -17,7 +17,7 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_BOOK3S_601)) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) return 0; return mftb(); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 2db7ba789720..f0b6300e7dd3 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -43,7 +43,6 @@ extern void __init dump_numa_cpu_topology(void); extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); -extern int numa_update_cpu_topology(bool cpus_locked); static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) { @@ -78,11 +77,6 @@ static inline void sysfs_remove_device_from_node(struct device *dev, { } -static inline int numa_update_cpu_topology(bool cpus_locked) -{ - return 0; -} - static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) @@ -93,32 +87,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) -extern int start_topology_update(void); -extern int stop_topology_update(void); -extern int prrn_is_enabled(void); extern int find_and_online_cpu_nid(int cpu); -extern int timed_topology_update(int nsecs); #else -static inline int start_topology_update(void) -{ - return 0; -} -static inline int stop_topology_update(void) -{ - return 0; -} -static inline int prrn_is_enabled(void) -{ - return 0; -} static inline int find_and_online_cpu_nid(int cpu) { return 0; } -static inline int timed_topology_update(int nsecs) -{ - return 0; -} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ @@ -141,7 +115,6 @@ int get_physical_package_id(int cpu); #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) -int dlpar_cpu_readd(int cpu); #endif #endif diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index d08ea11b271c..309b4d65b74f 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -155,7 +155,6 @@ static inline void xive_smp_probe(void) { } static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } static inline void xive_smp_setup_cpu(void) { } static inline void xive_smp_disable_cpu(void) { } -static inline void xive_kexec_teardown_cpu(int secondary) { } static inline void xive_shutdown(void) { } static inline void xive_flush_interrupt(void) { } diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 264e266a85bf..c3af3f324c5a 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) +/* POWER10 registers */ +#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) +#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) +#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index c0c737215b00..3a700351feca 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -11,7 +11,7 @@ #include <asm-generic/mman-common.h> -#define PROT_SAO 0x10 /* Strong Access Ordering */ +#define PROT_SAO 0x10 /* Unsupported since v5.9 */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 9ccecc1d6840..50ef95e2f5b1 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -72,6 +72,11 @@ #define PAPR_PDSM_DIMM_CRITICAL 2 #define PAPR_PDSM_DIMM_FATAL 3 +/* struct nd_papr_pdsm_health.extension_flags field flags */ + +/* Indicate that the 'dimm_fuel_gauge' field is valid */ +#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 + /* * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH * Various flags indicate the health status of the dimm. @@ -84,6 +89,7 @@ * dimm_locked : Contents of the dimm cant be modified until CEC reboot * dimm_encrypted : Contents of dimm are encrypted. * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX + * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100 */ struct nd_papr_pdsm_health { union { @@ -96,6 +102,9 @@ struct nd_papr_pdsm_health { __u8 dimm_locked; __u8 dimm_encrypted; __u16 dimm_health; + + /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ + __u16 dimm_fuel_gauge; }; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; }; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 244542ae2a91..d4d5946224f8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o + of_platform.o prom_parse.o firmware.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ - paca.o nvram_64.o firmware.o note.o \ - syscall_64.o + paca.o nvram_64.o note.o syscall_64.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o @@ -71,7 +70,7 @@ obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o -obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 1f1ce8b86d5b..c7797eb958c7 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -178,11 +178,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __get_user_inatomic(temp.v[1], p++); ret |= __get_user_inatomic(temp.v[2], p++); ret |= __get_user_inatomic(temp.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __get_user_inatomic(temp.v[4], p++); ret |= __get_user_inatomic(temp.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __get_user_inatomic(temp.v[6], p++); ret |= __get_user_inatomic(temp.v[7], p++); @@ -263,11 +263,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __put_user_inatomic(data.v[1], p++); ret |= __put_user_inatomic(data.v[2], p++); ret |= __put_user_inatomic(data.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __put_user_inatomic(data.v[4], p++); ret |= __put_user_inatomic(data.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __put_user_inatomic(data.v[6], p++); ret |= __put_user_inatomic(data.v[7], p++); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6657dc6b2336..8711c2164b45 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -559,6 +559,8 @@ int main(void) OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); + OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); + OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); @@ -696,6 +698,9 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); + HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); + HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); + HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 470336277c67..65ab9fcebd31 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -7,6 +7,8 @@ * Author: Nathan Lynch */ +#define pr_fmt(fmt) "cacheinfo: " fmt + #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/kernel.h> @@ -166,7 +168,7 @@ static void release_cache_debugcheck(struct cache *cache) list_for_each_entry(iter, &cache_list, list) WARN_ONCE(iter->next_local == cache, - "cache for %pOF(%s) refers to cache for %pOF(%s)\n", + "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n", iter->ofnode, cache_type_string(iter), cache->ofnode, @@ -178,7 +180,7 @@ static void release_cache(struct cache *cache) if (!cache) return; - pr_debug("freeing L%d %s cache for %pOF\n", cache->level, + pr_debug("freeing L%d %s cache for %pOFP\n", cache->level, cache_type_string(cache), cache->ofnode); release_cache_debugcheck(cache); @@ -193,7 +195,7 @@ static void cache_cpu_set(struct cache *cache, int cpu) while (next) { WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map), - "CPU %i already accounted in %pOF(%s)\n", + "CPU %i already accounted in %pOFP(%s)\n", cpu, next->ofnode, cache_type_string(next)); cpumask_set_cpu(cpu, &next->shared_cpu_map); @@ -352,7 +354,7 @@ static int cache_is_unified_d(const struct device_node *np) static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level) { - pr_debug("creating L%d ucache for %pOF\n", level, node); + pr_debug("creating L%d ucache for %pOFP\n", level, node); return new_cache(cache_is_unified_d(node), level, node); } @@ -362,7 +364,7 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, { struct cache *dcache, *icache; - pr_debug("creating L%d dcache and icache for %pOF\n", level, + pr_debug("creating L%d dcache and icache for %pOFP\n", level, node); dcache = new_cache(CACHE_TYPE_DATA, level, node); @@ -418,12 +420,27 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger) } smaller->next_local = bigger; + + /* + * The cache->next_local list sorts by level ascending: + * L1d -> L1i -> L2 -> L3 ... + */ + WARN_ONCE((smaller->level == 1 && bigger->level > 2) || + (smaller->level > 1 && bigger->level != smaller->level + 1), + "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n", + smaller->level, smaller->ofnode, bigger->level, bigger->ofnode); } static void do_subsidiary_caches_debugcheck(struct cache *cache) { - WARN_ON_ONCE(cache->level != 1); - WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu")); + WARN_ONCE(cache->level != 1, + "instantiating cache chain from L%d %s cache for " + "%pOFP instead of an L1\n", cache->level, + cache_type_string(cache), cache->ofnode); + WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"), + "instantiating cache chain from node %pOFP of type '%s' " + "instead of a cpu node\n", cache->ofnode, + of_node_get_device_type(cache->ofnode)); } static void do_subsidiary_caches(struct cache *cache) @@ -647,12 +664,13 @@ static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache * return &cache->shared_cpu_map; } -static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +static ssize_t +show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list) { struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int ret, cpu; + int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; @@ -664,16 +682,25 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att mask = &cache->shared_cpu_map; } - ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n", - cpumask_pr_args(mask)); - buf[ret++] = '\n'; - buf[ret] = '\0'; - return ret; + return cpumap_print_to_pagebuf(list, buf, mask); +} + +static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, false); +} + +static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, true); } static struct kobj_attribute cache_shared_cpu_map_attr = __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + /* Attributes which should always be created -- the kobject/sysfs core * does this automatically via kobj_type->default_attrs. This is the * minimum data required to uniquely identify a cache. @@ -682,6 +709,7 @@ static struct attribute *cache_index_default_attrs[] = { &cache_type_attr.attr, &cache_level_attr.attr, &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, NULL, }; @@ -733,13 +761,13 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) rc = attr->show(&dir->kobj, attr, buf); if (rc <= 0) { pr_debug("not creating %s attribute for " - "%pOF(%s) (rc = %zd)\n", + "%pOFP(%s) (rc = %zd)\n", attr->attr.name, cache->ofnode, cache_type, rc); continue; } if (sysfs_create_file(&dir->kobj, &attr->attr)) - pr_debug("could not create %s attribute for %pOF(%s)\n", + pr_debug("could not create %s attribute for %pOFP(%s)\n", attr->attr.name, cache->ofnode, cache_type); } @@ -855,7 +883,7 @@ static void cache_cpu_clear(struct cache *cache, int cpu) struct cache *next = cache->next_local; WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map), - "CPU %i not accounted in %pOF(%s)\n", + "CPU %i not accounted in %pOFP(%s)\n", cpu, cache->ofnode, cache_type_string(cache)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index efdcfa714106..704e8b9501ee 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -94,13 +94,15 @@ _GLOBAL(__restore_cpu_power8) _GLOBAL(__setup_cpu_power10) mflr r11 bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 b 1f _GLOBAL(__setup_cpu_power9) mflr r11 - bl __init_FSCR -1: bl __init_PMU - bl __init_hvmode_206 + bl __init_FSCR_power9 + bl __init_PMU +1: bl __init_hvmode_206 mtlr r11 beqlr li r0,0 @@ -124,13 +126,15 @@ _GLOBAL(__setup_cpu_power9) _GLOBAL(__restore_cpu_power10) mflr r11 bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 b 1f _GLOBAL(__restore_cpu_power9) mflr r11 - bl __init_FSCR -1: bl __init_PMU - mfmsr r3 + bl __init_FSCR_power9 + bl __init_PMU +1: mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 beqlr @@ -198,6 +202,12 @@ __init_FSCR_power10: mtspr SPRN_FSCR, r3 // fall through +__init_FSCR_power9: + mfspr r3, SPRN_FSCR + ori r3, r3, FSCR_SCV + mtspr SPRN_FSCR, r3 + // fall through + __init_FSCR: mfspr r3,SPRN_FSCR ori r3,r3,FSCR_TAR|FSCR_EBB @@ -233,3 +243,10 @@ __init_PMU_ISA207: li r5,0 mtspr SPRN_MMCRS,r5 blr + +__init_PMU_ISA31: + li r5,0 + mtspr SPRN_MMCR3,r5 + LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) + mtspr SPRN_MMCRA,r5 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b4066354f073..3d406a9626e8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -120,7 +120,8 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ PPC_FEATURE2_ARCH_3_00 | \ PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN ) + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV) #define COMMON_USER_POWER10 COMMON_USER_POWER9 #define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \ PPC_FEATURE2_ARCH_3_1 | \ diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index 500f52fa4711..cdc2dccb987d 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -37,7 +37,7 @@ int set_dawr(int nr, struct arch_hw_breakpoint *brk) dawrx |= (mrd & 0x3f) << (63 - 53); if (ppc_md.set_dawr) - return ppc_md.set_dawr(dawr, dawrx); + return ppc_md.set_dawr(nr, dawr, dawrx); if (nr == 0) { mtspr(SPRN_DAWR0, dawr); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f17ff1200eaa..52680cf07c9d 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -18,61 +18,6 @@ #ifdef CONFIG_SMP -/* - * Doorbells must only be used if CPU_FTR_DBELL is available. - * msgsnd is used in HV, and msgsndp is used in !HV. - * - * These should be used by platform code that is aware of restrictions. - * Other arch code should use ->cause_ipi. - * - * doorbell_global_ipi() sends a dbell to any target CPU. - * Must be used only by architectures that address msgsnd target - * by PIR/get_hard_smp_processor_id. - */ -void doorbell_global_ipi(int cpu) -{ - u32 tag = get_hard_smp_processor_id(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * doorbell_core_ipi() sends a dbell to a target CPU in the same core. - * Must be used only by architectures that address msgsnd target - * by TIR/cpu_thread_in_core. - */ -void doorbell_core_ipi(int cpu) -{ - u32 tag = cpu_thread_in_core(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * Attempt to cause a core doorbell if destination is on the same core. - * Returns 1 on success, 0 on failure. - */ -int doorbell_try_core_ipi(int cpu) -{ - int this_cpu = get_cpu(); - int ret = 0; - - if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { - doorbell_core_ipi(cpu); - ret = 1; - } - - put_cpu(); - - return ret; -} - void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 3a409517c031..6f8c0c6b937a 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -24,7 +24,6 @@ /* Device-tree visible constants follow */ -#define ISA_V2_07B 2070 #define ISA_V3_0B 3000 #define ISA_V3_1 3100 @@ -67,6 +66,7 @@ struct dt_cpu_feature { extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); +extern long __machine_check_early_realmode_p10(struct pt_regs *regs); static int hv_mode; @@ -337,6 +337,7 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) #ifdef CONFIG_PPC_RADIX_MMU cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; return 1; @@ -450,6 +451,39 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) return 1; } +static void init_pmu_power10(void) +{ + init_pmu_power9(); + + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); +} + +static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) +{ + hfscr_pmu_enable(); + + init_pmu_power10(); + init_pmu_registers = init_pmu_power10; + + cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT; + + cur_cpu_spec->num_pmcs = 6; + cur_cpu_spec->pmc_type = PPC_PMC_IBM; + cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; + + return 1; +} + +static int __init feat_enable_mce_power10(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power10"; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10; + + return 1; +} + static int __init feat_enable_tm(struct dt_cpu_feature *f) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -587,6 +621,7 @@ static struct dt_cpu_feature_match __initdata {"little-endian", feat_enable_le, CPU_FTR_REAL_LE}, {"smt", feat_enable_smt, 0}, {"interrupt-facilities", feat_enable, 0}, + {"system-call-vectored", feat_enable, 0}, {"timer-facilities", feat_enable, 0}, {"timer-facilities-v3", feat_enable, 0}, {"debug-facilities", feat_enable, 0}, @@ -622,7 +657,7 @@ static struct dt_cpu_feature_match __initdata {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL}, {"processor-utilization-of-resources-register", feat_enable_purr, 0}, {"no-execute", feat_enable, 0}, - {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, + /* strong-access-ordering is unused */ {"cache-inhibited-large-page", feat_enable_large_ci, 0}, {"coprocessor-icswx", feat_enable, 0}, {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0}, @@ -638,7 +673,9 @@ static struct dt_cpu_feature_match __initdata {"group-start-register", feat_enable, 0}, {"pc-relative-addressing", feat_enable, 0}, {"machine-check-power9", feat_enable_mce_power9, 0}, + {"machine-check-power10", feat_enable_mce_power10, 0}, {"performance-monitor-power9", feat_enable_pmu_power9, 0}, + {"performance-monitor-power10", feat_enable_pmu_power10, 0}, {"event-based-branch-v3", feat_enable, 0}, {"random-number-generator", feat_enable, 0}, {"system-call-vectored", feat_disable, 0}, @@ -649,6 +686,7 @@ static struct dt_cpu_feature_match __initdata {"wait-v3", feat_enable, 0}, {"prefix-instructions", feat_enable, 0}, {"matrix-multiply-assist", feat_enable_mma, 0}, + {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1}, }; static bool __initdata using_dt_cpu_ftrs; @@ -674,12 +712,12 @@ static void __init cpufeatures_setup_start(u32 isa) { pr_info("setup for ISA %d\n", isa); - if (isa >= 3000) { + if (isa >= ISA_V3_0B) { cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00; } - if (isa >= 3100) { + if (isa >= ISA_V3_1) { cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1; } @@ -776,12 +814,6 @@ static __init void cpufeatures_cpu_quirks(void) } update_tlbie_feature_flag(version); - /* - * PKEY was not in the initial base or feature node - * specification, but it should become optional in the next - * cpu feature version sequence. - */ - cur_cpu_spec->cpu_features |= CPU_FTR_PKEY; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index d407981dec76..94682382fc8c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -167,39 +167,33 @@ void eeh_show_enabled(void) */ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); u32 cfg; int cap, i; int n = 0, l = 0; char buffer[128]; - if (!pdn) { - pr_warn("EEH: Note: No error log for absent device.\n"); - return 0; - } - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); - eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); pr_warn("EEH: PCI device/vendor: %08x\n", cfg); - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ if (edev->mode & EEH_DEV_BRIDGE) { - eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); + eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); pr_warn("EEH: Bridge secondary status: %04x\n", cfg); - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); pr_warn("EEH: Bridge control: %04x\n", cfg); } @@ -207,11 +201,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) /* Dump out the PCI-X command and status regs */ cap = edev->pcix_cap; if (cap) { - eeh_ops->read_config(pdn, cap, 4, &cfg); + eeh_ops->read_config(edev, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); pr_warn("EEH: PCI-X cmd: %08x\n", cfg); - eeh_ops->read_config(pdn, cap+4, 4, &cfg); + eeh_ops->read_config(edev, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); pr_warn("EEH: PCI-X status: %08x\n", cfg); } @@ -223,7 +217,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -250,7 +244,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E AER capability register set follows:\n"); for (i=0; i<=13; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -726,7 +720,6 @@ static void eeh_disable_and_save_dev_state(struct eeh_dev *edev, static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -734,73 +727,14 @@ static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) return; /* Apply customization from firmware */ - if (pdn && eeh_ops->restore_config) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); /* The caller should restore state for the specified device */ if (pdev != dev) pci_restore_state(pdev); } -int eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout if possible */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; -} - /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct @@ -977,15 +911,13 @@ int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) */ void eeh_save_bars(struct eeh_dev *edev) { - struct pci_dn *pdn; int i; - pdn = eeh_dev_to_pdn(edev); - if (!pdn) + if (!edev) return; for (i = 0; i < 16; i++) - eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]); /* * For PCI bridges including root port, we need enable bus @@ -1096,7 +1028,7 @@ static int eeh_init(void) /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - eeh_dev_phb_init_dynamic(hose); + eeh_phb_pe_create(hose); eeh_addr_cache_init(); @@ -1175,7 +1107,7 @@ void eeh_probe_device(struct pci_dev *dev) * FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ if (edev->pdev && edev->pdev != dev) { - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); @@ -1254,7 +1186,7 @@ void eeh_remove_device(struct pci_dev *dev) edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); else edev->mode |= EEH_DEV_DISCONNECTED; } diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c deleted file mode 100644 index 7370185c7a05..000000000000 --- a/arch/powerpc/kernel/eeh_dev.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The file intends to implement dynamic creation of EEH device, which will - * be bound with OF node and PCI device simutaneously. The EEH devices would - * be foundamental information for EEH core components to work proerly. Besides, - * We have to support multiple situations where dynamic creation of EEH device - * is required: - * - * 1) Before PCI emunation starts, we need create EEH devices according to the - * PCI sensitive OF nodes. - * 2) When PCI emunation is done, we need do the binding between PCI device and - * the associated EEH device. - * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device - * will be created while PCI sensitive OF node is detected from DR. - * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If - * PHB is newly inserted, we also need create EEH devices accordingly. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. - */ - -#include <linux/export.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/string.h> - -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - -/** - * eeh_dev_init - Create EEH device according to OF node - * @pdn: PCI device node - * - * It will create EEH device according to the given OF node. The function - * might be called by PCI emunation, DR, PHB hotplug. - */ -struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) -{ - struct eeh_dev *edev; - - /* Allocate EEH device */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) - return NULL; - - /* Associate EEH device with OF node */ - pdn->edev = edev; - edev->pdn = pdn; - edev->bdfn = (pdn->busno << 8) | pdn->devfn; - edev->controller = pdn->phb; - - return edev; -} - -/** - * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB - * @phb: PHB - * - * Scan the PHB OF node and its child association, then create the - * EEH devices accordingly - */ -void eeh_dev_phb_init_dynamic(struct pci_controller *phb) -{ - /* EEH PE for PHB */ - eeh_phb_pe_create(phb); -} diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7b048cee767c..4197e4559f65 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -425,8 +425,8 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev, pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV - if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) - eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); + if (eeh_ops->notify_resume) + eeh_ops->notify_resume(edev); #endif return PCI_ERS_RESULT_NONE; } @@ -477,7 +477,7 @@ static void *eeh_add_virt_device(struct eeh_dev *edev) } #ifdef CONFIG_PCI_IOV - pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index); + pci_iov_add_virtfn(edev->physfn, edev->vf_index); #endif return NULL; } @@ -521,9 +521,7 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata) if (edev->physfn) { #ifdef CONFIG_PCI_IOV - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); + pci_iov_remove_virtfn(edev->physfn, edev->vf_index); edev->pdev = NULL; #endif if (rmv_data) @@ -544,7 +542,7 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata) continue; edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); } return NULL; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 177852e39a25..d2aaaa73fdd5 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -319,56 +319,22 @@ struct eeh_pe *eeh_pe_get(struct pci_controller *phb, } /** - * eeh_pe_get_parent - Retrieve the parent PE + * eeh_pe_tree_insert - Add EEH device to parent PE * @edev: EEH device + * @new_pe_parent: PE to create additional PEs under * - * The whole PEs existing in the system are organized as hierarchy - * tree. The function is used to retrieve the parent PE according - * to the parent EEH device. - */ -static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) -{ - struct eeh_dev *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - /* - * It might have the case for the indirect parent - * EEH device already having associated PE, but - * the direct parent EEH device doesn't have yet. - */ - if (edev->physfn) - pdn = pci_get_pdn(edev->physfn); - else - pdn = pdn ? pdn->parent : NULL; - while (pdn) { - /* We're poking out of PCI territory */ - parent = pdn_to_eeh_dev(pdn); - if (!parent) - return NULL; - - if (parent->pe) - return parent->pe; - - pdn = pdn->parent; - } - - return NULL; -} - -/** - * eeh_add_to_parent_pe - Add EEH device to parent PE - * @edev: EEH device + * Add EEH device to the PE in edev->pe_config_addr. If a PE already + * exists with that address then @edev is added to that PE. Otherwise + * a new PE is created and inserted into the PE tree as a child of + * @new_pe_parent. * - * Add EEH device to the parent PE. If the parent PE already - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, - * we have to create new PE to hold the EEH device and the new - * PE will be linked to its parent PE as well. + * If @new_pe_parent is NULL then the new PE will be inserted under + * directly under the the PHB. */ -int eeh_add_to_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) { + struct pci_controller *hose = edev->controller; struct eeh_pe *pe, *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - int config_addr = (pdn->busno << 8) | (pdn->devfn); /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { @@ -382,7 +348,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. */ - pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr); + pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn); if (pe) { if (pe->type & EEH_PE_INVALID) { list_add_tail(&edev->entry, &pe->edevs); @@ -399,8 +365,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) parent = parent->parent; } - eeh_edev_dbg(edev, - "Added to device PE (parent: PE#%x)\n", + eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n", pe->parent->addr); } else { /* Mark the PE as type of PCI bus */ @@ -416,15 +381,15 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Create a new EEH PE */ if (edev->physfn) - pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF); + pe = eeh_pe_alloc(hose, EEH_PE_VF); else - pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE); + pe = eeh_pe_alloc(hose, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } pe->addr = edev->pe_config_addr; - pe->config_addr = config_addr; + pe->config_addr = edev->bdfn; /* * Put the new EEH PE into hierarchy tree. If the parent @@ -432,34 +397,35 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * to PHB directly. Otherwise, we have to associate the * PE with its parent. */ - parent = eeh_pe_get_parent(edev); - if (!parent) { - parent = eeh_phb_pe_get(pdn->phb); - if (!parent) { + if (!new_pe_parent) { + new_pe_parent = eeh_phb_pe_get(hose); + if (!new_pe_parent) { pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", - __func__, pdn->phb->global_number); + __func__, hose->global_number); edev->pe = NULL; kfree(pe); return -EEXIST; } } - pe->parent = parent; + + /* link new PE into the tree */ + pe->parent = new_pe_parent; + list_add_tail(&pe->child, &new_pe_parent->child_list); /* * Put the newly created PE into the child list and * link the EEH device accordingly. */ - list_add_tail(&pe->child, &parent->child_list); list_add_tail(&edev->entry, &pe->edevs); edev->pe = pe; - eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n", - pe->parent->addr); + eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n", + new_pe_parent->addr); return 0; } /** - * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE + * eeh_pe_tree_remove - Remove one EEH device from the associated PE * @edev: EEH device * * The PE hierarchy tree might be changed when doing PCI hotplug. @@ -467,7 +433,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. */ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_remove(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; bool keep, recover; @@ -698,7 +664,6 @@ void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed) */ static void eeh_bridge_check_link(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int cap; uint32_t val; int timeout = 0; @@ -714,32 +679,32 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) /* Check slot status */ cap = edev->pcie_cap; - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val); return; } /* Check power status if we have the capability */ - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val); if (val & PCI_EXP_SLTCAP_PCP) { - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val); if (val & PCI_EXP_SLTCTL_PCC) { eeh_edev_dbg(edev, "In power-off state, power it on ...\n"); val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); val |= (0x0100 & PCI_EXP_SLTCTL_PIC); - eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val); msleep(2 * 1000); } } /* Enable link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val); val &= ~PCI_EXP_LNKCTL_LD; - eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val); if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val); msleep(1000); @@ -752,7 +717,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) msleep(20); timeout += 20; - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val); if (val & PCI_EXP_LNKSTA_DLLLA) break; } @@ -769,7 +734,6 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) static void eeh_restore_bridge_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; /* @@ -777,20 +741,20 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) * Bus numbers and windows: 0x18 - 0x30 */ for (i = 4; i < 13; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* Rom: 0x38 */ - eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]); + eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]); /* Cache line & Latency timer: 0xC 0xD */ - eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, - SAVED_BYTE(PCI_LATENCY_TIMER)); + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); /* Max latency, min grant, interrupt ping and line: 0x3C */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] | + eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); /* Check the PCIe link is ready */ @@ -799,28 +763,27 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) static void eeh_restore_device_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; u32 cmd; for (i = 4; i < 10; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* 12 == Expansion ROM Address */ - eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]); + eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]); - eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* * Restore PERR & SERR bits, some devices require it, * don't touch the other command bits */ - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd); if (edev->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; else @@ -829,7 +792,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) cmd |= PCI_COMMAND_SERR; else cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd); + eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd); } /** @@ -843,16 +806,14 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) */ static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - /* Do special restore for bridges */ if (edev->mode & EEH_DEV_BRIDGE) eeh_restore_bridge_bars(edev); else eeh_restore_device_bars(edev); - if (eeh_ops->restore_config && pdn) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); } /** diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 4fb0f1e1017a..429620da73ba 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -99,7 +99,7 @@ static ssize_t eeh_notify_resume_store(struct device *dev, if (!edev || !edev->pe || !eeh_ops->notify_resume) return -ENODEV; - if (eeh_ops->notify_resume(pci_get_pdn(pdev))) + if (eeh_ops->notify_resume(edev)) return -EIO; return count; diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 217ebdf5b00b..f4d0af8e1136 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -36,6 +36,12 @@ #include "head_32.h" /* + * powerpc relies on return from interrupt/syscall being context synchronising + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional + * synchronisation instructions. + */ + +/* * Align to 4k in order to ensure that all functions modyfing srr0/srr1 * fit into one page in order to not encounter a TLB miss between the * modification of srr0/srr1 and the associated rfi. diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9d49338e0c85..33a42e42c56f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -64,15 +64,173 @@ exception_marker: .section ".text" .align 7 +#ifdef CONFIG_PPC_BOOK3S +.macro system_call_vectored name trapnr + .globl system_call_vectored_\name +system_call_vectored_\name: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne .Ltabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + INTERRUPT_TO_KERNEL + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_LINK(r1) + std r11,_CTR(r1) + + li r11,\trapnr + std r11,_TRAP(r1) + std r12,_CCR(r1) + std r3,ORIG_GPR3(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + + /* + * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which + * would clobber syscall parameters. Also we always enter with IRQs + * enabled and nothing pending. system_call_exception() will call + * trace_hardirqs_off(). + * + * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The + * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED. + */ + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_vectored_\name\()_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,1 /* scv */ + bl syscall_exit_prepare + + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + cmpdi r3,0 + bne .Lsyscall_vectored_\name\()_restore_regs + + /* rfscv returns with LR->NIA and CTR->MSR */ + mtlr r4 + mtctr r5 + + /* Could zero these as per ABI, but we may consider a stricter ABI + * which preserves these if libc implementations can benefit, so + * restore them for now until further measurement is done. */ + ld r0,GPR0(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + /* Zero volatile regs that may contain sensitive kernel data */ + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtspr SPRN_XER,r0 + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFSCV_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_vectored_\name\()_restore_regs: + li r3,0 + mtmsrd r3,1 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + + ld r3,_CTR(r1) + ld r4,_LINK(r1) + ld r5,_XER(r1) + + REST_NVGPRS(r1) + ld r0,GPR0(r1) + mtcr r2 + mtctr r3 + mtlr r4 + mtspr SPRN_XER,r5 + REST_10GPRS(2, r1) + REST_2GPRS(12, r1) + ld r1,GPR1(r1) + RFI_TO_USER +.endm + +system_call_vectored common 0x3000 +/* + * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 + * which is tested by system_call_exception when r0 is -1 (as set by vector + * entry code). + */ +system_call_vectored sigill 0x7ff0 + + +/* + * Entered via kernel return set up by kernel/sstep.c, must match entry regs + */ + .globl system_call_vectored_emulate +system_call_vectored_emulate: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + b system_call_vectored_common +#endif + + .balign IFETCH_ALIGN_BYTES .globl system_call_common system_call_common: +_ASM_NOKPROBE_SYMBOL(system_call_common) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -_ASM_NOKPROBE_SYMBOL(system_call_common) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -138,6 +296,7 @@ END_BTB_FLUSH_SECTION .Lsyscall_exit: addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 /* !scv */ bl syscall_exit_prepare ld r2,_CCR(r1) @@ -224,21 +383,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) b . /* prevent speculative execution */ #endif +#ifdef CONFIG_PPC_BOOK3S +_GLOBAL(ret_from_fork_scv) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_vectored_common_exit +#endif + _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) - li r3,0 + li r3,0 /* fork() return value */ b .Lsyscall_exit _GLOBAL(ret_from_kernel_thread) bl schedule_tail REST_NVGPRS(r1) - mtlr r14 + mtctr r14 mr r3,r15 #ifdef PPC64_ELF_ABI_v2 mr r12,r14 #endif - blrl + bctrl li r3,0 b .Lsyscall_exit @@ -259,10 +426,7 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); #define FLUSH_COUNT_CACHE \ 1: nop; \ - patch_site 1b, patch__call_flush_count_cache - - -#define BCCTR_FLUSH .long 0x4c400420 + patch_site 1b, patch__call_flush_branch_caches .macro nops number .rept \number @@ -271,8 +435,8 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); .endm .balign 32 -.global flush_count_cache -flush_count_cache: +.global flush_branch_caches +flush_branch_caches: /* Save LR into r9 */ mflr r9 @@ -294,7 +458,7 @@ flush_count_cache: li r9,0x7fff mtctr r9 - BCCTR_FLUSH + PPC_BCCTR_FLUSH 2: nop patch_site 2b patch__flush_count_cache_return @@ -303,7 +467,7 @@ flush_count_cache: .rept 278 .balign 32 - BCCTR_FLUSH + PPC_BCCTR_FLUSH nops 7 .endr @@ -357,7 +521,7 @@ _GLOBAL(_switch) * kernel/sched/core.c). * * Uncacheable stores in the case of involuntary preemption must - * be taken care of. The smp_mb__before_spin_lock() in __schedule() + * be taken care of. The smp_mb__after_spinlock() in __schedule() * is implemented as hwsync on powerpc, which orders MMIO too. So * long as there is an hwsync in the context switch path, it will * be executed on the source CPU after the task has performed diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 446e54c3f71e..f7d748b88705 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -508,8 +508,24 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .macro __GEN_COMMON_BODY name .if IMASK + .if ! ISTACK + .error "No support for masked interrupt to use custom stack" + .endif + + /* If coming from user, skip soft-mask tests. */ + andi. r10,r12,MSR_PR + bne 2f + + /* Kernel code running below __end_interrupts is implicitly + * soft-masked */ + LOAD_HANDLER(r10, __end_interrupts) + cmpld r11,r10 + li r10,IMASK + blt- 1f + + /* Test the soft mask state against our interrupt's bit */ lbz r10,PACAIRQSOFTMASK(r13) - andi. r10,r10,IMASK +1: andi. r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE @@ -540,7 +556,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ - mr r10,r1 /* Save r1 */ +2: mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ beq- 100f ld r1,PACAKSAVE(r13) /* kernel stack to use */ @@ -740,6 +756,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * guarantee they will be delivered virtually. Some conditions (see the ISA) * cause exceptions to be delivered in real mode. * + * The scv instructions are a special case. They get a 0x3000 offset applied. + * scv exceptions have unique reentrancy properties, see below. + * * It's impossible to receive interrupts below 0x300 via AIL. * * KVM: None of the virtual exceptions are from the guest. Anything that @@ -749,8 +768,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors - * 0x1900 - 0x3fff : Real mode trampolines - * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors + * 0x1900 - 0x2fff : Real mode trampolines + * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors * 0x5900 - 0x6fff : Relon mode trampolines * 0x7000 - 0x7fff : FWNMI data area * 0x8000 - .... : Common interrupt handlers, remaining early @@ -761,8 +780,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * vectors there. */ OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900) -OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000) -OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900) +OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000) +OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900) OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000) #ifdef CONFIG_PPC_POWERNV @@ -798,6 +817,77 @@ USE_FIXED_SECTION(real_vectors) .globl __start_interrupts __start_interrupts: +/** + * Interrupt 0x3000 - System Call Vectored Interrupt (syscall). + * This is a synchronous interrupt invoked with the "scv" instruction. The + * system call does not alter the HV bit, so it is directed to the OS. + * + * Handling: + * scv instructions enter the kernel without changing EE, RI, ME, or HV. + * In particular, this means we can take a maskable interrupt at any point + * in the scv handler, which is unlike any other interrupt. This is solved + * by treating the instruction addresses below __end_interrupts as being + * soft-masked. + * + * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and + * ensure scv is never executed with relocation off, which means AIL-0 + * should never happen. + * + * Before leaving the below __end_interrupts text, at least of the following + * must be true: + * - MSR[PR]=1 (i.e., return to userspace) + * - MSR_EE|MSR_RI is set (no reentrant exceptions) + * - Standard kernel environment is set up (stack, paca, etc) + * + * Call convention: + * + * syscall register convention is in Documentation/powerpc/syscall64-abi.rst + */ +EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) + /* SCV 0 */ + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_tramp +#else + b system_call_vectored_common +#endif + nop + + /* SCV 1 - 127 */ + .rept 127 + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + li r0,-1 /* cause failure */ +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_sigill_tramp +#else + b system_call_vectored_sigill +#endif + .endr +EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) + +#ifdef CONFIG_RELOCATABLE +TRAMP_VIRT_BEGIN(system_call_vectored_tramp) + __LOAD_HANDLER(r10, system_call_vectored_common) + mtctr r10 + bctr + +TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) + __LOAD_HANDLER(r10, system_call_vectored_sigill) + mtctr r10 + bctr +#endif + + /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x100) @@ -2838,7 +2928,8 @@ masked_interrupt: ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) - /* returns to kernel where r13 must be set up, so don't restore it */ + ld r13,PACA_EXGEN+EX_R13(r13) + /* May return to masked low address where r13 is not set up */ .if \hsrr HRFI_TO_KERNEL .else @@ -2946,6 +3037,47 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +TRAMP_REAL_BEGIN(rfscv_flush_fallback) + /* system call volatile */ + mr r7,r13 + GET_PACA(r13); + mr r8,r1 + ld r1,PACAKSAVE(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + li r9,0 + li r10,0 + li r11,0 + mr r1,r8 + mr r13,r7 + RFSCV + USE_TEXT_SECTION() MASKED_INTERRUPT MASKED_INTERRUPT hsrr=1 @@ -2997,6 +3129,10 @@ EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) USE_FIXED_SECTION(virt_trampolines) /* + * All code below __end_interrupts is treated as soft-masked. If + * any code runs here with MSR[EE]=1, it must then cope with pending + * soft interrupt being raised (i.e., by ensuring it is replayed). + * * The __end_interrupts marker must be past the out-of-line (OOL) * handlers, so that they are copied to real address 0x100 when running * a relocatable kernel. This ensures they can be reached from the short diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 78ab9a6ee6ac..10ebb4bf71ad 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -32,6 +32,14 @@ #include <asm/fadump-internal.h> #include <asm/setup.h> +/* + * The CPU who acquired the lock to trigger the fadump crash should + * wait for other CPUs to enter. + * + * The timeout is in milliseconds. + */ +#define CRASH_TIMEOUT 500 + static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); @@ -39,7 +47,10 @@ static void __init fadump_reserve_crash_area(u64 base); struct kobject *fadump_kobj; #ifndef CONFIG_PRESERVE_FA_DUMP + +static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); + struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ @@ -668,8 +679,11 @@ early_param("fadump_reserve_mem", early_fadump_reserve_mem); void crash_fadump(struct pt_regs *regs, const char *str) { + unsigned int msecs; struct fadump_crash_info_header *fdh = NULL; int old_cpu, this_cpu; + /* Do not include first CPU */ + unsigned int ncpus = num_online_cpus() - 1; if (!should_fadump_crash()) return; @@ -685,6 +699,8 @@ void crash_fadump(struct pt_regs *regs, const char *str) old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); if (old_cpu != -1) { + atomic_inc(&cpus_in_fadump); + /* * We can't loop here indefinitely. Wait as long as fadump * is in force. If we race with fadump un-registration this @@ -708,6 +724,16 @@ void crash_fadump(struct pt_regs *regs, const char *str) fdh->online_mask = *cpu_online_mask; + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(&(fdh->regs)) == 0x100) { + msecs = CRASH_TIMEOUT; + while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) + mdelay(1); + } + fw_dump.ops->fadump_trigger(fdh, str); } diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index cc4a5e3f51f1..fe48d319d490 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -11,8 +11,27 @@ #include <linux/export.h> #include <linux/cache.h> +#include <linux/of.h> #include <asm/firmware.h> +#ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; EXPORT_SYMBOL_GPL(powerpc_firmware_features); +#endif + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} +#endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index cac22cb97a8c..4ae39db70044 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -107,9 +107,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif - /* Don't care if r4 overflows, this is desired behaviour */ - lbz r4,THREAD_LOAD_FP(r5) - addi r4,r4,1 + li r4,1 stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 705c042309d8..f3ab94d73936 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -474,7 +474,7 @@ InstructionTLBMiss: /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR @@ -484,7 +484,7 @@ InstructionTLBMiss: li r1,_PAGE_PRESENT | _PAGE_EXEC #endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif @@ -541,7 +541,7 @@ DataLoadTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -549,7 +549,7 @@ DataLoadTLBMiss: #else li r1, _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -621,7 +621,7 @@ DataStoreTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -629,7 +629,7 @@ DataStoreTLBMiss: #else li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -673,6 +673,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define altivec_assist_exception unknown_exception #endif +#ifndef CONFIG_TAU_INT +#define TAUException unknown_exception +#endif + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 926bfa73586a..5b282d9965a5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -620,7 +620,7 @@ start_here: ori r6, r6, swapper_pg_dir@l lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0000daf0e1da..1f4a1efa0074 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -418,8 +418,9 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) if (dawr_enabled()) { max_len = DAWR_MAX_LEN; - /* DAWR region can't cross 512 bytes boundary */ - if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M)) + /* DAWR region can't cross 512 bytes boundary on p10 predecessors */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512))) return -EINVAL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { /* 8xx can setup a range without limitation */ @@ -498,11 +499,11 @@ static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info return ((info->address <= dar) && (dar - info->address < info->len)); } -static bool dar_user_range_overlaps(unsigned long dar, int size, - struct arch_hw_breakpoint *info) +static bool ea_user_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) { - return ((dar < info->address + info->len) && - (dar + size > info->address)); + return ((ea < info->address + info->len) && + (ea + size > info->address)); } static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) @@ -515,20 +516,22 @@ static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) return ((hw_start_addr <= dar) && (hw_end_addr > dar)); } -static bool dar_hw_range_overlaps(unsigned long dar, int size, - struct arch_hw_breakpoint *info) +static bool ea_hw_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) { unsigned long hw_start_addr, hw_end_addr; hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); - return ((dar < hw_end_addr) && (dar + size > hw_start_addr)); + return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); } /* * If hw has multiple DAWR registers, we also need to check all * dawrx constraint bits to confirm this is _really_ a valid event. + * If type is UNKNOWN, but privilege level matches, consider it as + * a positive match. */ static bool check_dawrx_constraints(struct pt_regs *regs, int type, struct arch_hw_breakpoint *info) @@ -536,7 +539,12 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) return false; - if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE)) + /* + * The Cache Management instructions other than dcbz never + * cause a match. i.e. if type is CACHEOP, the instruction + * is dcbz, and dcbz is treated as Store. + */ + if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) return false; if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) @@ -553,7 +561,8 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, * including extraneous exception. Otherwise return false. */ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, - int type, int size, struct arch_hw_breakpoint *info) + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info) { bool in_user_range = dar_in_user_range(regs->dar, info); bool dawrx_constraints; @@ -569,22 +578,27 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, } if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { - if (in_user_range) - return true; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; - if (dar_in_hw_range(regs->dar, info)) { - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - return true; - } - return false; + return true; } dawrx_constraints = check_dawrx_constraints(regs, type, info); - if (dar_user_range_overlaps(regs->dar, size, info)) + if (type == UNKNOWN) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return dawrx_constraints; + } + + if (ea_user_range_overlaps(ea, size, info)) return dawrx_constraints; - if (dar_hw_range_overlaps(regs->dar, size, info)) { + if (ea_hw_range_overlaps(ea, size, info)) { if (dawrx_constraints) { info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; return true; @@ -593,8 +607,17 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, return false; } +static int cache_op_size(void) +{ +#ifdef __powerpc64__ + return ppc64_caches.l1d.block_size; +#else + return L1_CACHE_BYTES; +#endif +} + static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, - int *type, int *size, bool *larx_stcx) + int *type, int *size, unsigned long *ea) { struct instruction_op op; @@ -602,16 +625,23 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, return; analyse_instr(&op, regs, *instr); - - /* - * Set size = 8 if analyse_instr() fails. If it's a userspace - * watchpoint(valid or extraneous), we can notify user about it. - * If it's a kernel watchpoint, instruction emulation will fail - * in stepping_handler() and watchpoint will be disabled. - */ *type = GETTYPE(op.type); - *size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8; - *larx_stcx = (*type == LARX || *type == STCX); + *ea = op.ea; +#ifdef __powerpc64__ + if (!(regs->msr & MSR_64BIT)) + *ea &= 0xffffffffUL; +#endif + + *size = GETSIZE(op.type); + if (*type == CACHEOP) { + *size = cache_op_size(); + *ea &= ~(*size - 1); + } +} + +static bool is_larx_stcx_instr(int type) +{ + return type == LARX || type == STCX; } /* @@ -678,7 +708,7 @@ int hw_breakpoint_handler(struct die_args *args) struct ppc_inst instr = ppc_inst(0); int type = 0; int size = 0; - bool larx_stcx = false; + unsigned long ea; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -692,7 +722,7 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); if (!IS_ENABLED(CONFIG_PPC_8xx)) - get_instr_detail(regs, &instr, &type, &size, &larx_stcx); + get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { bp[i] = __this_cpu_read(bp_per_reg[i]); @@ -702,7 +732,7 @@ int hw_breakpoint_handler(struct die_args *args) info[i] = counter_arch_bp(bp[i]); info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (check_constraints(regs, instr, type, size, info[i])) { + if (check_constraints(regs, instr, ea, type, size, info[i])) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { handler_error(bp[i], info[i]); @@ -744,7 +774,7 @@ int hw_breakpoint_handler(struct die_args *args) } if (!IS_ENABLED(CONFIG_PPC_8xx)) { - if (larx_stcx) { + if (is_larx_stcx_instr(type)) { for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 05b1cc0e009e..bf21ebd36190 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -621,13 +621,14 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); seq_printf(p, " Machine check exceptions\n"); +#ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { seq_printf(p, "%*s: ", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat, j).hmi_exceptions); + seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } +#endif seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) @@ -665,7 +666,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; sum += per_cpu(irq_stat, cpu).timer_irqs_others; - sum += per_cpu(irq_stat, cpu).hmi_exceptions; +#ifdef CONFIG_PPC_BOOK3S_64 + sum += paca_ptrs[cpu]->hmi_irqs; +#endif sum += per_cpu(irq_stat, cpu).sreset_irqs; #ifdef CONFIG_PPC_WATCHDOG sum += per_cpu(irq_stat, cpu).soft_nmi_irqs; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9cc792a3a6a9..6ab9b4d037c3 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -244,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... */ - printk("Can't step on instruction %x\n", ppc_inst_val(insn)); + printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); BUG(); } else { /* diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index fd90c0eda229..ada59f6c4298 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -49,6 +49,20 @@ static struct irq_work mce_ue_event_irq_work = { DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); + +int mce_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_register_notifier); + +int mce_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_unregister_notifier); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -278,6 +292,7 @@ static void machine_process_ue_event(struct work_struct *work) while (__this_cpu_read(mce_ue_count) > 0) { index = __this_cpu_read(mce_ue_count) - 1; evt = this_cpu_ptr(&mce_ue_event_queue[index]); + blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* * This should probably queued elsewhere, but @@ -370,6 +385,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_user_types[] = { "Indeterminate", "tlbie(l) invalid", + "scv invalid", }; static const char *mc_ra_types[] = { "Indeterminate", @@ -711,7 +727,7 @@ long hmi_exception_realmode(struct pt_regs *regs) { int ret; - __this_cpu_inc(irq_stat.hmi_exceptions); + local_paca->hmi_irqs++; ret = hmi_handle_debugtrig(regs); if (ret >= 0) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c3b522bff9b4..b7e173754a2e 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -243,6 +243,45 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, 0, 0, 0, 0, 0, 0 } }; +static const struct mce_ierror_table mce_p10_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008080000, true, + MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; + struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ @@ -361,6 +400,46 @@ static const struct mce_derror_table mce_p9_derror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, false, 0, 0, 0, 0, 0 } }; +static const struct mce_derror_table mce_p10_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; + static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) { @@ -657,3 +736,8 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } + +long __machine_check_early_realmode_p10(struct pt_regs *regs) +{ + return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); +} diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1864605eca29..7bb46ad98207 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -413,20 +413,6 @@ _GLOBAL(kexec_sequence) li r0,0 std r0,16(r1) -BEGIN_FTR_SECTION - /* - * This is the best time to turn AMR/IAMR off. - * key 0 is used in radix for supervisor<->user - * protection, but on hash key 0 is reserved - * ideally we want to enter with a clean state. - * NOTE, we rely on r0 being 0 from above. - */ - mtspr SPRN_IAMR,r0 -BEGIN_FTR_SECTION_NESTED(42) - mtspr SPRN_AMOR,r0 -END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* save regs for local vars on new stack. * yes, we won't go back, but ... */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index df649acb5631..a211b0253cdb 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -86,3 +86,14 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } + +#ifdef MODULES_VADDR +void *module_alloc(unsigned long size) +{ + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 71a3f97dc988..f89376ff633e 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -62,8 +62,8 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); - /* Create EEH PEs for the PHB */ - eeh_dev_phb_init_dynamic(phb); + /* Create EEH PE for the PHB */ + eeh_phb_pe_create(phb); /* Scan the bus */ pcibios_scan_phb(phb); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 74da65aacbc9..0ad15768d762 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -57,8 +57,8 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, #define LPPACA_SIZE 0x400 -static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, - unsigned long limit, int cpu) +static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit, + int cpu) { size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE); static unsigned long shared_lppaca_size; @@ -68,6 +68,13 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, if (!shared_lppaca) { memblock_set_bottom_up(true); + /* + * See Documentation/powerpc/ultravisor.rst for more details. + * + * UV/HV data sharing is in PAGE_SIZE granularity. In order to + * minimize the number of pages shared, align the allocation to + * PAGE_SIZE. + */ shared_lppaca = memblock_alloc_try_nid(shared_lppaca_total_size, PAGE_SIZE, MEMBLOCK_LOW_LIMIT, @@ -122,7 +129,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) return NULL; if (is_secure_guest()) - lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu); + lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu); else lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e654df55969..e99b7c547d7e 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -124,9 +124,28 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_EEH +static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) +{ + struct eeh_dev *edev; + + /* Allocate EEH device */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return NULL; + + /* Associate EEH device with OF node */ + pdn->edev = edev; + edev->pdn = pdn; + edev->bdfn = (pdn->busno << 8) | pdn->devfn; + edev->controller = pdn->phb; + + return edev; +} +#endif /* CONFIG_EEH */ + #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, - int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -143,7 +162,6 @@ static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, pdn->parent = parent; pdn->busno = busno; pdn->devfn = devfn; - pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); @@ -174,7 +192,7 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { struct eeh_dev *edev __maybe_unused; - pdn = add_one_sriov_vf_pdn(parent, i, + pdn = add_one_sriov_vf_pdn(parent, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { @@ -187,7 +205,10 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) /* Create the EEH device for the VF */ edev = eeh_dev_init(pdn); BUG_ON(!edev); + + /* FIXME: these should probably be populated by the EEH probe */ edev->physfn = pdev; + edev->vf_index = i; #endif /* CONFIG_EEH */ } return pci_get_pdn(pdev); @@ -242,7 +263,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) * have a configured PE. */ if (edev->pe) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); pdn->edev = NULL; kfree(edev); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 794b754deec2..016bd831908e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -471,49 +471,58 @@ EXPORT_SYMBOL(giveup_all); #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_FPU -static int restore_fp(struct task_struct *tsk) +static bool should_restore_fp(void) { - if (tsk->thread.load_fp) { - load_fp_state(¤t->thread.fp_state); + if (current->thread.load_fp) { current->thread.load_fp++; - return 1; + return true; } - return 0; + return false; +} + +static void do_restore_fp(void) +{ + load_fp_state(¤t->thread.fp_state); } #else -static int restore_fp(struct task_struct *tsk) { return 0; } +static bool should_restore_fp(void) { return false; } +static void do_restore_fp(void) { } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC -#define loadvec(thr) ((thr).load_vec) -static int restore_altivec(struct task_struct *tsk) +static bool should_restore_altivec(void) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { - load_vr_state(&tsk->thread.vr_state); - tsk->thread.used_vr = 1; - tsk->thread.load_vec++; - - return 1; + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) { + current->thread.load_vec++; + return true; } - return 0; + return false; +} + +static void do_restore_altivec(void) +{ + load_vr_state(¤t->thread.vr_state); + current->thread.used_vr = 1; } #else -#define loadvec(thr) 0 -static inline int restore_altivec(struct task_struct *tsk) { return 0; } +static bool should_restore_altivec(void) { return false; } +static void do_restore_altivec(void) { } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -static int restore_vsx(struct task_struct *tsk) +static bool should_restore_vsx(void) { - if (cpu_has_feature(CPU_FTR_VSX)) { - tsk->thread.used_vsr = 1; - return 1; - } - - return 0; + if (cpu_has_feature(CPU_FTR_VSX)) + return true; + return false; +} +static void do_restore_vsx(void) +{ + current->thread.used_vsr = 1; } #else -static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static bool should_restore_vsx(void) { return false; } +static void do_restore_vsx(void) { } #endif /* CONFIG_VSX */ /* @@ -529,32 +538,42 @@ static inline int restore_vsx(struct task_struct *tsk) { return 0; } void notrace restore_math(struct pt_regs *regs) { unsigned long msr; - - if (!MSR_TM_ACTIVE(regs->msr) && - !current->thread.load_fp && !loadvec(current->thread)) - return; + unsigned long new_msr = 0; msr = regs->msr; - msr_check_and_set(msr_all_available); /* - * Only reload if the bit is not set in the user MSR, the bit BEING set - * indicates that the registers are hot + * new_msr tracks the facilities that are to be restored. Only reload + * if the bit is not set in the user MSR (if it is set, the registers + * are live for the user thread). */ - if ((!(msr & MSR_FP)) && restore_fp(current)) - msr |= MSR_FP | current->thread.fpexc_mode; + if ((!(msr & MSR_FP)) && should_restore_fp()) + new_msr |= MSR_FP | current->thread.fpexc_mode; - if ((!(msr & MSR_VEC)) && restore_altivec(current)) - msr |= MSR_VEC; + if ((!(msr & MSR_VEC)) && should_restore_altivec()) + new_msr |= MSR_VEC; - if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && - restore_vsx(current)) { - msr |= MSR_VSX; + if ((!(msr & MSR_VSX)) && should_restore_vsx()) { + if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) + new_msr |= MSR_VSX; } - msr_check_and_clear(msr_all_available); + if (new_msr) { + msr_check_and_set(new_msr); + + if (new_msr & MSR_FP) + do_restore_fp(); + + if (new_msr & MSR_VEC) + do_restore_altivec(); + + if (new_msr & MSR_VSX) + do_restore_vsx(); - regs->msr = msr; + msr_check_and_clear(new_msr); + + regs->msr |= new_msr; + } } #endif @@ -1599,6 +1618,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, { struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); + extern void ret_from_fork_scv(void); extern void ret_from_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; @@ -1635,7 +1655,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (usp) childregs->gpr[1] = usp; p->thread.regs = childregs; - childregs->gpr[3] = 0; /* Result from fork() */ + /* 64s sets this in ret_from_fork */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + childregs->gpr[3] = 0; /* Result from fork() */ if (clone_flags & CLONE_SETTLS) { if (!is_32bit_task()) childregs->gpr[13] = tls; @@ -1643,7 +1665,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->gpr[2] = tls; } - f = ret_from_fork; + if (trap_is_scv(regs)) + f = ret_from_fork_scv; + else + f = ret_from_fork; } childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); sp -= STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9cc49f265c86..d8a2fb87ba0c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -163,7 +163,7 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, #ifdef CONFIG_PPC_RADIX_MMU - { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, @@ -175,6 +175,8 @@ static struct ibm_pa_feature { */ { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, + + { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -468,8 +470,9 @@ static bool validate_mem_limit(u64 base, u64 *size) * This contains a list of memory blocks along with NUMA affinity * information. */ -static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, - const __be32 **usm) +static int __init early_init_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { u64 base, size; int is_kexec_kdump = 0, rngs; @@ -484,7 +487,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ if ((lmb->flags & DRCONF_MEM_RESERVED) || !(lmb->flags & DRCONF_MEM_ASSIGNED)) - return; + return 0; if (*usm) is_kexec_kdump = 1; @@ -499,7 +502,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ rngs = dt_mem_next_cell(dt_root_size_cells, usm); if (!rngs) /* there are no (base, size) duple */ - return; + return 0; } do { @@ -524,6 +527,8 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, if (lmb->flags & DRCONF_MEM_HOTREMOVABLE) memblock_mark_hotplug(base, size); } while (--rngs); + + return 0; } #endif /* CONFIG_PPC_PSERIES */ @@ -534,7 +539,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, #ifdef CONFIG_PPC_PSERIES if (depth == 1 && strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { - walk_drmem_lmbs_early(node, early_init_drmem_lmb); + walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); return 0; } #endif @@ -815,6 +820,11 @@ void __init early_init_devtree(void *params) /* Now try to figure out if we are running on LPAR and so on */ pseries_probe_fw_features(); + /* + * Initialize pkey features and default AMR/IAMR values + */ + pkey_early_init_devtree(); + #ifdef CONFIG_PPC_PS3 /* Identify PS3 firmware */ if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 90c604d00b7d..ae7ec9903191 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -169,6 +169,7 @@ static unsigned long __prombss prom_tce_alloc_end; #ifdef CONFIG_PPC_PSERIES static bool __prombss prom_radix_disable; +static bool __prombss prom_radix_gtse_disable; static bool __prombss prom_xive_disable; #endif @@ -823,6 +824,12 @@ static void __init early_cmdline_parse(void) if (prom_radix_disable) prom_debug("Radix disabled from cmdline\n"); + opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on"); + if (opt) { + prom_radix_gtse_disable = true; + prom_debug("Radix GTSE disabled from cmdline\n"); + } + opt = prom_strstr(prom_cmd_line, "xive=off"); if (opt) { prom_xive_disable = true; @@ -1285,10 +1292,8 @@ static void __init prom_parse_platform_support(u8 index, u8 val, prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); break; case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ - if (val & OV5_FEAT(OV5_RADIX_GTSE)) { - prom_debug("Radix - GTSE supported\n"); - support->radix_gtse = true; - } + if (val & OV5_FEAT(OV5_RADIX_GTSE)) + support->radix_gtse = !prom_radix_gtse_disable; break; case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT), @@ -1336,12 +1341,15 @@ static void __init prom_check_platform_support(void) } } - if (supported.radix_mmu && supported.radix_gtse && - IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { - /* Radix preferred - but we require GTSE for now */ - prom_debug("Asking for radix with GTSE\n"); + if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { + /* Radix preferred - Check if GTSE is also supported */ + prom_debug("Asking for radix\n"); ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); - ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + if (supported.radix_gtse) + ibm_architecture_vec.vec5.radix_ext = + OV5_FEAT(OV5_RADIX_GTSE); + else + prom_debug("Radix GTSE isn't supported\n"); } else if (supported.hash_mmu) { /* Default to hash mmu (if we can) */ prom_debug("Asking for hash\n"); @@ -3262,7 +3270,7 @@ static int enter_secure_mode(unsigned long kbase, unsigned long fdt) /* * Call the Ultravisor to transfer us to secure memory if we have an ESM blob. */ -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { int ret; @@ -3292,7 +3300,7 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt) } } #else -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { } #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c index dd8b75dfbd06..0d9bc4bd4972 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-altivec.c +++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c @@ -41,38 +41,25 @@ int vr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int vr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; flush_altivec_to_thread(target); BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != offsetof(struct thread_vr_state, vr[32])); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - } - - return ret; + membuf_write(&to, &target->thread.vr_state, 33 * sizeof(vector128)); + /* + * Copy out only the low-order word of vrsave. + */ + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.vrsave; + return membuf_write(&to, &vrsave, sizeof(vrsave)); } /* diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 3c8a81999292..67447a6197eb 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -63,8 +63,7 @@ enum powerpc_regset { /* ptrace-(no)vsx */ -int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn fpr_get; int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -72,8 +71,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-vsx */ int vsr_active(struct task_struct *target, const struct user_regset *regset); -int vsr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn vsr_get; int vsr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -81,8 +79,7 @@ int vsr_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-altivec */ int vr_active(struct task_struct *target, const struct user_regset *regset); -int vr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn vr_get; int vr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -90,8 +87,7 @@ int vr_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-spe */ int evr_active(struct task_struct *target, const struct user_regset *regset); -int evr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn evr_get; int evr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); @@ -100,9 +96,8 @@ int evr_set(struct task_struct *target, const struct user_regset *regset, int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - unsigned long *regs); + struct membuf to, + unsigned long *regs); int gpr32_set_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, @@ -118,55 +113,46 @@ static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } #endif int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset); -int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cgpr_get; int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset); -int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cfpr_get; int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset); -int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cvmx_get; int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset); -int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cvsx_get; int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_spr_active(struct task_struct *target, const struct user_regset *regset); -int tm_spr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_spr_get; int tm_spr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_tar_active(struct task_struct *target, const struct user_regset *regset); -int tm_tar_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_tar_get; int tm_tar_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_ppr_active(struct task_struct *target, const struct user_regset *regset); -int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_ppr_get; int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); int tm_dscr_active(struct task_struct *target, const struct user_regset *regset); -int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_dscr_get; int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); -int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +user_regset_get2_fn tm_cgpr32_get; int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c index b2dc4e92d11a..b3b36835658a 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-novsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c @@ -19,15 +19,14 @@ * }; */ int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); flush_fp_to_thread(target); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); + return membuf_write(&to, &target->thread.fp_state, 33 * sizeof(u64)); } /* diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c index 68b86b4a4be4..47034d069045 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-spe.c +++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c @@ -23,25 +23,17 @@ int evr_active(struct task_struct *target, const struct user_regset *regset) } int evr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - flush_spe_to_thread(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); + membuf_write(&to, &target->thread.evr, sizeof(target->thread.evr)); BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != offsetof(struct thread_struct, spefscr)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; + return membuf_write(&to, &target->thread.acc, + sizeof(u64) + sizeof(u32)); } int evr_set(struct task_struct *target, const struct user_regset *regset, diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 32d62c606681..54f2d076206f 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -70,10 +70,7 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) * tm_cgpr_get - get CGPR registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets transaction checkpointed GPR registers. * @@ -87,10 +84,8 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -101,31 +96,18 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_ckpt_msr(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } + membuf_write(&to, &target->thread.ckpt_regs, + offsetof(struct pt_regs, msr)); + membuf_store(&to, get_user_ckpt_msr(target)); BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != offsetof(struct pt_regs, msr) + sizeof(long)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; + membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3, + sizeof(struct user_pt_regs) - + offsetof(struct pt_regs, orig_gpr3)); + return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - + sizeof(struct user_pt_regs)); } /* @@ -229,10 +211,7 @@ int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset) * tm_cfpr_get - get CFPR registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets in transaction checkpointed FPR registers. * @@ -247,7 +226,7 @@ int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset) *}; */ int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[33]; int i; @@ -266,7 +245,7 @@ int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_CKFPR(i); buf[32] = target->thread.ckfp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + return membuf_write(&to, buf, sizeof(buf)); } /** @@ -344,10 +323,7 @@ int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset) * tm_cvmx_get - get CMVX registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets in transaction checkpointed VMX registers. * @@ -363,10 +339,12 @@ int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset) *}; */ int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - + union { + elf_vrreg_t reg; + u32 word; + } vrsave; BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); if (!cpu_has_feature(CPU_FTR_TM)) @@ -380,23 +358,13 @@ int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state, - 0, 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - } - - return ret; + membuf_write(&to, &target->thread.ckvr_state, 33 * sizeof(vector128)); + /* + * Copy out only the low-order word of vrsave. + */ + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + return membuf_write(&to, &vrsave, sizeof(vrsave)); } /** @@ -484,10 +452,7 @@ int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset) * tm_cvsx_get - get CVSX registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets in transaction checkpointed VSX registers. * @@ -501,10 +466,10 @@ int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset) *}; */ int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[32]; - int ret, i; + int i; if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -520,10 +485,7 @@ int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; + return membuf_write(&to, buf, 32 * sizeof(double)); } /** @@ -597,10 +559,7 @@ int tm_spr_active(struct task_struct *target, const struct user_regset *regset) * tm_spr_get - get the TM related SPR registers * @target: The target task. * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. + * @to: Destination of copy. * * This function gets transactional memory related SPR registers. * The userspace interface buffer layout is as follows. @@ -612,10 +571,8 @@ int tm_spr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int tm_spr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - /* Build tests */ BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); @@ -630,21 +587,11 @@ int tm_spr_get(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); /* TFHAR register */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - + membuf_write(&to, &target->thread.tm_tfhar, sizeof(u64)); /* TEXASR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - + membuf_write(&to, &target->thread.tm_texasr, sizeof(u64)); /* TFIAR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_tfiar, sizeof(u64)); } /** @@ -714,19 +661,15 @@ int tm_tar_active(struct task_struct *target, const struct user_regset *regset) } int tm_tar_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_tar, sizeof(u64)); } int tm_tar_set(struct task_struct *target, const struct user_regset *regset, @@ -759,19 +702,15 @@ int tm_ppr_active(struct task_struct *target, const struct user_regset *regset) int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_ppr, sizeof(u64)); } int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, @@ -803,19 +742,15 @@ int tm_dscr_active(struct task_struct *target, const struct user_regset *regset) } int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; - if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; + return membuf_write(&to, &target->thread.tm_dscr, sizeof(u64)); } int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, @@ -836,10 +771,11 @@ int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, } int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + gpr32_get_common(target, regset, to, &target->thread.ckpt_regs.gpr[0]); + return membuf_zero(&to, ELF_NGREG * sizeof(u32)); } int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index caeb5822a8f4..19823a250aa0 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -215,9 +215,9 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) } static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - int i, ret; + int i; if (target->thread.regs == NULL) return -EIO; @@ -228,30 +228,17 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, target->thread.regs->gpr[i] = NV_REG_POISON; } - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_msr(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } + membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr)); + membuf_store(&to, get_user_msr(target)); BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != offsetof(struct pt_regs, msr) + sizeof(long)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; + membuf_write(&to, &target->thread.regs->orig_gpr3, + sizeof(struct user_pt_regs) - + offsetof(struct pt_regs, orig_gpr3)); + return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - + sizeof(struct user_pt_regs)); } static int gpr_set(struct task_struct *target, const struct user_regset *regset, @@ -309,10 +296,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_PPC64 static int ppr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); } static int ppr_set(struct task_struct *target, const struct user_regset *regset, @@ -324,10 +310,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, } static int dscr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); + return membuf_write(&to, &target->thread.dscr, sizeof(u64)); } static int dscr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, @@ -339,10 +324,9 @@ static int dscr_set(struct task_struct *target, const struct user_regset *regset #endif #ifdef CONFIG_PPC_BOOK3S_64 static int tar_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); + return membuf_write(&to, &target->thread.tar, sizeof(u64)); } static int tar_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, @@ -364,7 +348,7 @@ static int ebb_active(struct task_struct *target, const struct user_regset *regs } static int ebb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { /* Build tests */ BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); @@ -376,8 +360,7 @@ static int ebb_get(struct task_struct *target, const struct user_regset *regset, if (!target->thread.used_ebb) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr, - 0, 3 * sizeof(unsigned long)); + return membuf_write(&to, &target->thread.ebbrr, 3 * sizeof(unsigned long)); } static int ebb_set(struct task_struct *target, const struct user_regset *regset, @@ -420,7 +403,7 @@ static int pmu_active(struct task_struct *target, const struct user_regset *regs } static int pmu_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { /* Build tests */ BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); @@ -431,8 +414,7 @@ static int pmu_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_feature(CPU_FTR_ARCH_207S)) return -ENODEV; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.siar, - 0, 5 * sizeof(unsigned long)); + return membuf_write(&to, &target->thread.siar, 5 * sizeof(unsigned long)); } static int pmu_set(struct task_struct *target, const struct user_regset *regset, @@ -486,16 +468,17 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg } static int pkey_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { + int ret; + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); - BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); if (!arch_pkeys_enabled()) return -ENODEV; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.amr, - 0, ELF_NPKEY * sizeof(unsigned long)); + membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + return membuf_store(&to, default_uamor); } static int pkey_set(struct task_struct *target, const struct user_regset *regset, @@ -517,9 +500,17 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset if (ret) return ret; - /* UAMOR determines which bits of the AMR can be set from userspace. */ - target->thread.amr = (new_amr & target->thread.uamor) | - (target->thread.amr & ~target->thread.uamor); + /* + * UAMOR determines which bits of the AMR can be set from userspace. + * UAMOR value 0b11 indicates that the AMR value can be modified + * from userspace. If the kernel is using a specific key, we avoid + * userspace modifying the AMR value for that key by masking them + * via UAMOR 0b00. + * + * Pick the AMR values for the keys that kernel is using. This + * will be indicated by the ~default_uamor bits. + */ + target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); return 0; } @@ -529,110 +520,110 @@ static const struct user_regset native_regsets[] = { [REGSET_GPR] = { .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .get = gpr_get, .set = gpr_set + .regset_get = gpr_get, .set = gpr_set }, [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set + .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set + .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_VSX [REGSET_VSX] = { .core_note_type = NT_PPC_VSX, .n = 32, .size = sizeof(double), .align = sizeof(double), - .active = vsr_active, .get = vsr_get, .set = vsr_set + .active = vsr_active, .regset_get = vsr_get, .set = vsr_set }, #endif #ifdef CONFIG_SPE [REGSET_SPE] = { .core_note_type = NT_PPC_SPE, .n = 35, .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set + .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM [REGSET_TM_CGPR] = { .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set + .active = tm_cgpr_active, .regset_get = tm_cgpr_get, .set = tm_cgpr_set }, [REGSET_TM_CFPR] = { .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { .core_note_type = NT_PPC_TM_CTAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { .core_note_type = NT_PPC_TM_CPPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { .core_note_type = NT_PPC_TM_CDSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { .core_note_type = NT_PPC_PPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set + .regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { .core_note_type = NT_PPC_DSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set + .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { .core_note_type = NT_PPC_TAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set + .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set + .active = ebb_active, .regset_get = ebb_get, .set = ebb_set }, [REGSET_PMR] = { .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, .size = sizeof(u64), .align = sizeof(u64), - .active = pmu_active, .get = pmu_get, .set = pmu_set + .active = pmu_active, .regset_get = pmu_get, .set = pmu_set }, #endif #ifdef CONFIG_PPC_MEM_KEYS [REGSET_PKEY] = { .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, .size = sizeof(u64), .align = sizeof(u64), - .active = pkey_active, .get = pkey_get, .set = pkey_set + .active = pkey_active, .regset_get = pkey_get, .set = pkey_set }, #endif }; @@ -646,49 +637,16 @@ const struct user_regset_view user_ppc_native_view = { int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - unsigned long *regs) + struct membuf to, unsigned long *regs) { - compat_ulong_t *k = kbuf; - compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_MSR; --count) - if (__put_user((compat_ulong_t)regs[pos++], u++)) - return -EFAULT; - - if (count > 0 && pos == PT_MSR) { - reg = get_user_msr(target); - if (kbuf) - *k++ = reg; - else if (__put_user(reg, u++)) - return -EFAULT; - ++pos; - --count; - } - - if (kbuf) - for (; count > 0 && pos < PT_REGS_COUNT; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_REGS_COUNT; --count) - if (__put_user((compat_ulong_t)regs[pos++], u++)) - return -EFAULT; + int i; - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - PT_REGS_COUNT * sizeof(reg), -1); + for (i = 0; i < PT_MSR; i++) + membuf_store(&to, (u32)regs[i]); + membuf_store(&to, (u32)get_user_msr(target)); + for (i++ ; i < PT_REGS_COUNT; i++) + membuf_store(&to, (u32)regs[i]); + return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32)); } int gpr32_set_common(struct task_struct *target, @@ -761,8 +719,7 @@ int gpr32_set_common(struct task_struct *target, static int gpr32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { int i; @@ -777,7 +734,7 @@ static int gpr32_get(struct task_struct *target, for (i = 14; i < 32; i++) target->thread.regs->gpr[i] = NV_REG_POISON; } - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + return gpr32_get_common(target, regset, to, &target->thread.regs->gpr[0]); } @@ -801,25 +758,25 @@ static const struct user_regset compat_regsets[] = { [REGSET_GPR] = { .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = gpr32_get, .set = gpr32_set + .regset_get = gpr32_get, .set = gpr32_set }, [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set + .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set + .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_SPE [REGSET_SPE] = { .core_note_type = NT_PPC_SPE, .n = 35, .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set + .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -827,66 +784,66 @@ static const struct user_regset compat_regsets[] = { .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .active = tm_cgpr_active, - .get = tm_cgpr32_get, .set = tm_cgpr32_set + .regset_get = tm_cgpr32_get, .set = tm_cgpr32_set }, [REGSET_TM_CFPR] = { .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { .core_note_type = NT_PPC_TM_CTAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { .core_note_type = NT_PPC_TM_CPPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { .core_note_type = NT_PPC_TM_CDSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { .core_note_type = NT_PPC_PPR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set + .regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { .core_note_type = NT_PPC_DSCR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set + .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { .core_note_type = NT_PPC_TAR, .n = 1, .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set + .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set + .active = ebb_active, .regset_get = ebb_get, .set = ebb_set }, #endif }; diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c index d53466d49cc0..1da4303128ef 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-vsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -19,7 +19,7 @@ * }; */ int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[33]; int i; @@ -30,7 +30,7 @@ int fpr_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_FPR(i); buf[32] = target->thread.fp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + return membuf_write(&to, buf, 33 * sizeof(u64)); } /* @@ -95,10 +95,10 @@ int vsr_active(struct task_struct *target, const struct user_regset *regset) * }; */ int vsr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) + struct membuf to) { u64 buf[32]; - int ret, i; + int i; flush_tmregs_to_thread(target); flush_fp_to_thread(target); @@ -108,10 +108,7 @@ int vsr_get(struct task_struct *target, const struct user_regset *regset, for (i = 0; i < 32 ; i++) buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; + return membuf_write(&to, buf, 32 * sizeof(double)); } /* diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a09eba03f180..806d554ce357 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -843,96 +843,6 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -enum rtas_cpu_state { - DOWN, - UP, -}; - -#ifndef CONFIG_SMP -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - if (!cpumask_empty(cpus)) { - cpumask_clear(cpus); - return -EINVAL; - } else - return 0; -} -#else -/* On return cpumask will be altered to indicate CPUs changed. - * CPUs with states changed will be set in the mask, - * CPUs with status unchanged will be unset in the mask. */ -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - int cpu; - int cpuret = 0; - int ret = 0; - - if (cpumask_empty(cpus)) - return 0; - - for_each_cpu(cpu, cpus) { - struct device *dev = get_cpu_device(cpu); - - switch (state) { - case DOWN: - cpuret = device_offline(dev); - break; - case UP: - cpuret = device_online(dev); - break; - } - if (cpuret < 0) { - pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", - __func__, - ((state == UP) ? "up" : "down"), - cpu, cpuret); - if (!ret) - ret = cpuret; - if (state == UP) { - /* clear bits for unchanged cpus, return */ - cpumask_shift_right(cpus, cpus, cpu); - cpumask_shift_left(cpus, cpus, cpu); - break; - } else { - /* clear bit for unchanged cpu, continue */ - cpumask_clear_cpu(cpu, cpus); - } - } - cond_resched(); - } - - return ret; -} -#endif - -int rtas_online_cpus_mask(cpumask_var_t cpus) -{ - int ret; - - ret = rtas_cpu_state_change_mask(UP, cpus); - - if (ret) { - cpumask_var_t tmp_mask; - - if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) - return ret; - - /* Use tmp_mask to preserve cpus mask from first failure */ - cpumask_copy(tmp_mask, cpus); - rtas_offline_cpus_mask(tmp_mask); - free_cpumask_var(tmp_mask); - } - - return ret; -} - -int rtas_offline_cpus_mask(cpumask_var_t cpus) -{ - return rtas_cpu_state_change_mask(DOWN, cpus); -} - int rtas_ibm_suspend_me(u64 handle) { long state; @@ -940,8 +850,6 @@ int rtas_ibm_suspend_me(u64 handle) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); - cpumask_var_t offline_mask; - int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -962,9 +870,6 @@ int rtas_ibm_suspend_me(u64 handle) return -EIO; } - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); @@ -973,24 +878,8 @@ int rtas_ibm_suspend_me(u64 handle) lock_device_hotplug(); - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); - cpuret = rtas_online_cpus_mask(offline_mask); - if (cpuret) { - pr_err("%s: Could not bring present CPUs online.\n", __func__); - atomic_set(&data.error, cpuret); - goto out; - } - cpu_hotplug_disable(); - /* Check if we raced with a CPU-Offline Operation */ - if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) { - pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__); - atomic_set(&data.error, -EAGAIN); - goto out_hotplug_enable; - } - /* Call function on all CPUs. One of us will make the * rtas call */ @@ -1001,18 +890,11 @@ int rtas_ibm_suspend_me(u64 handle) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); -out_hotplug_enable: - cpu_hotplug_enable(); - /* Take down CPUs not online prior to suspend */ - cpuret = rtas_offline_cpus_mask(offline_mask); - if (cpuret) - pr_warn("%s: Could not restore CPUs to offline state.\n", - __func__); + cpu_hotplug_enable(); -out: unlock_device_hotplug(); - free_cpumask_var(offline_mask); + return atomic_read(&data.error); } diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 89b798f8f656..8561dfb33f24 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -273,37 +273,15 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) } } -#ifdef CONFIG_PPC_PSERIES -static void handle_prrn_event(s32 scope) -{ - /* - * For PRRN, we must pass the negative of the scope value in - * the RTAS event. - */ - pseries_devicetree_update(-scope); - numa_update_cpu_topology(false); -} - static void handle_rtas_event(const struct rtas_error_log *log) { - if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) + if (!machine_is(pseries)) return; - /* For PRRN Events the extended log length is used to denote - * the scope for calling rtas update-nodes. - */ - handle_prrn_event(rtas_error_extended_log_length(log)); + if (rtas_error_type(log) == RTAS_TYPE_PRRN) + pr_info_ratelimited("Platform resource reassignment ignored.\n"); } -#else - -static void handle_rtas_event(const struct rtas_error_log *log) -{ - return; -} - -#endif - static int rtas_log_open(struct inode * inode, struct file * file) { return 0; diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 4b982324d368..f9af305d9579 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -23,12 +23,19 @@ bool is_ppc_secureboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 secureboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "os-secureboot-enforcing"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot)) + enabled = (secureboot > 1); + +out: pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); return enabled; @@ -38,12 +45,19 @@ bool is_ppc_trustedboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 trustedboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "trusted-enabled"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot)) + enabled = (trustedboot > 0); + +out: pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); return enabled; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index d86701ce116b..c9876aab3142 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -21,13 +21,13 @@ u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; -enum count_cache_flush_type { - COUNT_CACHE_FLUSH_NONE = 0x1, - COUNT_CACHE_FLUSH_SW = 0x2, - COUNT_CACHE_FLUSH_HW = 0x4, +enum branch_cache_flush_type { + BRANCH_CACHE_FLUSH_NONE = 0x1, + BRANCH_CACHE_FLUSH_SW = 0x2, + BRANCH_CACHE_FLUSH_HW = 0x4, }; -static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; -static bool link_stack_flush_enabled; +static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; +static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; bool barrier_nospec_enabled; static bool no_nospec; @@ -219,24 +219,25 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - - } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + } else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); - if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { seq_buf_printf(&s, "Vulnerable"); } + if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + seq_buf_printf(&s, ", Software link stack flush"); + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) + seq_buf_printf(&s, " (hardware accelerated)"); + } + seq_buf_printf(&s, "\n"); return s.len; @@ -427,61 +428,79 @@ static __init int stf_barrier_debugfs_init(void) device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -static void no_count_cache_flush(void) +static void update_branch_cache_flush(void) { - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); -} - -static void toggle_count_cache_flush(bool enable) -{ - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && - !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) - enable = false; - - if (!enable) { - patch_instruction_site(&patch__call_flush_count_cache, - ppc_inst(PPC_INST_NOP)); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + // This controls the branch from guest_exit_cont to kvm_flush_link_stack + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(&patch__call_kvm_flush_link_stack, ppc_inst(PPC_INST_NOP)); -#endif - pr_info("link-stack-flush: software flush disabled.\n"); - link_stack_flush_enabled = false; - no_count_cache_flush(); - return; + } else { + // Could use HW flush, but that could also flush count cache + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); } - - // This enables the branch from _switch to flush_count_cache - patch_branch_site(&patch__call_flush_count_cache, - (u64)&flush_count_cache, BRANCH_SET_LINK); - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - // This enables the branch from guest_exit_cont to kvm_flush_link_stack - patch_branch_site(&patch__call_kvm_flush_link_stack, - (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); #endif - pr_info("link-stack-flush: software flush enabled.\n"); - link_stack_flush_enabled = true; + // This controls the branch from _switch to flush_branch_caches + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && + link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_NOP)); + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW && + link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_BCCTR_FLUSH)); + } else { + patch_branch_site(&patch__call_flush_branch_caches, + (u64)&flush_branch_caches, BRANCH_SET_LINK); + + // If we just need to flush the link stack, early return + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__flush_link_stack_return, + ppc_inst(PPC_INST_BLR)); + + // If we have flush instruction, early return + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__flush_count_cache_return, + ppc_inst(PPC_INST_BLR)); + } + } +} - // If we just need to flush the link stack, patch an early return - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { - patch_instruction_site(&patch__flush_link_stack_return, - ppc_inst(PPC_INST_BLR)); - no_count_cache_flush(); - return; +static void toggle_branch_cache_flush(bool enable) +{ + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) + count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("count-cache-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { + count_cache_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("count-cache-flush: hardware flush enabled.\n"); + } else { + count_cache_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("count-cache-flush: software flush enabled.\n"); + } } - if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { - count_cache_flush_type = COUNT_CACHE_FLUSH_SW; - pr_info("count-cache-flush: full software flush sequence enabled.\n"); - return; + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("link-stack-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) { + link_stack_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("link-stack-flush: hardware flush enabled.\n"); + } else { + link_stack_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("link-stack-flush: software flush enabled.\n"); + } } - patch_instruction_site(&patch__flush_count_cache_return, ppc_inst(PPC_INST_BLR)); - count_cache_flush_type = COUNT_CACHE_FLUSH_HW; - pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); + update_branch_cache_flush(); } void setup_count_cache_flush(void) @@ -505,7 +524,7 @@ void setup_count_cache_flush(void) security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); } #ifdef CONFIG_DEBUG_FS @@ -520,14 +539,14 @@ static int count_cache_flush_set(void *data, u64 val) else return -EINVAL; - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); return 0; } static int count_cache_flush_get(void *data, u64 *val) { - if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) *val = 0; else *val = 1; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d3faac53295..b198b0ff25bc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -928,6 +928,9 @@ void __init setup_arch(char **cmdline_p) /* Reserve large chunks of memory for use by CMA for KVM. */ kvm_cma_reserve(); + /* Reserve large chunks of memory for us by CMA for hugetlb */ + gigantic_hugetlb_cma_reserve(); + klp_init_thread_info(&init_task); init_mm.start_code = (unsigned long)_stext; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 0ba1ed77dc68..6be430107c6f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -196,7 +196,10 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { /* Enable AIL if possible */ - pseries_enable_reloc_on_exc(); + if (!pseries_enable_reloc_on_exc()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } /* * Tell the hypervisor that we want our exceptions to diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index b4143b6ff093..d15a98c758b8 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -205,8 +205,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, return; /* error signalled ? */ - if (!(regs->ccr & 0x10000000)) + if (trap_is_scv(regs)) { + /* 32-bit compat mode sign extend? */ + if (!IS_ERR_VALUE(ret)) + return; + ret = -ret; + } else if (!(regs->ccr & 0x10000000)) { return; + } switch (ret) { case ERESTART_RESTARTBLOCK: @@ -239,9 +245,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, regs->nip -= 4; regs->result = 0; } else { - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; + if (trap_is_scv(regs)) { + regs->result = -EINTR; + regs->gpr[3] = -EINTR; + } else { + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + } } } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 1415c16ab628..96950f189b5a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -102,22 +102,18 @@ static inline int save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; - int i; - /* Force usr to alway see softe as 1 (interrupts enabled) */ - elf_greg_t64 softe = 0x1; + int val, i; WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { - if (i == 14 && !FULL_REGS(regs)) - i = 32; - if ( i == PT_SOFTE) { - if(__put_user((unsigned int)softe, &frame->mc_gregs[i])) - return -EFAULT; - else - continue; - } - if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) + /* Force usr to alway see softe as 1 (interrupts enabled) */ + if (i == PT_SOFTE) + val = 1; + else + val = gregs[i]; + + if (__put_user(val, &frame->mc_gregs[i])) return -EFAULT; } return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 55e5f76554da..bfc939360bad 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -21,6 +21,7 @@ #include <linux/ptrace.h> #include <linux/ratelimit.h> #include <linux/syscalls.h> +#include <linux/pagemap.h> #include <asm/sigcontext.h> #include <asm/ucontext.h> @@ -39,8 +40,8 @@ #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) -#define TRAMP_TRACEBACK 3 -#define TRAMP_SIZE 6 +#define TRAMP_TRACEBACK 4 +#define TRAMP_SIZE 7 /* * When we have signals to deliver, we set up on the user stack, @@ -600,13 +601,15 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; + /* bctrl # call the handler */ + err |= __put_user(PPC_INST_BCTRL, &tramp[0]); /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]); + (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]); + err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[2]); + err |= __put_user(PPC_INST_SC, &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) @@ -632,7 +635,6 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, long, ctx_size) { - unsigned char tmp; sigset_t set; unsigned long new_msr = 0; int ctx_has_vsx_region = 0; @@ -667,9 +669,8 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, } if (new_ctx == NULL) return 0; - if (!access_ok(new_ctx, ctx_size) - || __get_user(tmp, (u8 __user *) new_ctx) - || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1)) + if (!access_ok(new_ctx, ctx_size) || + fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) return -EFAULT; /* @@ -864,12 +865,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) goto badframe; - regs->link = (unsigned long) &frame->tramp[0]; + regs->nip = (unsigned long) &frame->tramp[0]; } /* Allocate a dummy caller frame for the signal handler. */ @@ -878,8 +879,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up "regs" so we "return" to the signal handler. */ if (is_elf2_task()) { - regs->nip = (unsigned long) ksig->ka.sa.sa_handler; - regs->gpr[12] = regs->nip; + regs->ctr = (unsigned long) ksig->ka.sa.sa_handler; + regs->gpr[12] = regs->ctr; } else { /* Handler is *really* a pointer to the function descriptor for * the signal routine. The first entry in the function @@ -889,7 +890,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, func_descr_t __user *funct_desc_ptr = (func_descr_t __user *) ksig->ka.sa.sa_handler; - err |= get_user(regs->nip, &funct_desc_ptr->entry); + err |= get_user(regs->ctr, &funct_desc_ptr->entry); err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 73199470c265..8261999c7d52 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,6 +59,7 @@ #include <asm/asm-prototypes.h> #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> +#include <asm/kup.h> #ifdef DEBUG #include <asm/udbg.h> diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 79edba3ab312..8e50818aa50b 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -60,6 +60,11 @@ notrace long system_call_exception(long r3, long r4, long r5, local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } /* * We use the return value of do_syscall_trace_enter() as the * syscall number. If the syscall was rejected for any reason @@ -78,6 +83,11 @@ notrace long system_call_exception(long r3, long r4, long r5, r8 = regs->gpr[8]; } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } return -ENOSYS; } @@ -105,16 +115,20 @@ notrace long system_call_exception(long r3, long r4, long r5, * local irqs must be disabled. Returns false if the caller must re-enable * them, check for new work, and try again. */ -static notrace inline bool prep_irq_for_enabled_exit(void) +static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); /* This pattern matches prep_irq_for_idle */ - __hard_EE_RI_disable(); + if (clear_ri) + __hard_EE_RI_disable(); + else + __hard_irq_disable(); if (unlikely(lazy_irq_pending_nocheck())) { /* Took an interrupt, may have more exit work to do. */ - __hard_RI_enable(); + if (clear_ri) + __hard_RI_enable(); trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; @@ -136,7 +150,8 @@ static notrace inline bool prep_irq_for_enabled_exit(void) * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. */ notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs) + struct pt_regs *regs, + long scv) { unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; @@ -151,7 +166,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ti_flags = *ti_flagsp; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) { + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { r3 = -r3; regs->ccr |= 0x10000000; /* Set SO bit in CR */ @@ -206,12 +221,20 @@ again: else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* + * If userspace MSR has all available FP bits set, + * then they are live and no need to restore. If not, + * it means the regs were given up and restore_math + * may decide to restore them (to avoid taking an FP + * fault). + */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - if (unlikely(!prep_irq_for_enabled_exit())) { + /* scv need not set RI=0 because SRRs are not used */ + if (unlikely(!prep_irq_for_enabled_exit(!scv))) { local_irq_enable(); goto again; } @@ -277,12 +300,13 @@ again: else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* See above restore_math comment */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - if (unlikely(!prep_irq_for_enabled_exit())) { + if (unlikely(!prep_irq_for_enabled_exit(true))) { local_irq_enable(); local_irq_disable(); goto again; @@ -345,7 +369,7 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit())) { + if (unlikely(!prep_irq_for_enabled_exit(true))) { /* * Can't local_irq_restore to replay if we were in * interrupt context. Must replay directly. diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 571b3259697e..46b4ebc33db7 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -622,8 +622,10 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7); SYSFS_PMCSETUP(pmc8, SPRN_PMC8); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); +SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3); static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3); #endif /* HAS_PPC_PMC56 */ @@ -886,6 +888,9 @@ static int register_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_create_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { @@ -980,6 +985,9 @@ static int unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_remove_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_remove_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index c1fede6ec934..42761ebec9f7 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -73,8 +73,8 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) /* Make sure it is what we expect it to be */ if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%#x) != old (%#x)", - (void *)ip, ppc_inst_val(replaced), ppc_inst_val(old)); + pr_err("%p: replaced (%s) != old (%s)", + (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); return -EINVAL; } @@ -137,7 +137,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -172,8 +172,8 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %08x around bl _mcount\n", - ppc_inst_val(op)); + pr_err("Unexpected instruction %s around bl _mcount\n", + ppc_inst_as_str(op)); return -EINVAL; } #else @@ -203,7 +203,7 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op)); + pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ @@ -231,7 +231,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -406,7 +406,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -533,8 +533,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %x %x\n", - ip, ppc_inst_val(op[0]), ppc_inst_val(op[1])); + pr_err("Unexpected call sequence at %p: %s %s\n", + ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1])); return -EINVAL; } @@ -597,7 +597,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* It should be pointing to a nop */ if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { - pr_err("Expected NOP but have %x\n", ppc_inst_val(op)); + pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -654,7 +654,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { - pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op)); + pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); return -EINVAL; } @@ -733,7 +733,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 97413a385720..d1ebe152f210 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2060,14 +2060,6 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -#if !defined(CONFIG_TAU_INT) -void TAUException(struct pt_regs *regs) -{ - printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", - regs->nip, regs->msr, regs->trap, print_tainted()); -} -#endif /* CONFIG_INT_TAU */ - #ifdef CONFIG_ALTIVEC void altivec_assist_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index e0f4ba45b6cc..8dad44262e75 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -677,7 +677,7 @@ int vdso_getcpu_init(void) node = cpu_to_node(cpu); WARN_ON_ONCE(node > 0xffff); - val = (cpu & 0xfff) | ((node & 0xffff) << 16); + val = (cpu & 0xffff) | ((node & 0xffff) << 16); mtspr(SPRN_SPRG_VDSO_WRITE, val); get_paca()->sprg_vdso = val; diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index e147bbdc12cd..87ab1152d5ce 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -50,7 +50,7 @@ $(obj-vdso32): %.o: %.S FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 5206c2eb2a1d..4c985467a668 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -111,6 +111,7 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.glink .iplt .plt .rela*) } } diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 32ebb3522ea1..38c317f25141 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -34,7 +34,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 526f5ba2593e..cab14324242b 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -8,6 +8,7 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/vdso.h> +#include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> .text @@ -24,14 +25,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc mflr r12 .cfi_register lr,r12 - mr r11,r3 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r10, r0 mtlr r12 - mr r10,r3 lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) @@ -48,7 +47,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) lwz r7,CFG_ICACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index dc84f5ae3802..067247d3efb9 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -10,35 +10,13 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/vdso.h> +#include <asm/vdso_datapage.h> .text .global __kernel_datapage_offset; __kernel_datapage_offset: .long 0 -V_FUNCTION_BEGIN(__get_datapage) - .cfi_startproc - /* We don't want that exposed or overridable as we want other objects - * to be able to bl directly to here - */ - .protected __get_datapage - .hidden __get_datapage - - mflr r0 - .cfi_register lr,r0 - - bcl 20,31,data_page_branch -data_page_branch: - mflr r3 - mtlr r0 - addi r3, r3, __kernel_datapage_offset-data_page_branch - lwz r0,0(r3) - .cfi_restore lr - add r3,r0,r3 - blr - .cfi_endproc -V_FUNCTION_END(__get_datapage) - /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; * @@ -53,7 +31,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 cmpldi cr0,r4,0 @@ -75,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 1c9a04703250..20f8be40c653 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -9,6 +9,7 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/vdso.h> +#include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> @@ -26,7 +27,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) mr r11,r3 /* r11 holds tv */ mr r10,r4 /* r10 holds tz */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + get_datapage r3, r0 cmpldi r11,0 /* check if tv is NULL */ beq 2f lis r7,1000000@ha /* load up USEC_PER_SEC */ @@ -71,7 +72,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) mflr r12 /* r12 saves lr */ .cfi_register lr,r12 mr r11,r4 /* r11 saves tp */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + get_datapage r3, r0 lis r7,NSEC_PER_SEC@h /* want nanoseconds */ ori r7,r7,NSEC_PER_SEC@l beq cr5,70f @@ -188,7 +189,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) mflr r12 .cfi_register lr,r12 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 lwz r5, CLOCK_HRTIMER_RES(r3) mtlr r12 li r3,0 @@ -221,7 +222,7 @@ V_FUNCTION_BEGIN(__kernel_time) .cfi_register lr,r12 mr r11,r3 /* r11 holds t */ - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 ld r4,STAMP_XTIME_SEC(r3) diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index a8cc0409d7d2..bbf68cd01088 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -6,6 +6,7 @@ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. */ +#include <asm/cache.h> /* IFETCH_ALIGN_BYTES */ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/unistd.h> @@ -14,21 +15,17 @@ .text -/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from - the return address to get an address in the middle of the presumed - call instruction. Since we don't have a call here, we artificially - extend the range covered by the unwind info by padding before the - real start. */ - nop .balign 8 + .balign IFETCH_ALIGN_BYTES V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) -.Lsigrt_start = . - 4 +.Lsigrt_start: + bctrl /* call the handler */ addi r1, r1, __SIGNAL_FRAMESIZE li r0,__NR_rt_sigreturn sc .Lsigrt_end: V_FUNCTION_END(__kernel_sigtramp_rt64) -/* The ".balign 8" above and the following zeros mimic the old stack +/* The .balign 8 above and the following zeros mimic the old stack trampoline layout. The last magic value is the ucontext pointer, chosen in such a way that older libgcc unwind code returns a zero for a sigcontext pointer. */ diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 256fb9720298..4e3a8d4ee614 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -30,7 +30,7 @@ SECTIONS . = ALIGN(16); .text : { *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) - *(.sfpr .glink) + *(.sfpr) } :text PROVIDE(__etext = .); PROVIDE(_etext = .); @@ -111,6 +111,7 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.glink .iplt .plt .rela*) } } diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index efc5b52f95d2..801dc28fdcca 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -76,9 +76,7 @@ _GLOBAL(load_up_altivec) oris r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif - /* Don't care if r4 overflows, this is desired behaviour */ - lbz r4,THREAD_LOAD_VEC(r5) - addi r4,r4,1 + li r4,1 stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE li r4,1 diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 86380c69f5ce..4aff6846c772 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -7,7 +7,7 @@ obj-y += core.o crash.o core_$(BITS).o obj-$(CONFIG_PPC32) += relocate_32.o -obj-$(CONFIG_KEXEC_FILE) += file_load.o elf_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o ifdef CONFIG_HAVE_IMA_KEXEC ifdef CONFIG_IMA diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index b4184092172a..8a449b2d8715 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -152,6 +152,8 @@ static void kexec_smp_down(void *arg) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); + reset_sprs(); + kexec_smp_wait(); /* NOTREACHED */ } diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 3072fd6dbe94..d0e459bb2f05 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -35,6 +35,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, void *fdt; const void *slave_code; struct elfhdr ehdr; + char *modified_cmdline = NULL; struct kexec_elf_info elf_info; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ppc64_rma_size }; @@ -46,6 +47,14 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) goto out; + if (image->type == KEXEC_TYPE_CRASH) { + /* min & max buffer values for kdump case */ + kbuf.buf_min = pbuf.buf_min = crashk_res.start; + kbuf.buf_max = pbuf.buf_max = + ((crashk_res.end < ppc64_rma_size) ? + crashk_res.end : (ppc64_rma_size - 1)); + } + ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); if (ret) goto out; @@ -60,6 +69,25 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem); + /* Load additional segments needed for panic kernel */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = load_crashdump_segments_ppc64(image, &kbuf); + if (ret) { + pr_err("Failed to load kdump kernel segments\n"); + goto out; + } + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } + if (initrd != NULL) { kbuf.buffer = initrd; kbuf.bufsz = kbuf.memsz = initrd_len; @@ -88,7 +116,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, goto out; } - ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr, + initrd_len, cmdline); if (ret) goto out; @@ -107,12 +136,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; - ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, - fdt_load_addr); + ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); if (ret) pr_err("Error setting up the purgatory.\n"); out: + kfree(modified_cmdline); kexec_free_elf_info(&elf_info); /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c index 143c91724617..9a232bc36c8f 100644 --- a/arch/powerpc/kexec/file_load.c +++ b/arch/powerpc/kexec/file_load.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * ppc64 code to implement the kexec_file_load syscall + * powerpc code to implement the kexec_file_load syscall * * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) * Copyright (C) 2004 IBM Corp. @@ -18,23 +18,45 @@ #include <linux/kexec.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <asm/setup.h> #include <asm/ima.h> -#define SLAVE_CODE_SIZE 256 +#define SLAVE_CODE_SIZE 256 /* First 0x100 bytes */ -const struct kexec_file_ops * const kexec_file_loaders[] = { - &kexec_elf64_ops, - NULL -}; - -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) +/** + * setup_kdump_cmdline - Prepend "elfcorehdr=<addr> " to command line + * of kdump kernel for exporting the core. + * @image: Kexec image + * @cmdline: Command line parameters to update. + * @cmdline_len: Length of the cmdline parameters. + * + * kdump segment must be setup before calling this function. + * + * Returns new cmdline buffer for kdump kernel on success, NULL otherwise. + */ +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) { - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) - return -EOPNOTSUPP; + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->arch.elfcorehdr_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } - return kexec_image_probe_default(image, buf, buf_len); + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + // Ensure it's nul terminated + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; } /** @@ -236,6 +258,20 @@ int setup_new_fdt(const struct kimage *image, void *fdt, } } + if (image->type == KEXEC_TYPE_CRASH) { + /* + * Avoid elfcorehdr from being stomped on in kdump kernel by + * setting up memory reserve map. + */ + ret = fdt_add_mem_rsv(fdt, image->arch.elfcorehdr_addr, + image->arch.elf_headers_sz); + if (ret) { + pr_err("Error reserving elfcorehdr memory: %s\n", + fdt_strerror(ret)); + goto err; + } + } + ret = setup_ima_buffer(image, fdt, chosen_node); if (ret) { pr_err("Error setting up the new device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c new file mode 100644 index 000000000000..53bb71e3a2e1 --- /dev/null +++ b/arch/powerpc/kexec/file_load_64.c @@ -0,0 +1,1119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2020 IBM Corporation + * + * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Hari Bathini, IBM Corporation. + */ + +#include <linux/kexec.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> +#include <linux/of_device.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/drmem.h> +#include <asm/kexec_ranges.h> +#include <asm/crashdump-ppc64.h> + +struct umem_info { + u64 *buf; /* data buffer for usable-memory property */ + u32 size; /* size allocated for the data buffer */ + u32 max_entries; /* maximum no. of entries */ + u32 idx; /* index of current entry */ + + /* usable memory ranges to look up */ + unsigned int nr_ranges; + const struct crash_mem_range *ranges; +}; + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &kexec_elf64_ops, + NULL +}; + +/** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_kernel_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); + if (ret) + goto out; + + /* exclude memory ranges should be sorted for easy lookup */ + sort_memory_ranges(*mem_ranges, true); +out: + if (ret) + pr_err("Failed to setup exclude memory ranges\n"); + return ret; +} + +/** + * get_usable_memory_ranges - Get usable memory ranges. This list includes + * regions like crashkernel, opal/rtas & tce-table, + * that kdump kernel could use. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_usable_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + /* + * Early boot failure observed on guests when low memory (first memory + * block?) is not added to usable memory. So, add [0, crashk_res.end] + * instead of [crashk_res.start, crashk_res.end] to workaround it. + * Also, crashed kernel's memory must be added to reserve map to + * avoid kdump kernel from using it. + */ + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup usable memory ranges\n"); + return ret; +} + +/** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + struct memblock_region *reg; + struct crash_mem *tmem; + int ret; + + for_each_memblock(memory, reg) { + u64 base, size; + + base = (u64)reg->base; + size = (u64)reg->size; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Reallocate memory ranges if there is no space to split ranges */ + tmem = *mem_ranges; + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + goto out; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + /* + * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL + * regions are exported to save their context at the time of + * crash, they should actually be backed up just like the + * first 64K bytes of memory. + */ + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + +/** + * get_reserved_memory_ranges - Get reserve memory ranges. This list includes + * memory regions that should be added to the + * memory reserve map to ensure the region is + * protected from any mischief. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_reserved_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup reserved memory ranges\n"); + return ret; +} + +/** + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. + */ +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + /* + * memblock uses [start, end) convention while it is + * [start, end] here. Fix the off-by-one to have the + * same convention. + */ + end -= 1; + + if (start > buf_max) + continue; + + /* Memory hole not found */ + if (end < buf_min) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. Set kbuf->mem */ + kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1, + kbuf->buf_align); + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a + * suitable buffer with top down approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmax = buf_max; + for (i = (emem->nr_ranges - 1); i >= 0; i--) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (start > tmax) + continue; + + if (end < tmax) { + tmin = (end < buf_min ? buf_min : end + 1); + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmax = start - 1; + + if (tmax < buf_min) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmin = buf_min; + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + } + return ret; +} + +/** + * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. + */ +static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + /* + * memblock uses [start, end) convention while it is + * [start, end] here. Fix the off-by-one to have the + * same convention. + */ + end -= 1; + + if (end < buf_min) + continue; + + /* Memory hole not found */ + if (start > buf_max) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. Set kbuf->mem */ + kbuf->mem = start; + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a + * suitable buffer with bottom up approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmin = buf_min; + for (i = 0; i < emem->nr_ranges; i++) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (end < tmin) + continue; + + if (start > tmin) { + tmax = (start > buf_max ? buf_max : start - 1); + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmin = end + 1; + + if (tmin > buf_max) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmax = buf_max; + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + } + return ret; +} + +/** + * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries + * @um_info: Usable memory buffer and ranges info. + * @cnt: No. of entries to accommodate. + * + * Frees up the old buffer if memory reallocation fails. + * + * Returns buffer on success, NULL on error. + */ +static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) +{ + u32 new_size; + u64 *tbuf; + + if ((um_info->idx + cnt) <= um_info->max_entries) + return um_info->buf; + + new_size = um_info->size + MEM_RANGE_CHUNK_SZ; + tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL); + if (tbuf) { + um_info->buf = tbuf; + um_info->size = new_size; + um_info->max_entries = (um_info->size / sizeof(u64)); + } + + return tbuf; +} + +/** + * add_usable_mem - Add the usable memory ranges within the given memory range + * to the buffer + * @um_info: Usable memory buffer and ranges info. + * @base: Base address of memory range to look for. + * @end: End address of memory range to look for. + * + * Returns 0 on success, negative errno on error. + */ +static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end) +{ + u64 loc_base, loc_end; + bool add; + int i; + + for (i = 0; i < um_info->nr_ranges; i++) { + add = false; + loc_base = um_info->ranges[i].start; + loc_end = um_info->ranges[i].end; + if (loc_base >= base && loc_end <= end) + add = true; + else if (base < loc_end && end > loc_base) { + if (loc_base < base) + loc_base = base; + if (loc_end > end) + loc_end = end; + add = true; + } + + if (add) { + if (!check_realloc_usable_mem(um_info, 2)) + return -ENOMEM; + + um_info->buf[um_info->idx++] = cpu_to_be64(loc_base); + um_info->buf[um_info->idx++] = + cpu_to_be64(loc_end - loc_base + 1); + } + } + + return 0; +} + +/** + * kdump_setup_usable_lmb - This is a callback function that gets called by + * walk_drmem_lmbs for every LMB to set its + * usable memory ranges. + * @lmb: LMB info. + * @usm: linux,drconf-usable-memory property value. + * @data: Pointer to usable memory buffer and ranges info. + * + * Returns 0 on success, negative errno on error. + */ +static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm, + void *data) +{ + struct umem_info *um_info; + int tmp_idx, ret; + u64 base, end; + + /* + * kdump load isn't supported on kernels already booted with + * linux,drconf-usable-memory property. + */ + if (*usm) { + pr_err("linux,drconf-usable-memory property already exists!"); + return -EINVAL; + } + + um_info = data; + tmp_idx = um_info->idx; + if (!check_realloc_usable_mem(um_info, 1)) + return -ENOMEM; + + um_info->idx++; + base = lmb->base_addr; + end = base + drmem_lmb_size() - 1; + ret = add_usable_mem(um_info, base, end); + if (!ret) { + /* + * Update the no. of ranges added. Two entries (base & size) + * for every range added. + */ + um_info->buf[tmp_idx] = + cpu_to_be64((um_info->idx - tmp_idx - 1) / 2); + } + + return ret; +} + +#define NODE_PATH_LEN 256 +/** + * add_usable_mem_property - Add usable memory property for the given + * memory node. + * @fdt: Flattened device tree for the kdump kernel. + * @dn: Memory node. + * @um_info: Usable memory buffer and ranges info. + * + * Returns 0 on success, negative errno on error. + */ +static int add_usable_mem_property(void *fdt, struct device_node *dn, + struct umem_info *um_info) +{ + int n_mem_addr_cells, n_mem_size_cells, node; + char path[NODE_PATH_LEN]; + int i, len, ranges, ret; + const __be32 *prop; + u64 base, end; + + of_node_get(dn); + + if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) { + pr_err("Buffer (%d) too small for memory node: %pOF\n", + NODE_PATH_LEN, dn); + return -EOVERFLOW; + } + pr_debug("Memory node path: %s\n", path); + + /* Now that we know the path, find its offset in kdump kernel's fdt */ + node = fdt_path_offset(fdt, path); + if (node < 0) { + pr_err("Malformed device tree: error reading %s\n", path); + ret = -EINVAL; + goto out; + } + + /* Get the address & size cells */ + n_mem_addr_cells = of_n_addr_cells(dn); + n_mem_size_cells = of_n_size_cells(dn); + pr_debug("address cells: %d, size cells: %d\n", n_mem_addr_cells, + n_mem_size_cells); + + um_info->idx = 0; + if (!check_realloc_usable_mem(um_info, 2)) { + ret = -ENOMEM; + goto out; + } + + prop = of_get_property(dn, "reg", &len); + if (!prop || len <= 0) { + ret = 0; + goto out; + } + + /* + * "reg" property represents sequence of (addr,size) tuples + * each representing a memory range. + */ + ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); + + for (i = 0; i < ranges; i++) { + base = of_read_number(prop, n_mem_addr_cells); + prop += n_mem_addr_cells; + end = base + of_read_number(prop, n_mem_size_cells) - 1; + prop += n_mem_size_cells; + + ret = add_usable_mem(um_info, base, end); + if (ret) + goto out; + } + + /* + * No kdump kernel usable memory found in this memory node. + * Write (0,0) tuple in linux,usable-memory property for + * this region to be ignored. + */ + if (um_info->idx == 0) { + um_info->buf[0] = 0; + um_info->buf[1] = 0; + um_info->idx = 2; + } + + ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf, + (um_info->idx * sizeof(u64))); + +out: + of_node_put(dn); + return ret; +} + + +/** + * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory + * and linux,drconf-usable-memory DT properties as + * appropriate to restrict its memory usage. + * @fdt: Flattened device tree for the kdump kernel. + * @usable_mem: Usable memory ranges for kdump kernel. + * + * Returns 0 on success, negative errno on error. + */ +static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem) +{ + struct umem_info um_info; + struct device_node *dn; + int node, ret = 0; + + if (!usable_mem) { + pr_err("Usable memory ranges for kdump kernel not found\n"); + return -ENOENT; + } + + node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory"); + if (node == -FDT_ERR_NOTFOUND) + pr_debug("No dynamic reconfiguration memory found\n"); + else if (node < 0) { + pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n"); + return -EINVAL; + } + + um_info.buf = NULL; + um_info.size = 0; + um_info.max_entries = 0; + um_info.idx = 0; + /* Memory ranges to look up */ + um_info.ranges = &(usable_mem->ranges[0]); + um_info.nr_ranges = usable_mem->nr_ranges; + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (dn) { + ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb); + of_node_put(dn); + + if (ret) { + pr_err("Could not setup linux,drconf-usable-memory property for kdump\n"); + goto out; + } + + ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory", + um_info.buf, (um_info.idx * sizeof(u64))); + if (ret) { + pr_err("Failed to update fdt with linux,drconf-usable-memory property"); + goto out; + } + } + + /* + * Walk through each memory node and set linux,usable-memory property + * for the corresponding node in kdump kernel's fdt. + */ + for_each_node_by_type(dn, "memory") { + ret = add_usable_mem_property(fdt, dn, &um_info); + if (ret) { + pr_err("Failed to set linux,usable-memory property for %s node", + dn->full_name); + goto out; + } + } + +out: + kfree(um_info.buf); + return ret; +} + +/** + * load_backup_segment - Locate a memory hole to place the backup region. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf) +{ + void *buf; + int ret; + + /* + * Setup a source buffer for backup segment. + * + * A source buffer has no meaning for backup region as data will + * be copied from backup source, after crash, in the purgatory. + * But as load segment code doesn't recognize such segments, + * setup a dummy source buffer to keep it happy for now. + */ + buf = vzalloc(BACKUP_SRC_SIZE); + if (!buf) + return -ENOMEM; + + kbuf->buffer = buf; + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE; + kbuf->top_down = false; + + ret = kexec_add_buffer(kbuf); + if (ret) { + vfree(buf); + return ret; + } + + image->arch.backup_buf = buf; + image->arch.backup_start = kbuf->mem; + return 0; +} + +/** + * update_backup_region_phdr - Update backup region's offset for the core to + * export the region appropriately. + * @image: Kexec image. + * @ehdr: ELF core header. + * + * Assumes an exclusive program header is setup for the backup region + * in the ELF headers + * + * Returns nothing. + */ +static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr) +{ + Elf64_Phdr *phdr; + unsigned int i; + + phdr = (Elf64_Phdr *)(ehdr + 1); + for (i = 0; i < ehdr->e_phnum; i++) { + if (phdr->p_paddr == BACKUP_SRC_START) { + phdr->p_offset = image->arch.backup_start; + pr_debug("Backup region offset updated to 0x%lx\n", + image->arch.backup_start); + return; + } + } +} + +/** + * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr + * segment needed to load kdump kernel. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) +{ + struct crash_mem *cmem = NULL; + unsigned long headers_sz; + void *headers = NULL; + int ret; + + ret = get_crash_memory_ranges(&cmem); + if (ret) + goto out; + + /* Setup elfcorehdr segment */ + ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz); + if (ret) { + pr_err("Failed to prepare elf headers for the core\n"); + goto out; + } + + /* Fix the offset for backup region in the ELF header */ + update_backup_region_phdr(image, headers); + + kbuf->buffer = headers; + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf->bufsz = kbuf->memsz = headers_sz; + kbuf->top_down = false; + + ret = kexec_add_buffer(kbuf); + if (ret) { + vfree(headers); + goto out; + } + + image->arch.elfcorehdr_addr = kbuf->mem; + image->arch.elf_headers_sz = headers_sz; + image->arch.elf_headers = headers; +out: + kfree(cmem); + return ret; +} + +/** + * load_crashdump_segments_ppc64 - Initialize the additional segements needed + * to load kdump kernel. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +int load_crashdump_segments_ppc64(struct kimage *image, + struct kexec_buf *kbuf) +{ + int ret; + + /* Load backup segment - first 64K bytes of the crashing kernel */ + ret = load_backup_segment(image, kbuf); + if (ret) { + pr_err("Failed to load backup segment\n"); + return ret; + } + pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem); + + /* Load elfcorehdr segment - to export crashing kernel's vmcore */ + ret = load_elfcorehdr_segment(image, kbuf); + if (ret) { + pr_err("Failed to load elfcorehdr segment\n"); + return ret; + } + pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n", + image->arch.elfcorehdr_addr, kbuf->bufsz, kbuf->memsz); + + return 0; +} + +/** + * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global + * variables and call setup_purgatory() to initialize + * common global variable. + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Returns 0 on success, negative errno on error. + */ +int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + struct device_node *dn = NULL; + int ret; + + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + goto out; + + if (image->type == KEXEC_TYPE_CRASH) { + u32 my_run_at_load = 1; + + /* + * Tell relocatable kernel to run at load address + * via the word meant for that at 0x5c. + */ + ret = kexec_purgatory_get_set_symbol(image, "run_at_load", + &my_run_at_load, + sizeof(my_run_at_load), + false); + if (ret) + goto out; + } + + /* Tell purgatory where to look for backup region */ + ret = kexec_purgatory_get_set_symbol(image, "backup_start", + &image->arch.backup_start, + sizeof(image->arch.backup_start), + false); + if (ret) + goto out; + + /* Setup OPAL base & entry values */ + dn = of_find_node_by_path("/ibm,opal"); + if (dn) { + u64 val; + + of_property_read_u64(dn, "opal-base-address", &val); + ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val, + sizeof(val), false); + if (ret) + goto out; + + of_property_read_u64(dn, "opal-entry-address", &val); + ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val, + sizeof(val), false); + } +out: + if (ret) + pr_err("Failed to setup purgatory symbols"); + of_node_put(dn); + return ret; +} + +/** + * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel + * being loaded. + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Returns 0 on success, negative errno on error. + */ +int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline) +{ + struct crash_mem *umem = NULL, *rmem = NULL; + int i, nr_ranges, ret; + + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + + /* + * Restrict memory usage for kdump kernel by setting up + * usable memory ranges and memory reserve map. + */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = get_usable_memory_ranges(&umem); + if (ret) + goto out; + + ret = update_usable_mem_fdt(fdt, umem); + if (ret) { + pr_err("Error setting up usable-memory property for kdump kernel\n"); + goto out; + } + + /* + * Ensure we don't touch crashed kernel's memory except the + * first 64K of RAM, which will be backed up. + */ + ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1, + crashk_res.start - BACKUP_SRC_SIZE); + if (ret) { + pr_err("Error reserving crash memory: %s\n", + fdt_strerror(ret)); + goto out; + } + + /* Ensure backup region is not used by kdump/capture kernel */ + ret = fdt_add_mem_rsv(fdt, image->arch.backup_start, + BACKUP_SRC_SIZE); + if (ret) { + pr_err("Error reserving memory for backup: %s\n", + fdt_strerror(ret)); + goto out; + } + } + + /* Update memory reserve map */ + ret = get_reserved_memory_ranges(&rmem); + if (ret) + goto out; + + nr_ranges = rmem ? rmem->nr_ranges : 0; + for (i = 0; i < nr_ranges; i++) { + u64 base, size; + + base = rmem->ranges[i].start; + size = rmem->ranges[i].end - base + 1; + ret = fdt_add_mem_rsv(fdt, base, size); + if (ret) { + pr_err("Error updating memory reserve map: %s\n", + fdt_strerror(ret)); + goto out; + } + } + +out: + kfree(rmem); + kfree(umem); + return ret; +} + +/** + * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal, + * tce-table, reserved-ranges & such (exclude + * memory ranges) as they can't be used for kexec + * segment buffer. Sets kbuf->mem when a suitable + * memory hole is found. + * @kbuf: Buffer contents and memory parameters. + * + * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + struct crash_mem **emem; + u64 buf_min, buf_max; + int ret; + + /* Look up the exclude ranges list while locating the memory hole */ + emem = &(kbuf->image->arch.exclude_ranges); + if (!(*emem) || ((*emem)->nr_ranges == 0)) { + pr_warn("No exclude range list. Using the default locate mem hole method\n"); + return kexec_locate_mem_hole(kbuf); + } + + buf_min = kbuf->buf_min; + buf_max = kbuf->buf_max; + /* Segments for kdump kernel should be within crashkernel region */ + if (kbuf->image->type == KEXEC_TYPE_CRASH) { + buf_min = (buf_min < crashk_res.start ? + crashk_res.start : buf_min); + buf_max = (buf_max > crashk_res.end ? + crashk_res.end : buf_max); + } + + if (buf_min > buf_max) { + pr_err("Invalid buffer min and/or max values\n"); + return -EINVAL; + } + + if (kbuf->top_down) + ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max, + *emem); + else + ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max, + *emem); + + /* Add the buffer allocated to the exclude list for the next lookup */ + if (!ret) { + add_mem_range(emem, kbuf->mem, kbuf->memsz); + sort_memory_ranges(*emem, true); + } else { + pr_err("Failed to locate memory buffer of size %lu\n", + kbuf->memsz); + } + return ret; +} + +/** + * arch_kexec_kernel_image_probe - Does additional handling needed to setup + * kexec segments. + * @image: kexec image being loaded. + * @buf: Buffer pointing to elf data. + * @buf_len: Length of the buffer. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int ret; + + /* Get exclude memory ranges needed for setting up kexec segments */ + ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges)); + if (ret) { + pr_err("Failed to setup exclude memory ranges for buffer lookup\n"); + return ret; + } + + return kexec_image_probe_default(image, buf, buf_len); +} + +/** + * arch_kimage_file_post_load_cleanup - Frees up all the allocations done + * while loading the image. + * @image: kexec image being loaded. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kfree(image->arch.exclude_ranges); + image->arch.exclude_ranges = NULL; + + vfree(image->arch.backup_buf); + image->arch.backup_buf = NULL; + + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + image->arch.elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c new file mode 100644 index 000000000000..6b81c852feab --- /dev/null +++ b/arch/powerpc/kexec/ranges.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * powerpc code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2020 IBM Corporation + * + * Based on kexec-tools' kexec-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Hari Bathini, IBM Corporation. + */ + +#define pr_fmt(fmt) "kexec ranges: " fmt + +#include <linux/sort.h> +#include <linux/kexec.h> +#include <linux/of_device.h> +#include <linux/slab.h> +#include <asm/sections.h> +#include <asm/kexec_ranges.h> + +/** + * get_max_nr_ranges - Get the max no. of ranges crash_mem structure + * could hold, given the size allocated for it. + * @size: Allocation size of crash_mem structure. + * + * Returns the maximum no. of ranges. + */ +static inline unsigned int get_max_nr_ranges(size_t size) +{ + return ((size - sizeof(struct crash_mem)) / + sizeof(struct crash_mem_range)); +} + +/** + * get_mem_rngs_size - Get the allocated size of mem_rngs based on + * max_nr_ranges and chunk size. + * @mem_rngs: Memory ranges. + * + * Returns the maximum size of @mem_rngs. + */ +static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs) +{ + size_t size; + + if (!mem_rngs) + return 0; + + size = (sizeof(struct crash_mem) + + (mem_rngs->max_nr_ranges * sizeof(struct crash_mem_range))); + + /* + * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ. + * So, align to get the actual length. + */ + return ALIGN(size, MEM_RANGE_CHUNK_SZ); +} + +/** + * __add_mem_range - add a memory range to memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + + if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) { + mem_rngs = realloc_mem_ranges(mem_ranges); + if (!mem_rngs) + return -ENOMEM; + } + + mem_rngs->ranges[mem_rngs->nr_ranges].start = base; + mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1; + pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n", + base, base + size - 1, mem_rngs->nr_ranges); + mem_rngs->nr_ranges++; + return 0; +} + +/** + * __merge_memory_ranges - Merges the given memory ranges list. + * @mem_rngs: Range list to merge. + * + * Assumes a sorted range list. + * + * Returns nothing. + */ +static void __merge_memory_ranges(struct crash_mem *mem_rngs) +{ + struct crash_mem_range *ranges; + int i, idx; + + if (!mem_rngs) + return; + + idx = 0; + ranges = &(mem_rngs->ranges[0]); + for (i = 1; i < mem_rngs->nr_ranges; i++) { + if (ranges[i].start <= (ranges[i-1].end + 1)) + ranges[idx].end = ranges[i].end; + else { + idx++; + if (i == idx) + continue; + + ranges[idx] = ranges[i]; + } + } + mem_rngs->nr_ranges = idx + 1; +} + +/* cmp_func_t callback to sort ranges with sort() */ +static int rngcmp(const void *_x, const void *_y) +{ + const struct crash_mem_range *x = _x, *y = _y; + + if (x->start > y->start) + return 1; + if (x->start < y->start) + return -1; + return 0; +} + +/** + * sort_memory_ranges - Sorts the given memory ranges list. + * @mem_rngs: Range list to sort. + * @merge: If true, merge the list after sorting. + * + * Returns nothing. + */ +void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge) +{ + int i; + + if (!mem_rngs) + return; + + /* Sort the ranges in-place */ + sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges, + sizeof(mem_rngs->ranges[0]), rngcmp, NULL); + + if (merge) + __merge_memory_ranges(mem_rngs); + + /* For debugging purpose */ + pr_debug("Memory ranges:\n"); + for (i = 0; i < mem_rngs->nr_ranges; i++) { + pr_debug("\t[%03d][%#016llx - %#016llx]\n", i, + mem_rngs->ranges[i].start, + mem_rngs->ranges[i].end); + } +} + +/** + * realloc_mem_ranges - reallocate mem_ranges with size incremented + * by MEM_RANGE_CHUNK_SZ. Frees up the old memory, + * if memory allocation fails. + * @mem_ranges: Memory ranges to reallocate. + * + * Returns pointer to reallocated memory on success, NULL otherwise. + */ +struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges) +{ + struct crash_mem *mem_rngs = *mem_ranges; + unsigned int nr_ranges; + size_t size; + + size = get_mem_rngs_size(mem_rngs); + nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0; + + size += MEM_RANGE_CHUNK_SZ; + mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL); + if (!mem_rngs) { + kfree(*mem_ranges); + *mem_ranges = NULL; + return NULL; + } + + mem_rngs->nr_ranges = nr_ranges; + mem_rngs->max_nr_ranges = get_max_nr_ranges(size); + *mem_ranges = mem_rngs; + + return mem_rngs; +} + +/** + * add_mem_range - Updates existing memory range, if there is an overlap. + * Else, adds a new memory range. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + u64 mstart, mend, end; + unsigned int i; + + if (!size) + return 0; + + end = base + size - 1; + + if (!mem_rngs || !(mem_rngs->nr_ranges)) + return __add_mem_range(mem_ranges, base, size); + + for (i = 0; i < mem_rngs->nr_ranges; i++) { + mstart = mem_rngs->ranges[i].start; + mend = mem_rngs->ranges[i].end; + if (base < mend && end > mstart) { + if (base < mstart) + mem_rngs->ranges[i].start = base; + if (end > mend) + mem_rngs->ranges[i].end = end; + return 0; + } + } + + return __add_mem_range(mem_ranges, base, size); +} + +/** + * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. + * @mem_ranges: Range list to add the memory range(s) to. + * + * Returns 0 on success, negative errno on error. + */ +int add_tce_mem_ranges(struct crash_mem **mem_ranges) +{ + struct device_node *dn = NULL; + int ret = 0; + + for_each_node_by_type(dn, "pci") { + u64 base; + u32 size; + + ret = of_property_read_u64(dn, "linux,tce-base", &base); + ret |= of_property_read_u32(dn, "linux,tce-size", &size); + if (ret) { + /* + * It is ok to have pci nodes without tce. So, ignore + * property does not exist error. + */ + if (ret == -EINVAL) { + ret = 0; + continue; + } + break; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + of_node_put(dn); + return ret; +} + +/** + * add_initrd_mem_range - Adds initrd range to the given memory ranges list, + * if the initrd was retained. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_initrd_mem_range(struct crash_mem **mem_ranges) +{ + u64 base, end; + int ret; + + /* This range means something, only if initrd was retained */ + if (!strstr(saved_command_line, "retain_initrd")) + return 0; + + ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base); + ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end); + if (!ret) + ret = add_mem_range(mem_ranges, base, end - base + 1); + + return ret; +} + +#ifdef CONFIG_PPC_BOOK3S_64 +/** + * add_htab_mem_range - Adds htab range to the given memory ranges list, + * if it exists + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_htab_mem_range(struct crash_mem **mem_ranges) +{ + if (!htab_address) + return 0; + + return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); +} +#endif + +/** + * add_kernel_mem_range - Adds kernel text region to the given + * memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_kernel_mem_range(struct crash_mem **mem_ranges) +{ + return add_mem_range(mem_ranges, 0, __pa(_end)); +} + +/** + * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_rtas_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u32 base, size; + int ret = 0; + + dn = of_find_node_by_path("/rtas"); + if (!dn) + return 0; + + ret = of_property_read_u32(dn, "linux,rtas-base", &base); + ret |= of_property_read_u32(dn, "rtas-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} + +/** + * add_opal_mem_range - Adds OPAL region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_opal_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u64 base, size; + int ret; + + dn = of_find_node_by_path("/ibm,opal"); + if (!dn) + return 0; + + ret = of_property_read_u64(dn, "opal-base-address", &base); + ret |= of_property_read_u64(dn, "opal-runtime-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} + +/** + * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w + * to the given memory ranges list. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int add_reserved_mem_ranges(struct crash_mem **mem_ranges) +{ + int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; + const __be32 *prop; + + prop = of_get_property(of_root, "reserved-ranges", &len); + if (!prop) + return 0; + + n_mem_addr_cells = of_n_addr_cells(of_root); + n_mem_size_cells = of_n_size_cells(of_root); + cells = n_mem_addr_cells + n_mem_size_cells; + + /* Each reserved range is an (address,size) pair */ + for (i = 0; i < (len / (sizeof(u32) * cells)); i++) { + u64 base, size; + + base = of_read_number(prop + (i * cells), n_mem_addr_cells); + size = of_read_number(prop + (i * cells) + n_mem_addr_cells, + n_mem_size_cells); + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + return ret; +} diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ebb04f331ad3..0f83f39a2bd2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -765,7 +765,7 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, return H_P3; vcpu->arch.ciabr = value1; return H_SUCCESS; - case H_SET_MODE_RESOURCE_SET_DAWR: + case H_SET_MODE_RESOURCE_SET_DAWR0: if (!kvmppc_power8_compatible(vcpu)) return H_P2; if (!ppc_breakpoint_available()) @@ -1680,10 +1680,22 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: *val = get_reg_val(id, vcpu->arch.uamor); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; *val = get_reg_val(id, vcpu->arch.mmcr[i]); break; + case KVM_REG_PPC_MMCR2: + *val = get_reg_val(id, vcpu->arch.mmcr[2]); + break; + case KVM_REG_PPC_MMCRA: + *val = get_reg_val(id, vcpu->arch.mmcra); + break; + case KVM_REG_PPC_MMCRS: + *val = get_reg_val(id, vcpu->arch.mmcrs); + break; + case KVM_REG_PPC_MMCR3: + *val = get_reg_val(id, vcpu->arch.mmcr[3]); + break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); @@ -1699,7 +1711,13 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.sdar); break; case KVM_REG_PPC_SIER: - *val = get_reg_val(id, vcpu->arch.sier); + *val = get_reg_val(id, vcpu->arch.sier[0]); + break; + case KVM_REG_PPC_SIER2: + *val = get_reg_val(id, vcpu->arch.sier[1]); + break; + case KVM_REG_PPC_SIER3: + *val = get_reg_val(id, vcpu->arch.sier[2]); break; case KVM_REG_PPC_IAMR: *val = get_reg_val(id, vcpu->arch.iamr); @@ -1901,10 +1919,22 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: vcpu->arch.uamor = set_reg_val(id, *val); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; vcpu->arch.mmcr[i] = set_reg_val(id, *val); break; + case KVM_REG_PPC_MMCR2: + vcpu->arch.mmcr[2] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCRA: + vcpu->arch.mmcra = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCRS: + vcpu->arch.mmcrs = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCR3: + *val = get_reg_val(id, vcpu->arch.mmcr[3]); + break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); @@ -1920,7 +1950,13 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.sdar = set_reg_val(id, *val); break; case KVM_REG_PPC_SIER: - vcpu->arch.sier = set_reg_val(id, *val); + vcpu->arch.sier[0] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SIER2: + vcpu->arch.sier[1] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SIER3: + vcpu->arch.sier[2] = set_reg_val(id, *val); break; case KVM_REG_PPC_IAMR: vcpu->arch.iamr = set_reg_val(id, *val); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cd3cf3d366b..073617ce83e0 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -113,7 +113,7 @@ void __init kvm_cma_reserve(void) selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; if (selected_size) { - pr_debug("%s: reserving %ld MiB for global area\n", __func__, + pr_info("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; cma_declare_contiguous(0, selected_size, 0, align_size, diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 63fd81f3039d..59822cba454d 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -140,6 +140,14 @@ BEGIN_FTR_SECTION std r8, HSTATE_MMCR2(r13) std r9, HSTATE_SIER(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r5, SPRN_MMCR3 + mfspr r6, SPRN_SIER2 + mfspr r7, SPRN_SIER3 + std r5, HSTATE_MMCR3(r13) + std r6, HSTATE_SIER2(r13) + std r7, HSTATE_SIER3(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mfspr r3, SPRN_PMC1 mfspr r5, SPRN_PMC2 mfspr r6, SPRN_PMC3 diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 79f7d07ef674..6028628ea3ac 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -244,7 +244,7 @@ long kvmppc_realmode_hmi_handler(void) { bool resync_req; - __this_cpu_inc(irq_stat.hmi_exceptions); + local_paca->hmi_irqs++; if (hmi_handle_debugtrig(NULL) >= 0) return 1; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 71943892c81c..799d6d0f4ead 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -3428,7 +3428,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_PMC6, r9 ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) - ld r6, VCPU_MMCR + 16(r4) + ld r6, VCPU_MMCRA(r4) ld r7, VCPU_SIAR(r4) ld r8, VCPU_SDAR(r4) mtspr SPRN_MMCR1, r5 @@ -3436,14 +3436,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_SIAR, r7 mtspr SPRN_SDAR, r8 BEGIN_FTR_SECTION - ld r5, VCPU_MMCR + 24(r4) + ld r5, VCPU_MMCR + 24(r4) + ld r6, VCPU_SIER + 8(r4) + ld r7, VCPU_SIER + 16(r4) + mtspr SPRN_MMCR3, r5 + mtspr SPRN_SIER2, r6 + mtspr SPRN_SIER3, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) +BEGIN_FTR_SECTION + ld r5, VCPU_MMCR + 16(r4) ld r6, VCPU_SIER(r4) mtspr SPRN_MMCR2, r5 mtspr SPRN_SIER, r6 BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) - ld r9, VCPU_MMCR + 32(r4) + ld r9, VCPU_MMCRS(r4) mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 @@ -3496,6 +3504,14 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCR2, r8 mtspr SPRN_SIER, r9 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + ld r5, HSTATE_MMCR3(r13) + ld r6, HSTATE_SIER2(r13) + ld r7, HSTATE_SIER3(r13) + mtspr SPRN_MMCR3, r5 + mtspr SPRN_SIER2, r6 + mtspr SPRN_SIER3, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mtspr SPRN_MMCR0, r3 isync mtlr r0 @@ -3551,10 +3567,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r8, SPRN_SDAR std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) - std r6, VCPU_MMCR + 16(r9) + std r6, VCPU_MMCRA(r9) BEGIN_FTR_SECTION - std r10, VCPU_MMCR + 24(r9) + std r10, VCPU_MMCR + 16(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r5, SPRN_MMCR3 + mfspr r6, SPRN_SIER2 + mfspr r7, SPRN_SIER3 + std r5, VCPU_MMCR + 24(r9) + std r6, VCPU_SIER + 8(r9) + std r7, VCPU_SIER + 16(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) std r7, VCPU_SIAR(r9) std r8, VCPU_SDAR(r9) mfspr r3, SPRN_PMC1 @@ -3578,7 +3602,7 @@ BEGIN_FTR_SECTION_NESTED(96) mfspr r8, SPRN_MMCRS stw r6, VCPU_PMC + 24(r9) stw r7, VCPU_PMC + 28(r9) - std r8, VCPU_MMCR + 32(r9) + std r8, VCPU_MMCRS(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index f7ad99d972ce..607a9b99c334 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -26,7 +26,7 @@ #define FUNC(name) name #define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) -#endif /* CONFIG_PPC_BOOK3S_XX */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #define VCPU_LOAD_NVGPRS(vcpu) \ PPC_LL r14, VCPU_GPR(R14)(vcpu); \ diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 4a61a971c34e..830a126e095d 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -89,7 +89,7 @@ {H_CREATE_RPT, "H_CREATE_RPT"}, \ {H_REMOVE_RPT, "H_REMOVE_RPT"}, \ {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \ - {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \ + {H_DISABLE_AND_GET, "H_DISABLE_AND_GET"}, \ {H_ERROR_DATA, "H_ERROR_DATA"}, \ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5e994cda8e40..d66a645503eb 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -41,7 +41,10 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o memcpy_mcsafe_64.o +ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o +endif + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ test_emulate_step_exec_instr.o diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 0a051dfeb177..8c3934ea6220 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -93,7 +93,7 @@ static int map_patch_area(void *addr, unsigned long text_poke_addr) unsigned long pfn; int err; - if (is_vmalloc_addr(addr)) + if (is_vmalloc_or_module_addr(addr)) pfn = vmalloc_to_pfn(addr); else pfn = __pa_symbol(addr) >> PAGE_SHIFT; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 6440d5943c00..04165b7a163f 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -27,14 +27,14 @@ void splpar_spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (lock->slock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } EXPORT_SYMBOL_GPL(splpar_spin_yield); @@ -53,13 +53,13 @@ void splpar_rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (rw->lock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } #endif diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 0666a8d29596..1550e0d2513a 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -9,20 +9,56 @@ #include <asm/cacheflush.h> +static inline void __clean_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void __flush_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void clean_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __clean_pmem_range(start, stop); +} + +static inline void flush_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __flush_pmem_range(start, stop); +} + /* * CONFIG_ARCH_HAS_PMEM_API symbols */ void arch_wb_cache_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + flush_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); @@ -35,19 +71,17 @@ long __copy_from_user_flushcache(void *dest, const void __user *src, unsigned long copied, start = (unsigned long) dest; copied = __copy_from_user(dest, src, size); - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); return copied; } -void *memcpy_flushcache(void *dest, const void *src, size_t size) +void memcpy_flushcache(void *dest, const void *src, size_t size) { unsigned long start = (unsigned long) dest; memcpy(dest, src, size); - flush_dcache_range(start, start + size); - - return dest; + clean_pmem_range(start, start + size); } EXPORT_SYMBOL(memcpy_flushcache); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5abe98216dc2..caee8cc77e19 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -16,6 +16,7 @@ #include <asm/disassemble.h> extern char system_call_common[]; +extern char system_call_vectored_emulate[]; #ifdef CONFIG_PPC64 /* Bits in SRR1 that are copied from MSR */ @@ -200,8 +201,8 @@ static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, unsigned int dd; unsigned long ea, d0, d1, d; - prefix_r = instr & (1ul << 20); - ra = (suffix >> 16) & 0x1f; + prefix_r = GET_PREFIX_R(instr); + ra = GET_PREFIX_RA(suffix); d0 = instr & 0x3ffff; d1 = suffix & 0xffff; @@ -1236,6 +1237,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 17: /* sc */ if ((word & 0xfe2) == 2) op->type = SYSCALL; + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && + (word & 0xfe3) == 1) + op->type = SYSCALL_VECTORED_0; else op->type = UNKNOWN; return 0; @@ -1339,8 +1343,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, switch (opcode) { #ifdef __powerpc64__ case 1: - prefix_r = word & (1ul << 20); - ra = (suffix >> 16) & 0x1f; + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); rd = (suffix >> 21) & 0x1f; op->reg = rd; op->val = regs->gpr[rd]; @@ -1802,7 +1806,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = (int) regs->gpr[ra] / (int) regs->gpr[rb]; goto arith_done; - +#ifdef __powerpc64__ + case 425: /* divde[.] */ + asm volatile(PPC_DIVDE(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; + case 393: /* divdeu[.] */ + asm volatile(PPC_DIVDEU(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; +#endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -1; @@ -2715,8 +2730,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } break; case 1: /* Prefixed instructions */ - prefix_r = word & (1ul << 20); - ra = (suffix >> 16) & 0x1f; + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); op->update_reg = ra; rd = (suffix >> 21) & 0x1f; op->reg = rd; @@ -3378,6 +3393,18 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) regs->msr = MSR_KERNEL; return 1; +#ifdef CONFIG_PPC_BOOK3S_64 + case SYSCALL_VECTORED_0: /* scv 0 */ + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_vectored_emulate; + regs->msr = MSR_KERNEL; + return 1; +#endif + case RFI: return -1; #endif diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 46af80279ebc..0a201b771477 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -8,59 +8,51 @@ #define pr_fmt(fmt) "emulate_step_test: " fmt #include <linux/ptrace.h> +#include <asm/cpu_has_feature.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/code-patching.h> #include <asm/inst.h> -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) -#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) - -/* - * Defined with TEST_ prefix so it does not conflict with other - * definitions. - */ -#define TEST_LD(r, base, i) ppc_inst(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LWZ(r, base, i) ppc_inst(PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZX(t, a, b) ppc_inst(PPC_INST_LWZX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STD(r, base, i) ppc_inst(PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LDARX(t, a, b, eh) ppc_inst(PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define TEST_STDCX(s, a, b) ppc_inst(PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFSX(t, a, b) ppc_inst(PPC_INST_LFSX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFSX(s, a, b) ppc_inst(PPC_INST_STFSX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFDX(t, a, b) ppc_inst(PPC_INST_LFDX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFDX(s, a, b) ppc_inst(PPC_INST_STFDX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LVX(t, a, b) ppc_inst(PPC_INST_LVX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STVX(s, a, b) ppc_inst(PPC_INST_STVX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LXVD2X(s, a, b) ppc_inst(PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_STXVD2X(s, a, b) ppc_inst(PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_ADD(t, a, b) ppc_inst(PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADD_DOT(t, a, b) ppc_inst(PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define TEST_ADDC(t, a, b) ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADDC_DOT(t, a, b) ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) - #define MAX_SUBTESTS 16 #define IGNORE_GPR(n) (0x1UL << (n)) #define IGNORE_XER (0x1UL << 32) #define IGNORE_CCR (0x1UL << 33) +#define NEGATIVE_TEST (0x1UL << 63) + +#define TEST_PLD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLWZ(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_LWZ(r, base, i)) + +#define TEST_PSTD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PADDI(t, a, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_ADDI(t, a, i)) + static void __init init_pt_regs(struct pt_regs *regs) { @@ -105,7 +97,7 @@ static void __init test_ld(void) regs.gpr[3] = (unsigned long) &a; /* ld r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("ld", "PASS"); @@ -113,6 +105,29 @@ static void __init test_ld(void) show_result("ld", "FAIL"); } +static void __init test_pld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* pld r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLD(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("pld", "PASS"); + else + show_result("pld", "FAIL"); +} + static void __init test_lwz(void) { struct pt_regs regs; @@ -123,7 +138,7 @@ static void __init test_lwz(void) regs.gpr[3] = (unsigned long) &a; /* lwz r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZ(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("lwz", "PASS"); @@ -131,6 +146,30 @@ static void __init test_lwz(void) show_result("lwz", "FAIL"); } +static void __init test_plwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* plwz r5, 0(r3), 0 */ + + stepped = emulate_step(®s, TEST_PLWZ(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("plwz", "PASS"); + else + show_result("plwz", "FAIL"); +} + static void __init test_lwzx(void) { struct pt_regs regs; @@ -143,7 +182,7 @@ static void __init test_lwzx(void) regs.gpr[5] = 0x8765; /* lwzx r5, r3, r4 */ - stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZX(5, 3, 4))); if (stepped == 1 && regs.gpr[5] == a[2]) show_result("lwzx", "PASS"); else @@ -161,13 +200,36 @@ static void __init test_std(void) regs.gpr[5] = 0x5678; /* std r5, 0(r3) */ - stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("std", "PASS"); else show_result("std", "FAIL"); } +static void __init test_pstd(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + regs.gpr[5] = 0x5678; + + /* pstd r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTD(5, 3, 0, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("pstd", "PASS"); + else + show_result("pstd", "FAIL"); +} + static void __init test_ldarx_stdcx(void) { struct pt_regs regs; @@ -186,7 +248,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x5678; /* ldarx r5, r3, r4, 0 */ - stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0))); /* * Don't touch 'a' here. Touching 'a' can do Load/store @@ -204,7 +266,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x9ABC; /* stdcx. r5, r3, r4 */ - stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STDCX(5, 3, 4))); /* * Two possible scenarios that indicates successful emulation @@ -244,7 +306,7 @@ static void __init test_lfsx_stfsx(void) regs.gpr[4] = 0; /* lfsx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFSX(10, 3, 4))); if (stepped == 1) show_result("lfsx", "PASS"); @@ -257,7 +319,7 @@ static void __init test_lfsx_stfsx(void) c.a = 678.91; /* stfsx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFSX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfsx", "PASS"); @@ -265,6 +327,53 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "FAIL"); } +static void __init test_plfs_pstfs(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfs ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfs frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFS(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfs", "PASS"); + else + show_result("plfs", "FAIL"); + + + /*** pstfs ***/ + + c.a = 678.91; + + /* pstfs frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFS(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfs", "PASS"); + else + show_result("pstfs", "FAIL"); +} + static void __init test_lfdx_stfdx(void) { struct pt_regs regs; @@ -287,7 +396,7 @@ static void __init test_lfdx_stfdx(void) regs.gpr[4] = 0; /* lfdx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFDX(10, 3, 4))); if (stepped == 1) show_result("lfdx", "PASS"); @@ -300,13 +409,60 @@ static void __init test_lfdx_stfdx(void) c.a = 987654.32; /* stfdx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFDX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfdx", "PASS"); else show_result("stfdx", "FAIL"); } + +static void __init test_plfd_pstfd(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfd ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfd frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFD(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfd", "PASS"); + else + show_result("plfd", "FAIL"); + + + /*** pstfd ***/ + + c.a = 987654.32; + + /* pstfd frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFD(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfd", "PASS"); + else + show_result("pstfd", "FAIL"); +} #else static void __init test_lfsx_stfsx(void) { @@ -314,11 +470,23 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); } +static void __init test_plfs_pstfs(void) +{ + show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)"); +} + static void __init test_lfdx_stfdx(void) { show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); } + +static void __init test_plfd_pstfd(void) +{ + show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)"); +} #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC @@ -346,7 +514,7 @@ static void __init test_lvx_stvx(void) regs.gpr[4] = 0; /* lvx vrt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LVX(10, 3, 4))); if (stepped == 1) show_result("lvx", "PASS"); @@ -362,7 +530,7 @@ static void __init test_lvx_stvx(void) c.b[3] = 498532; /* stvx vrs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STVX(10, 3, 4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) @@ -403,7 +571,7 @@ static void __init test_lxvd2x_stxvd2x(void) regs.gpr[4] = 0; /* lxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4))); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { show_result("lxvd2x", "PASS"); @@ -423,7 +591,7 @@ static void __init test_lxvd2x_stxvd2x(void) c.b[3] = 4; /* stxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3] && @@ -447,18 +615,24 @@ static void __init test_lxvd2x_stxvd2x(void) static void __init run_tests_load_store(void) { test_ld(); + test_pld(); test_lwz(); + test_plwz(); test_lwzx(); test_std(); + test_pstd(); test_ldarx_stdcx(); test_lfsx_stfsx(); + test_plfs_pstfs(); test_lfdx_stfdx(); + test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); } struct compute_test { char *mnemonic; + unsigned long cpu_feature; struct { char *descr; unsigned long flags; @@ -467,6 +641,11 @@ struct compute_test { } subtests[MAX_SUBTESTS + 1]; }; +/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */ +#define SI_MIN BIT(33) +#define SI_MAX (BIT(33) - 1) +#define SI_UMAX (BIT(34) - 1) + static struct compute_test compute_tests[] = { { .mnemonic = "nop", @@ -485,7 +664,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -493,7 +672,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -501,7 +680,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -509,7 +688,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -517,7 +696,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -525,7 +704,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -533,7 +712,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -541,7 +720,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -549,7 +728,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -557,7 +736,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -571,7 +750,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -579,7 +758,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -588,7 +767,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -596,7 +775,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -604,7 +783,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -612,7 +791,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -620,7 +799,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -628,7 +807,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -636,7 +815,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -644,7 +823,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -657,7 +836,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -665,7 +844,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -673,7 +852,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -681,7 +860,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -689,7 +868,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -697,7 +876,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -705,7 +884,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -713,7 +892,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -721,7 +900,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -729,7 +908,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -737,7 +916,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, @@ -751,7 +930,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -759,7 +938,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -768,7 +947,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -776,7 +955,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -784,7 +963,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -792,7 +971,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -800,7 +979,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -808,7 +987,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -816,7 +995,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -824,7 +1003,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -832,31 +1011,320 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, } } } + }, + { + .mnemonic = "divde", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divde.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divdeu", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "divdeu.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "paddi", + .cpu_feature = CPU_FTR_ARCH_31, + .subtests = { + { + .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_UMAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 0, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + } + }, + { + .descr = "RA = 0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + .gpr[22] = 0x0, + } + }, + { + .descr = "RA is r0, SI = 0, R = 1", + .instr = TEST_PADDI(21, 0, 0, 1), + .regs = { + .gpr[21] = 0, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 1", + .instr = TEST_PADDI(21, 0, SI_MIN, 1), + .regs = { + .gpr[21] = 0, + } + }, + /* Invalid instruction form with R = 1 and RA != 0 */ + { + .descr = "RA = R22(0), SI = 0, R = 1", + .instr = TEST_PADDI(21, 22, 0, 1), + .flags = NEGATIVE_TEST, + .regs = { + .gpr[21] = 0, + .gpr[22] = 0, + } + } + } } }; static int __init emulate_compute_instr(struct pt_regs *regs, - struct ppc_inst instr) + struct ppc_inst instr, + bool negative) { + int analysed; struct instruction_op op; if (!regs || !ppc_inst_val(instr)) return -EINVAL; - if (analyse_instr(&op, regs, instr) != 1 || - GETTYPE(op.type) != COMPUTE) { - pr_info("emulation failed, instruction = 0x%08x\n", ppc_inst_val(instr)); + regs->nip = patch_site_addr(&patch__exec_instr); + + analysed = analyse_instr(&op, regs, instr); + if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { + if (negative) + return -EFAULT; + pr_info("emulation failed, instruction = %s\n", ppc_inst_as_str(instr)); return -EFAULT; } - - emulate_update_regs(regs, &op); + if (analysed == 1 && negative) + pr_info("negative test failed, instruction = %s\n", ppc_inst_as_str(instr)); + if (!negative) + emulate_update_regs(regs, &op); return 0; } @@ -864,7 +1332,6 @@ static int __init execute_compute_instr(struct pt_regs *regs, struct ppc_inst instr) { extern int exec_instr(struct pt_regs *regs); - extern s32 patch__exec_instr; if (!regs || !ppc_inst_val(instr)) return -EINVAL; @@ -872,7 +1339,7 @@ static int __init execute_compute_instr(struct pt_regs *regs, /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = 0x%08x\n", ppc_inst_val(instr)); + pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr)); return -EFAULT; } @@ -894,15 +1361,21 @@ static void __init run_tests_compute(void) struct pt_regs *regs, exp, got; unsigned int i, j, k; struct ppc_inst instr; - bool ignore_gpr, ignore_xer, ignore_ccr, passed; + bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { test = &compute_tests[i]; + if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) { + show_result(test->mnemonic, "SKIP (!CPU_FTR)"); + continue; + } + for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) { instr = test->subtests[j].instr; flags = test->subtests[j].flags; regs = &test->subtests[j].regs; + negative = flags & NEGATIVE_TEST; ignore_xer = flags & IGNORE_XER; ignore_ccr = flags & IGNORE_CCR; passed = true; @@ -917,8 +1390,12 @@ static void __init run_tests_compute(void) exp.msr = MSR_KERNEL; got.msr = MSR_KERNEL; - if (emulate_compute_instr(&got, instr) || - execute_compute_instr(&exp, instr)) { + rc = emulate_compute_instr(&got, instr, negative) != 0; + if (negative) { + /* skip executing instruction */ + passed = rc; + goto print; + } else if (rc || execute_compute_instr(&exp, instr)) { passed = false; goto print; } diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 1580f34f4f4f..9ef941d958d8 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -80,7 +80,9 @@ _GLOBAL(exec_instr) REST_NVGPRS(r31) /* Placeholder for the test instruction */ + .balign 64 1: nop + nop patch_site 1b patch__exec_instr /* diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 923ad8f374eb..1690d369688b 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -62,7 +62,7 @@ _GLOBAL(hash_page) isync #endif /* Get PTE (linux-style) and check access */ - lis r0,KERNELBASE@h /* check if kernel address */ + lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 03b6ba54460e..c0162911f6cb 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -187,6 +187,17 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return __mmu_mapin_ram(border, top); } +static bool is_module_segment(unsigned long addr) +{ + if (!IS_ENABLED(CONFIG_MODULES)) + return false; + if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) + return false; + if (addr >= ALIGN(VMALLOC_END, SZ_256M)) + return false; + return true; +} + void mmu_mark_initmem_nx(void) { int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -223,9 +234,9 @@ void mmu_mark_initmem_nx(void) for (i = TASK_SIZE >> 28; i < 16; i++) { /* Do not set NX on VM space for modules */ - if (IS_ENABLED(CONFIG_MODULES) && - (VMALLOC_START & 0xf0000000) == i << 28) - break; + if (is_module_segment(i << 28)) + continue; + mtsrin(mfsrin(i << 28) | 0x10000000, i << 28); } } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 9b9f92ad0e7a..1478fceeb683 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -232,8 +232,6 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= HPTE_R_I; else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); else /* * Add memory coherence if cache inhibited is not set @@ -596,7 +594,7 @@ static void __init htab_scan_page_sizes(void) } #ifdef CONFIG_HUGETLB_PAGE - if (!hugetlb_disabled) { + if (!hugetlb_disabled && !early_radix_enabled() ) { /* Reserve 16G huge page memory sections for huge pages */ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); } @@ -663,11 +661,10 @@ static void __init htab_init_page_sizes(void) * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default */ - if (IS_ENABLED(STRICT_KERNEL_RWX) && + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && (unsigned long)_stext % 0x1000000) { if (mmu_psize_defs[MMU_PAGE_16M].shift) - pr_warn("Kernel not 16M aligned, " - "disabling 16M linear map alignment"); + pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n"); aligned = false; } @@ -788,7 +785,7 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int resize_hpt_for_hotplug(unsigned long new_mem_size) +static int resize_hpt_for_hotplug(unsigned long new_mem_size) { unsigned target_hpt_shift; @@ -822,6 +819,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, return -1; } + resize_hpt_for_hotplug(memblock_phys_mem_size()); + rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(prot), mmu_linear_psize, mmu_kernel_ssize); @@ -839,6 +838,10 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); WARN_ON(rc < 0); + + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) + pr_warn("Hash collision while resizing HPT\n"); + return rc; } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -1111,6 +1114,10 @@ void hash__early_init_mmu_secondary(void) if (cpu_has_feature(CPU_FTR_ARCH_206) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); + +#ifdef CONFIG_PPC_MEM_KEYS + mtspr(SPRN_UAMOR, default_uamor); +#endif } #endif /* CONFIG_SMP */ @@ -1731,10 +1738,6 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, return gslot; } -/* - * WARNING: This is called from hash_low_64.S, if you change this prototype, - * do not forget to update the assembly call site ! - */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, unsigned long flags) { diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index c58ad1049909..e18ae50a275c 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -15,6 +15,7 @@ #include <asm/powernv.h> #include <asm/firmware.h> #include <asm/ultravisor.h> +#include <asm/kexec.h> #include <mm/mmu_decl.h> #include <trace/events/thp.h> @@ -165,6 +166,8 @@ void mmu_cleanup_all(void) radix__mmu_cleanup_all(); else if (mmu_hash_ops.hpte_clear_all) mmu_hash_ops.hpte_clear_all(); + + reset_sprs(); } #ifdef CONFIG_MEMORY_HOTPLUG @@ -339,6 +342,9 @@ void pmd_fragment_free(unsigned long *pmd) { struct page *page = virt_to_page(pmd); + if (PageReserved(page)) + return free_reserved_page(page); + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); if (atomic_dec_and_test(&page->pt_frag_refcount)) { pgtable_pmd_page_dtor(page); @@ -356,7 +362,7 @@ static inline void pgtable_free(void *table, int index) pmd_fragment_free(table); break; case PUD_INDEX: - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table); + __pud_free(table); break; #if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE) /* 16M hugepd directory at pud level */ diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index d174106bab67..69a6b87f2bb4 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -10,58 +10,103 @@ #include <asm/mmu.h> #include <asm/setup.h> #include <linux/pkeys.h> -#include <linux/of_device.h> +#include <linux/of_fdt.h> -DEFINE_STATIC_KEY_TRUE(pkey_disabled); -int pkeys_total; /* Total pkeys as per device tree */ -u32 initial_allocation_mask; /* Bits set for the initially allocated keys */ -u32 reserved_allocation_mask; /* Bits set for reserved keys */ -static bool pkey_execute_disable_supported; -static bool pkeys_devtree_defined; /* property exported by device tree */ -static u64 pkey_amr_mask; /* Bits in AMR not to be touched */ -static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ -static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ +int num_pkey; /* Max number of pkeys supported */ +/* + * Keys marked in the reservation list cannot be allocated by userspace + */ +u32 reserved_allocation_mask __ro_after_init; + +/* Bits set for the initially allocated keys */ +static u32 initial_allocation_mask __ro_after_init; + +/* + * Even if we allocate keys with sys_pkey_alloc(), we need to make sure + * other thread still find the access denied using the same keys. + */ +static u64 default_amr = ~0x0UL; +static u64 default_iamr = 0x5555555555555555UL; +u64 default_uamor __ro_after_init; +/* + * Key used to implement PROT_EXEC mmap. Denies READ/WRITE + * We pick key 2 because 0 is special key and 1 is reserved as per ISA. + */ static int execute_only_key = 2; +static bool pkey_execute_disable_supported; + #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL #define AMR_WR_BIT 0x2UL #define IAMR_EX_BIT 0x1UL -#define PKEY_REG_BITS (sizeof(u64)*8) +#define PKEY_REG_BITS (sizeof(u64) * 8) #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) -static void scan_pkey_feature(void) +static int __init dt_scan_storage_keys(unsigned long node, + const char *uname, int depth, + void *data) { - u32 vals[2]; - struct device_node *cpu; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *prop; + int *pkeys_total = (int *) data; - cpu = of_find_node_by_type(NULL, "cpu"); - if (!cpu) - return; + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; - if (of_property_read_u32_array(cpu, - "ibm,processor-storage-keys", vals, 2)) - return; + prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL); + if (!prop) + return 0; + *pkeys_total = be32_to_cpu(prop[0]); + return 1; +} + +static int scan_pkey_feature(void) +{ + int ret; + int pkeys_total = 0; /* - * Since any pkey can be used for data or execute, we will just treat - * all keys as equal and track them as one entity. + * Pkey is not supported with Radix translation. */ - pkeys_total = vals[0]; - pkeys_devtree_defined = true; -} + if (early_radix_enabled()) + return 0; -static inline bool pkey_mmu_enabled(void) -{ - if (firmware_has_feature(FW_FEATURE_LPAR)) - return pkeys_total; - else - return cpu_has_feature(CPU_FTR_PKEY); + /* + * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 + */ + if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) + return 0; + + ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total); + if (ret == 0) { + /* + * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device + * tree. We make this exception since some version of skiboot forgot to + * expose this property on power8/9. + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + unsigned long pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || + PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9) + pkeys_total = 32; + } + } + + /* + * Adjust the upper limit, based on the number of bits supported by + * arch-neutral code. + */ + pkeys_total = min_t(int, pkeys_total, + ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); + return pkeys_total; } -static int pkey_initialize(void) +void __init pkey_early_init_devtree(void) { - int os_reserved, i; + int pkeys_total, i; /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral @@ -80,31 +125,14 @@ static int pkey_initialize(void) != (sizeof(u64) * BITS_PER_BYTE)); /* scan the device tree for pkey feature */ - scan_pkey_feature(); - - /* - * Let's assume 32 pkeys on P8 bare metal, if its not defined by device - * tree. We make this exception since skiboot forgot to expose this - * property on power8. - */ - if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && - cpu_has_feature(CPU_FTRS_POWER8)) - pkeys_total = 32; - - /* - * Adjust the upper limit, based on the number of bits supported by - * arch-neutral code. - */ - pkeys_total = min_t(int, pkeys_total, - ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1)); + pkeys_total = scan_pkey_feature(); + if (!pkeys_total) + goto out; - if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) - static_branch_enable(&pkey_disabled); - else - static_branch_disable(&pkey_disabled); + /* Allow all keys to be modified by default */ + default_uamor = ~0x0UL; - if (static_branch_likely(&pkey_disabled)) - return 0; + cur_cpu_spec->mmu_features |= MMU_FTR_PKEY; /* * The device tree cannot be relied to indicate support for @@ -118,53 +146,86 @@ static int pkey_initialize(void) #ifdef CONFIG_PPC_4K_PAGES /* * The OS can manage only 8 pkeys due to its inability to represent them - * in the Linux 4K PTE. + * in the Linux 4K PTE. Mark all other keys reserved. */ - os_reserved = pkeys_total - 8; + num_pkey = min(8, pkeys_total); #else - os_reserved = 0; + num_pkey = pkeys_total; #endif - /* Bits are in LE format. */ - reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key); - - /* register mask is in BE format */ - pkey_amr_mask = ~0x0ul; - pkey_amr_mask &= ~(0x3ul << pkeyshift(0)); - - pkey_iamr_mask = ~0x0ul; - pkey_iamr_mask &= ~(0x3ul << pkeyshift(0)); - pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key)); - pkey_uamor_mask = ~0x0ul; - pkey_uamor_mask &= ~(0x3ul << pkeyshift(0)); - pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key)); - - /* mark the rest of the keys as reserved and hence unavailable */ - for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) { - reserved_allocation_mask |= (0x1 << i); - pkey_uamor_mask &= ~(0x3ul << pkeyshift(i)); - } - initial_allocation_mask = reserved_allocation_mask | (0x1 << 0); - - if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) { + if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) { /* * Insufficient number of keys to support * execute only key. Mark it unavailable. - * Any AMR, UAMOR, IAMR bit set for - * this key is irrelevant since this key - * can never be allocated. */ execute_only_key = -1; + } else { + /* + * Mark the execute_only_pkey as not available for + * user allocation via pkey_alloc. + */ + reserved_allocation_mask |= (0x1 << execute_only_key); + + /* + * Deny READ/WRITE for execute_only_key. + * Allow execute in IAMR. + */ + default_amr |= (0x3ul << pkeyshift(execute_only_key)); + default_iamr &= ~(0x1ul << pkeyshift(execute_only_key)); + + /* + * Clear the uamor bits for this key. + */ + default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); } - return 0; -} + /* + * Allow access for only key 0. And prevent any other modification. + */ + default_amr &= ~(0x3ul << pkeyshift(0)); + default_iamr &= ~(0x1ul << pkeyshift(0)); + default_uamor &= ~(0x3ul << pkeyshift(0)); + /* + * key 0 is special in that we want to consider it an allocated + * key which is preallocated. We don't allow changing AMR bits + * w.r.t key 0. But one can pkey_free(key0) + */ + initial_allocation_mask |= (0x1 << 0); -arch_initcall(pkey_initialize); + /* + * key 1 is recommended not to be used. PowerISA(3.0) page 1015, + * programming note. + */ + reserved_allocation_mask |= (0x1 << 1); + default_uamor &= ~(0x3ul << pkeyshift(1)); + + /* + * Prevent the usage of OS reserved keys. Update UAMOR + * for those keys. Also mark the rest of the bits in the + * 32 bit mask as reserved. + */ + for (i = num_pkey; i < 32 ; i++) { + reserved_allocation_mask |= (0x1 << i); + default_uamor &= ~(0x3ul << pkeyshift(i)); + } + /* + * Prevent the allocation of reserved keys too. + */ + initial_allocation_mask |= reserved_allocation_mask; + + pr_info("Enabling pkeys with max key count %d\n", num_pkey); +out: + /* + * Setup uamor on boot cpu + */ + mtspr(SPRN_UAMOR, default_uamor); + + return; +} void pkey_mm_init(struct mm_struct *mm) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; mm_pkey_allocation_map(mm) = initial_allocation_mask; mm->context.execute_only_pkey = execute_only_key; @@ -196,30 +257,6 @@ static inline void write_iamr(u64 value) mtspr(SPRN_IAMR, value); } -static inline u64 read_uamor(void) -{ - return mfspr(SPRN_UAMOR); -} - -static inline void write_uamor(u64 value) -{ - mtspr(SPRN_UAMOR, value); -} - -static bool is_pkey_enabled(int pkey) -{ - u64 uamor = read_uamor(); - u64 pkey_bits = 0x3ul << pkeyshift(pkey); - u64 uamor_pkey_bits = (uamor & pkey_bits); - - /* - * Both the bits in UAMOR corresponding to the key should be set or - * reset. - */ - WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits)); - return !!(uamor_pkey_bits); -} - static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); @@ -245,8 +282,18 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, { u64 new_amr_bits = 0x0ul; u64 new_iamr_bits = 0x0ul; + u64 pkey_bits, uamor_pkey_bits; + + /* + * Check whether the key is disabled by UAMOR. + */ + pkey_bits = 0x3ul << pkeyshift(pkey); + uamor_pkey_bits = (default_uamor & pkey_bits); - if (!is_pkey_enabled(pkey)) + /* + * Both the bits in UAMOR corresponding to the key should be set + */ + if (uamor_pkey_bits != pkey_bits) return -EINVAL; if (init_val & PKEY_DISABLE_EXECUTE) { @@ -268,7 +315,7 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, void thread_pkey_regs_save(struct thread_struct *thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; /* @@ -276,38 +323,33 @@ void thread_pkey_regs_save(struct thread_struct *thread) */ thread->amr = read_amr(); thread->iamr = read_iamr(); - thread->uamor = read_uamor(); } void thread_pkey_regs_restore(struct thread_struct *new_thread, struct thread_struct *old_thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; if (old_thread->amr != new_thread->amr) write_amr(new_thread->amr); if (old_thread->iamr != new_thread->iamr) write_iamr(new_thread->iamr); - if (old_thread->uamor != new_thread->uamor) - write_uamor(new_thread->uamor); } void thread_pkey_regs_init(struct thread_struct *thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; - thread->amr = pkey_amr_mask; - thread->iamr = pkey_iamr_mask; - thread->uamor = pkey_uamor_mask; + thread->amr = default_amr; + thread->iamr = default_iamr; - write_uamor(pkey_uamor_mask); - write_amr(pkey_amr_mask); - write_iamr(pkey_iamr_mask); + write_amr(default_amr); + write_iamr(default_iamr); } -int __execute_only_pkey(struct mm_struct *mm) +int execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; } @@ -366,7 +408,7 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) bool arch_pte_access_permitted(u64 pte, bool write, bool execute) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return true; return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); @@ -383,7 +425,7 @@ bool arch_pte_access_permitted(u64 pte, bool write, bool execute) bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return true; /* * Do not enforce our key-permissions on a foreign vma. @@ -396,7 +438,7 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; /* Duplicate the oldmm pkey state in mm: */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index bb00e0cba119..28c784976bed 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -15,7 +15,7 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/string_helpers.h> -#include <linux/stop_machine.h> +#include <linux/memory.h> #include <asm/pgalloc.h> #include <asm/mmu_context.h> @@ -34,6 +34,7 @@ unsigned int mmu_pid_bits; unsigned int mmu_base_pid; +unsigned int radix_mem_block_size __ro_after_init; static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) @@ -56,6 +57,13 @@ static __ref void *early_alloc_pgtable(unsigned long size, int nid, return ptr; } +/* + * When allocating pud or pmd pointers, we allocate a complete page + * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This + * is to ensure that the page obtained from the memblock allocator + * can be completely used as page table page and can be freed + * correctly when the page table entries are removed. + */ static int early_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int map_page_size, @@ -72,8 +80,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, pgdp = pgd_offset_k(ea); p4dp = p4d_offset(pgdp, ea); if (p4d_none(*p4dp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, - region_start, region_end); + pudp = early_alloc_pgtable(PAGE_SIZE, nid, + region_start, region_end); p4d_populate(&init_mm, p4dp, pudp); } pudp = pud_offset(p4dp, ea); @@ -82,8 +90,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, goto set_the_pte; } if (pud_none(*pudp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid, - region_start, region_end); + pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start, + region_end); pud_populate(&init_mm, pudp, pmdp); } pmdp = pmd_offset(pudp, ea); @@ -259,6 +267,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end) static int __meminit create_physical_mapping(unsigned long start, unsigned long end, + unsigned long max_mapping_size, int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; @@ -272,6 +281,8 @@ static int __meminit create_physical_mapping(unsigned long start, int rc; gap = next_boundary(addr, end) - addr; + if (gap > max_mapping_size) + gap = max_mapping_size; previous_size = mapping_size; prev_exec = exec; @@ -322,8 +333,9 @@ static void __init radix_init_pgtable(void) /* We don't support slb for radix */ mmu_slb_size = 0; + /* - * Create the linear mapping, using standard page size for now + * Create the linear mapping */ for_each_memblock(memory, reg) { /* @@ -339,6 +351,7 @@ static void __init radix_init_pgtable(void) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, + radix_mem_block_size, -1, PAGE_KERNEL)); } @@ -479,6 +492,57 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, return 1; } +#ifdef CONFIG_MEMORY_HOTPLUG +static int __init probe_memory_block_size(unsigned long node, const char *uname, int + depth, void *data) +{ + unsigned long *mem_block_size = (unsigned long *)data; + const __be64 *prop; + int len; + + if (depth != 1) + return 0; + + if (strcmp(uname, "ibm,dynamic-reconfiguration-memory")) + return 0; + + prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); + if (!prop || len < sizeof(__be64)) + /* + * Nothing in the device tree + */ + *mem_block_size = MIN_MEMORY_BLOCK_SIZE; + else + *mem_block_size = be64_to_cpup(prop); + return 1; +} + +static unsigned long radix_memory_block_size(void) +{ + unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE; + + /* + * OPAL firmware feature is set by now. Hence we are ok + * to test OPAL feature. + */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + mem_block_size = 1UL * 1024 * 1024 * 1024; + else + of_scan_flat_dt(probe_memory_block_size, &mem_block_size); + + return mem_block_size; +} + +#else /* CONFIG_MEMORY_HOTPLUG */ + +static unsigned long radix_memory_block_size(void) +{ + return 1UL * 1024 * 1024 * 1024; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ + + void __init radix__early_init_devtree(void) { int rc; @@ -487,17 +551,27 @@ void __init radix__early_init_devtree(void) * Try to find the available page sizes in the device-tree */ rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL); - if (rc != 0) /* Found */ - goto found; + if (!rc) { + /* + * No page size details found in device tree. + * Let's assume we have page 4k and 64k support + */ + mmu_psize_defs[MMU_PAGE_4K].shift = 12; + mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; + + mmu_psize_defs[MMU_PAGE_64K].shift = 16; + mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; + } + /* - * let's assume we have page 4k and 64k support + * Max mapping size used when mapping pages. We don't use + * ppc_md.memory_block_size() here because this get called + * early and we don't have machine probe called yet. Also + * the pseries implementation only check for ibm,lmb-size. + * All hypervisor supporting radix do expose that device + * tree node. */ - mmu_psize_defs[MMU_PAGE_4K].shift = 12; - mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; - - mmu_psize_defs[MMU_PAGE_64K].shift = 16; - mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; -found: + radix_mem_block_size = radix_memory_block_size(); return; } @@ -519,8 +593,10 @@ void setup_kuep(bool disabled) if (disabled || !early_radix_enabled()) return; - if (smp_processor_id() == boot_cpuid) + if (smp_processor_id() == boot_cpuid) { pr_info("Activating Kernel Userspace Execution Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; + } /* * Radix always uses key0 of the IAMR to determine if an access is @@ -544,6 +620,10 @@ void setup_kuap(bool disabled) /* Make sure userspace can't change the AMR */ mtspr(SPRN_UAMOR, 0); + + /* + * Set the default kernel AMR values on all cpus. + */ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); isync(); } @@ -700,30 +780,19 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) pud_clear(pud); } -struct change_mapping_params { - pte_t *pte; - unsigned long start; - unsigned long end; - unsigned long aligned_start; - unsigned long aligned_end; -}; - -static int __meminit stop_machine_change_mapping(void *data) +static void free_pud_table(pud_t *pud_start, p4d_t *p4d) { - struct change_mapping_params *params = - (struct change_mapping_params *)data; + pud_t *pud; + int i; - if (!data) - return -1; + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (!pud_none(*pud)) + return; + } - spin_unlock(&init_mm.page_table_lock); - pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(__pa(params->aligned_start), - __pa(params->start), -1, PAGE_KERNEL); - create_physical_mapping(__pa(params->end), __pa(params->aligned_end), - -1, PAGE_KERNEL); - spin_lock(&init_mm.page_table_lock); - return 0; + pud_free(&init_mm, pud_start); + p4d_clear(p4d); } static void remove_pte_table(pte_t *pte_start, unsigned long addr, @@ -754,53 +823,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } } -/* - * clear the pte and potentially split the mapping helper - */ -static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end, - unsigned long size, pte_t *pte) -{ - unsigned long mask = ~(size - 1); - unsigned long aligned_start = addr & mask; - unsigned long aligned_end = addr + size; - struct change_mapping_params params; - bool split_region = false; - - if ((end - addr) < size) { - /* - * We're going to clear the PTE, but not flushed - * the mapping, time to remap and flush. The - * effects if visible outside the processor or - * if we are running in code close to the - * mapping we cleared, we are in trouble. - */ - if (overlaps_kernel_text(aligned_start, addr) || - overlaps_kernel_text(end, aligned_end)) { - /* - * Hack, just return, don't pte_clear - */ - WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " - "text, not splitting\n", addr, end); - return; - } - split_region = true; - } - - if (split_region) { - params.pte = pte; - params.start = addr; - params.end = end; - params.aligned_start = addr & ~(size - 1); - params.aligned_end = min_t(unsigned long, aligned_end, - (unsigned long)__va(memblock_end_of_DRAM())); - stop_machine(stop_machine_change_mapping, ¶ms, NULL); - return; - } - - pte_clear(&init_mm, addr, pte); -} - -static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, +static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { unsigned long next; @@ -815,7 +838,12 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; if (pmd_is_leaf(*pmd)) { - split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + pte_clear(&init_mm, addr, (pte_t *)pmd); continue; } @@ -825,7 +853,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, } } -static void remove_pud_table(pud_t *pud_start, unsigned long addr, +static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end) { unsigned long next; @@ -840,7 +868,12 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, continue; if (pud_is_leaf(*pud)) { - split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); + if (!IS_ALIGNED(addr, PUD_SIZE) || + !IS_ALIGNED(next, PUD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + pte_clear(&init_mm, addr, (pte_t *)pud); continue; } @@ -868,12 +901,19 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) continue; if (p4d_is_leaf(*p4d)) { - split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d); + if (!IS_ALIGNED(addr, P4D_SIZE) || + !IS_ALIGNED(next, P4D_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pgd); continue; } pud_base = (pud_t *)p4d_page_vaddr(*p4d); remove_pud_table(pud_base, addr, next); + free_pud_table(pud_base, p4d); } spin_unlock(&init_mm.page_table_lock); @@ -889,7 +929,8 @@ int __meminit radix__create_section_mapping(unsigned long start, return -1; } - return create_physical_mapping(__pa(start), __pa(end), nid, prot); + return create_physical_mapping(__pa(start), __pa(end), + radix_mem_block_size, nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index b5cc9b23cf02..0d233763441f 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -16,6 +16,7 @@ #include <asm/tlbflush.h> #include <asm/trace.h> #include <asm/cputhreads.h> +#include <asm/plpar_wrappers.h> #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -694,7 +695,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm) goto local; } - if (cputlb_use_tlbie()) { + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, + H_RPTI_PAGE_ALL, 0, -1UL); + } else if (cputlb_use_tlbie()) { if (mm_needs_flush_escalation(mm)) _tlbie_pid(pid, RIC_FLUSH_ALL); else @@ -727,7 +735,16 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) goto local; } } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, + H_RPTI_PAGE_ALL, 0, -1UL); + } else if (cputlb_use_tlbie()) _tlbie_pid(pid, RIC_FLUSH_ALL); else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); @@ -760,7 +777,19 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, exit_flush_lazy_tlbs(mm); goto local; } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt, pg_sizes, size; + + tgt = H_RPTI_TARGET_CMMU; + pg_sizes = psize_to_rpti_pgsize(psize); + size = 1UL << mmu_psize_to_shift(psize); + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, + pg_sizes, vmaddr, + vmaddr + size); + } else if (cputlb_use_tlbie()) _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); @@ -810,7 +839,14 @@ static inline void _tlbiel_kernel_broadcast(void) */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; + unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + + pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, + start, end); + } else if (cputlb_use_tlbie()) _tlbie_pid(0, RIC_FLUSH_ALL); else _tlbiel_kernel_broadcast(); @@ -864,7 +900,17 @@ is_local: nr_pages > tlb_local_single_page_flush_ceiling); } - if (full) { + if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes, + start, end); + } else if (full) { if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { @@ -1046,7 +1092,17 @@ is_local: nr_pages > tlb_local_single_page_flush_ceiling); } - if (full) { + if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long type = H_RPTI_TYPE_TLB; + unsigned long pg_sizes = psize_to_rpti_pgsize(psize); + + if (also_pwc) + type |= H_RPTI_TYPE_PWC; + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); + } else if (full) { if (local) { _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } else { @@ -1111,7 +1167,19 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) exit_flush_lazy_tlbs(mm); goto local; } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt, type, pg_sizes; + + tgt = H_RPTI_TARGET_CMMU; + type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, pg_sizes, + addr, end); + } else if (cputlb_use_tlbie()) _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); else _tlbiel_va_range_multicast(mm, diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a..b2eeea39684c 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -14,6 +14,8 @@ #include <asm/prom.h> #include <asm/drmem.h> +static int n_root_addr_cells, n_root_size_cells; + static struct drmem_lmb_info __drmem_info; struct drmem_lmb_info *drmem_info = &__drmem_info; @@ -189,12 +191,13 @@ int drmem_update_dt(void) return rc; } -static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, +static void read_drconf_v1_cell(struct drmem_lmb *lmb, const __be32 **prop) { const __be32 *p = *prop; - lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + lmb->base_addr = of_read_number(p, n_root_addr_cells); + p += n_root_addr_cells; lmb->drc_index = of_read_number(p++, 1); p++; /* skip reserved field */ @@ -205,29 +208,33 @@ static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, *prop = p; } -static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, - void (*func)(struct drmem_lmb *, const __be32 **)) +static int +__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { struct drmem_lmb lmb; u32 i, n_lmbs; + int ret = 0; n_lmbs = of_read_number(prop++, 1); - if (n_lmbs == 0) - return; - for (i = 0; i < n_lmbs; i++) { read_drconf_v1_cell(&lmb, &prop); - func(&lmb, &usm); + ret = func(&lmb, &usm, data); + if (ret) + break; } + + return ret; } -static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, +static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, const __be32 **prop) { const __be32 *p = *prop; dr_cell->seq_lmbs = of_read_number(p++, 1); - dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + dr_cell->base_addr = of_read_number(p, n_root_addr_cells); + p += n_root_addr_cells; dr_cell->drc_index = of_read_number(p++, 1); dr_cell->aa_index = of_read_number(p++, 1); dr_cell->flags = of_read_number(p++, 1); @@ -235,17 +242,16 @@ static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, *prop = p; } -static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, - void (*func)(struct drmem_lmb *, const __be32 **)) +static int +__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { struct of_drconf_cell_v2 dr_cell; struct drmem_lmb lmb; u32 i, j, lmb_sets; + int ret = 0; lmb_sets = of_read_number(prop++, 1); - if (lmb_sets == 0) - return; - for (i = 0; i < lmb_sets; i++) { read_drconf_v2_cell(&dr_cell, &prop); @@ -259,21 +265,29 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, lmb.aa_index = dr_cell.aa_index; lmb.flags = dr_cell.flags; - func(&lmb, &usm); + ret = func(&lmb, &usm, data); + if (ret) + break; } } + + return ret; } #ifdef CONFIG_PPC_PSERIES -void __init walk_drmem_lmbs_early(unsigned long node, - void (*func)(struct drmem_lmb *, const __be32 **)) +int __init walk_drmem_lmbs_early(unsigned long node, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { const __be32 *prop, *usm; - int len; + int len, ret = -ENODEV; prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); if (!prop || len < dt_root_size_cells * sizeof(__be32)) - return; + return ret; + + /* Get the address & size cells */ + n_root_addr_cells = dt_root_addr_cells; + n_root_size_cells = dt_root_size_cells; drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); @@ -281,20 +295,21 @@ void __init walk_drmem_lmbs_early(unsigned long node, prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len); if (prop) { - __walk_drmem_v1_lmbs(prop, usm, func); + ret = __walk_drmem_v1_lmbs(prop, usm, data, func); } else { prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2", &len); if (prop) - __walk_drmem_v2_lmbs(prop, usm, func); + ret = __walk_drmem_v2_lmbs(prop, usm, data, func); } memblock_dump_all(); + return ret; } #endif -static int __init init_drmem_lmb_size(struct device_node *dn) +static int init_drmem_lmb_size(struct device_node *dn) { const __be32 *prop; int len; @@ -303,12 +318,12 @@ static int __init init_drmem_lmb_size(struct device_node *dn) return 0; prop = of_get_property(dn, "ibm,lmb-size", &len); - if (!prop || len < dt_root_size_cells * sizeof(__be32)) { + if (!prop || len < n_root_size_cells * sizeof(__be32)) { pr_info("Could not determine LMB size\n"); return -1; } - drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); + drmem_info->lmb_size = of_read_number(prop, n_root_size_cells); return 0; } @@ -329,24 +344,36 @@ static const __be32 *of_get_usable_memory(struct device_node *dn) return prop; } -void __init walk_drmem_lmbs(struct device_node *dn, - void (*func)(struct drmem_lmb *, const __be32 **)) +int walk_drmem_lmbs(struct device_node *dn, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { const __be32 *prop, *usm; + int ret = -ENODEV; + + if (!of_root) + return ret; + + /* Get the address & size cells */ + of_node_get(of_root); + n_root_addr_cells = of_n_addr_cells(of_root); + n_root_size_cells = of_n_size_cells(of_root); + of_node_put(of_root); if (init_drmem_lmb_size(dn)) - return; + return ret; usm = of_get_usable_memory(dn); prop = of_get_property(dn, "ibm,dynamic-memory", NULL); if (prop) { - __walk_drmem_v1_lmbs(prop, usm, func); + ret = __walk_drmem_v1_lmbs(prop, usm, data, func); } else { prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL); if (prop) - __walk_drmem_v2_lmbs(prop, usm, func); + ret = __walk_drmem_v2_lmbs(prop, usm, data, func); } + + return ret; } static void __init init_drmem_v1_lmbs(const __be32 *prop) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 641fc5f3d7dd..925a7231abb3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -42,39 +42,7 @@ #include <asm/kup.h> #include <asm/inst.h> -/* - * Check whether the instruction inst is a store using - * an update addressing form which will update r1. - */ -static bool store_updates_sp(struct ppc_inst inst) -{ - /* check for 1 in the rA field */ - if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1) - return false; - /* check major opcode */ - switch (ppc_inst_primary_opcode(inst)) { - case OP_STWU: - case OP_STBU: - case OP_STHU: - case OP_STFSU: - case OP_STFDU: - return true; - case OP_STD: /* std or stdu */ - return (ppc_inst_val(inst) & 3) == 1; - case OP_31: - /* check minor opcode */ - switch ((ppc_inst_val(inst) >> 1) & 0x3ff) { - case OP_31_XOP_STDUX: - case OP_31_XOP_STWUX: - case OP_31_XOP_STBUX: - case OP_31_XOP_STHUX: - case OP_31_XOP_STFSUX: - case OP_31_XOP_STFDUX: - return true; - } - } - return false; -} + /* * do_page_fault error handling helpers */ @@ -267,54 +235,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, - struct vm_area_struct *vma, unsigned int flags, - bool *must_retry) -{ - /* - * N.B. The POWER/Open ABI allows programs to access up to - * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB - * below the stack pointer (r1) before decrementing it. - * The exec code can write slightly over 640kB to the stack - * before setting the user r1. Thus we allow the stack to - * expand to 1MB without further checks. - */ - if (address + 0x100000 < vma->vm_end) { - struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip; - /* get user regs even if this fault is in kernel mode */ - struct pt_regs *uregs = current->thread.regs; - if (uregs == NULL) - return true; - - /* - * A user-mode access to an address a long way below - * the stack pointer is only valid if the instruction - * is one which would update the stack pointer to the - * address accessed if the instruction completed, - * i.e. either stwu rs,n(r1) or stwux rs,r1,rb - * (or the byte, halfword, float or double forms). - * - * If we don't check this then any write to the area - * between the last mapped region and the stack will - * expand the stack rather than segfaulting. - */ - if (address + 2048 >= uregs->gpr[1]) - return false; - - if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && - access_ok(nip, sizeof(*nip))) { - struct ppc_inst inst; - - if (!probe_user_read_inst(&inst, nip)) - return !store_updates_sp(inst); - *must_retry = true; - } - return true; - } - return false; -} - #ifdef CONFIG_PPC_MEM_KEYS static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, struct vm_area_struct *vma) @@ -480,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; - bool must_retry = false; bool kprobe_fault = kprobe_page_fault(regs, 11); if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) @@ -569,30 +488,15 @@ retry: vma = find_vma(mm, address); if (unlikely(!vma)) return bad_area(regs, address); - if (likely(vma->vm_start <= address)) - goto good_area; - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) - return bad_area(regs, address); - /* The stack is being expanded, check if it's valid */ - if (unlikely(bad_stack_expansion(regs, address, vma, flags, - &must_retry))) { - if (!must_retry) + if (unlikely(vma->vm_start > address)) { + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) return bad_area(regs, address); - mmap_read_unlock(mm); - if (fault_in_pages_readable((const char __user *)regs->nip, - sizeof(unsigned int))) - return bad_area_nosemaphore(regs, address); - goto retry; + if (unlikely(expand_stack(vma, address))) + return bad_area(regs, address); } - /* Try to expand it */ - if (unlikely(expand_stack(vma, address))) - return bad_area(regs, address); - -good_area: - #ifdef CONFIG_PPC_MEM_KEYS if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e9bfbccd975d..26292544630f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -684,3 +684,21 @@ void flush_dcache_icache_hugepage(struct page *page) } } } + +void __init gigantic_hugetlb_cma_reserve(void) +{ + unsigned long order = 0; + + if (radix_enabled()) + order = PUD_SHIFT - PAGE_SHIFT; + else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift) + /* + * For pseries we do use ibm,expected#pages for reserving 16G pages. + */ + order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; + + if (order) { + VM_WARN_ON(order < MAX_ORDER); + hugetlb_cma_reserve(order); + } +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 7ea19dc4883b..02c7db4087cb 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -170,6 +170,8 @@ void __init MMU_init(void) btext_unmap(); #endif + kasan_mmu_init(); + setup_kup(); /* Shortly after that, the entire linear mapping will be available */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3fd504d72c5e..02e127fa5777 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -406,13 +406,15 @@ static void __init early_check_vec5(void) } if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & OV5_FEAT(OV5_RADIX_GTSE))) { - pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); - } + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE; + } else + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; /* Do radix anyway - the hypervisor said we had to */ cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { /* Hypervisor only supports hash - disable radix */ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE; } } diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 0760e1e754e4..fb294046e00e 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -115,16 +115,35 @@ static void __init kasan_unmap_early_shadow_vmalloc(void) unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); kasan_update_early_region(k_start, k_end, __pte(0)); + +#ifdef MODULES_VADDR + k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR); + k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END); + kasan_update_early_region(k_start, k_end, __pte(0)); +#endif } -static void __init kasan_mmu_init(void) +void __init kasan_mmu_init(void) { int ret; + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || + IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } +} + +void __init kasan_init(void) +{ struct memblock_region *reg; for_each_memblock(memory, reg) { phys_addr_t base = reg->base; phys_addr_t top = min(base + reg->size, total_lowmem); + int ret; if (base >= top) continue; @@ -134,20 +153,6 @@ static void __init kasan_mmu_init(void) panic("kasan: kasan_init_region() failed"); } - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); - - if (ret) - panic("kasan: kasan_init_shadow_page_tables() failed"); - } - -} - -void __init kasan_init(void) -{ - kasan_mmu_init(); - kasan_remap_early_shadow_ro(); clear_page(kasan_early_shadow_page); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ec68c9eeac0e..42e25874f5a8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -126,8 +126,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int rc; - resize_hpt_for_hotplug(memblock_phys_mem_size()); - start = (unsigned long)__va(start); rc = create_section_mapping(start, start + size, nid, params->pgprot); @@ -160,9 +158,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, * hit that section of memory */ vm_unmap_aliases(); - - if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) - pr_warn("Hash collision while resizing HPT\n"); } #endif diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index d5e2704d0096..bf24451f3e71 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -71,7 +71,6 @@ START_BTB_FLUSH_SECTION END_BTB_FLUSH_SECTION std r7,EX_TLB_R7(r12) #endif - TLB_MISS_PROLOG_STATS .endm .macro tlb_epilog_bolted @@ -85,7 +84,6 @@ END_BTB_FLUSH_SECTION mtcr r14 ld r14,EX_TLB_R14(r12) ld r15,EX_TLB_R15(r12) - TLB_MISS_RESTORE_STATS ld r16,EX_TLB_R16(r12) mfspr r12,SPRN_SPRG_GEN_SCRATCH .endm @@ -128,7 +126,6 @@ END_BTB_FLUSH_SECTION ori r10,r10,_PAGE_PRESENT oris r11,r10,_PAGE_ACCESSED@h - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_bolted tlb_miss_common_bolted: @@ -209,7 +206,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) tlbwe tlb_miss_done_bolted: - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) tlb_epilog_bolted rfi @@ -229,11 +225,9 @@ tlb_miss_fault_bolted: andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX bne itlb_miss_fault_bolted dtlb_miss_fault_bolted: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_data_storage_book3e itlb_miss_fault_bolted: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_instruction_storage_book3e @@ -243,7 +237,6 @@ itlb_miss_fault_bolted: rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 srdi r15,r16,60 /* get region */ - TLB_MISS_STATS_SAVE_INFO_BOLTED bne- itlb_miss_fault_bolted li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ @@ -276,7 +269,6 @@ itlb_miss_fault_bolted: srdi. r15,r16,60 /* get region */ ori r16,r16,1 - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_e6500 /* user/kernel test */ b tlb_miss_common_e6500 @@ -288,7 +280,6 @@ itlb_miss_fault_bolted: srdi. r15,r16,60 /* get region */ rldicr r16,r16,0,62 - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_e6500 /* user vs kernel check */ /* @@ -460,7 +451,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) .endm tlb_unlock_e6500 - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) tlb_epilog_bolted rfi @@ -519,11 +509,9 @@ tlb_miss_fault_e6500: andi. r16,r16,1 bne itlb_miss_fault_e6500 dtlb_miss_fault_e6500: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_data_storage_book3e itlb_miss_fault_e6500: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_instruction_storage_book3e #endif /* CONFIG_PPC_FSL_BOOK3E */ @@ -548,7 +536,6 @@ itlb_miss_fault_e6500: mfspr r16,SPRN_DEAR /* get faulting address */ srdi r15,r16,60 /* get region */ cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* The page tables are mapped virtually linear. At this point, though, @@ -600,7 +587,6 @@ itlb_miss_fault_e6500: /* We got a crappy address, just fault with whatever DEAR and ESR * are here */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -624,7 +610,6 @@ itlb_miss_fault_e6500: */ srdi r15,r16,60 /* get region */ cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -646,7 +631,6 @@ itlb_miss_fault_e6500: beq+ normal_tlb_miss /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -745,7 +729,6 @@ normal_tlb_miss_done: * level 0 and just going back to userland. They are only needed * if you are going to take an access fault */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) TLB_MISS_EPILOG_SUCCESS rfi @@ -757,11 +740,9 @@ normal_tlb_miss_access_fault: ld r15,EX_TLB_ESR(r12) mtspr SPRN_DEAR,r14 mtspr SPRN_ESR,r15 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -899,7 +880,6 @@ virt_page_table_tlb_miss_done: 1: END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Return to caller, normal case */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK); TLB_MISS_EPILOG_SUCCESS rfi @@ -935,18 +915,15 @@ virt_page_table_tlb_miss_fault: beq 1f mtspr SPRN_DEAR,r15 mtspr SPRN_ESR,r16 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT); TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT); - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e virt_page_table_tlb_miss_whacko_fault: /* The linear fault will restart everything so ESR and DEAR will * not have been clobbered, let's just fault with what we have */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT); TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -971,7 +948,6 @@ virt_page_table_tlb_miss_whacko_fault: mfspr r16,SPRN_DEAR /* get faulting address */ srdi r11,r16,60 /* get region */ cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -991,7 +967,6 @@ virt_page_table_tlb_miss_whacko_fault: /* We got a crappy address, just fault with whatever DEAR and ESR * are here */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -1015,7 +990,6 @@ virt_page_table_tlb_miss_whacko_fault: */ srdi r11,r16,60 /* get region */ cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -1033,7 +1007,6 @@ virt_page_table_tlb_miss_whacko_fault: beq+ htw_tlb_miss /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -1130,7 +1103,6 @@ htw_tlb_miss_done: * level 0 and just going back to userland. They are only needed * if you are going to take an access fault */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK) TLB_MISS_EPILOG_SUCCESS rfi @@ -1142,11 +1114,9 @@ htw_tlb_miss_fault: beq 1f mtspr SPRN_DEAR,r16 mtspr SPRN_ESR,r14 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT) - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e /* @@ -1221,7 +1191,6 @@ tlb_load_linear_done: * We do that because we can't resume a fault within a TLB * miss handler, due to MAS and TLB reservation being clobbered. */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR) TLB_MISS_EPILOG_ERROR rfi @@ -1233,13 +1202,3 @@ tlb_load_linear_fault: b exc_data_storage_book3e 1: TLB_MISS_EPILOG_ERROR_SPECIAL b exc_instruction_storage_book3e - - -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -.tlb_stat_inc: -1: ldarx r8,0,r9 - addi r8,r8,1 - stdcx. r8,0,r9 - bne- 1b - blr -#endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 03a81d65095b..1f61fa2148b5 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -221,7 +221,8 @@ static void initialize_distance_lookup_table(int nid, } } -/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa +/* + * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA * info is found. */ static int associativity_to_nid(const __be32 *associativity) @@ -235,7 +236,7 @@ static int associativity_to_nid(const __be32 *associativity) nid = of_read_number(&associativity[min_common_depth], 1); /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff || nid >= MAX_NUMNODES) + if (nid == 0xffff || nid >= nr_node_ids) nid = NUMA_NO_NODE; if (nid > 0 && @@ -448,7 +449,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) index = lmb->aa_index * aa.array_sz + min_common_depth - 1; nid = of_read_number(&aa.arrays[index], 1); - if (nid == 0xffff || nid >= MAX_NUMNODES) + if (nid == 0xffff || nid >= nr_node_ids) nid = default_nid; if (nid > 0) { @@ -644,8 +645,9 @@ static inline int __init read_usm_ranges(const __be32 **usm) * Extract NUMA information from the ibm,dynamic-reconfiguration-memory * node. This assumes n_mem_{addr,size}_cells have been set. */ -static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, - const __be32 **usm) +static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { unsigned int ranges, is_kexec_kdump = 0; unsigned long base, size, sz; @@ -657,7 +659,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, */ if ((lmb->flags & DRCONF_MEM_RESERVED) || !(lmb->flags & DRCONF_MEM_ASSIGNED)) - return; + return 0; if (*usm) is_kexec_kdump = 1; @@ -669,7 +671,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, if (is_kexec_kdump) { ranges = read_usm_ranges(usm); if (!ranges) /* there are no (base, size) duple */ - return; + return 0; } do { @@ -686,6 +688,8 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, if (sz) memblock_set_node(base, sz, &memblock.memory, nid); } while (--ranges); + + return 0; } static int __init parse_numa_properties(void) @@ -787,7 +791,7 @@ new_range: */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { - walk_drmem_lmbs(memory, numa_setup_drmem_lmb); + walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb); of_node_put(memory); } @@ -983,28 +987,6 @@ static int __init early_numa(char *p) } early_param("numa", early_numa); -/* - * The platform can inform us through one of several mechanisms - * (post-migration device tree updates, PRRN or VPHN) that the NUMA - * assignment of a resource has changed. This controls whether we act - * on that. Disabled by default. - */ -static bool topology_updates_enabled; - -static int __init early_topology_updates(char *p) -{ - if (!p) - return 0; - - if (!strcmp(p, "on")) { - pr_warn("Caution: enabling topology updates\n"); - topology_updates_enabled = true; - } - - return 0; -} -early_param("topology_updates", early_topology_updates); - #ifdef CONFIG_MEMORY_HOTPLUG /* * Find the node associated with a hot added memory section for @@ -1143,98 +1125,9 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR -struct topology_update_data { - struct topology_update_data *next; - unsigned int cpu; - int old_nid; - int new_nid; -}; - -#define TOPOLOGY_DEF_TIMER_SECS 60 - -static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; -static cpumask_t cpu_associativity_changes_mask; -static int vphn_enabled; -static int prrn_enabled; -static void reset_topology_timer(void); -static int topology_timer_secs = 1; static int topology_inited; /* - * Change polling interval for associativity changes. - */ -int timed_topology_update(int nsecs) -{ - if (vphn_enabled) { - if (nsecs > 0) - topology_timer_secs = nsecs; - else - topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS; - - reset_topology_timer(); - } - - return 0; -} - -/* - * Store the current values of the associativity change counters in the - * hypervisor. - */ -static void setup_cpu_associativity_change_counters(void) -{ - int cpu; - - /* The VPHN feature supports a maximum of 8 reference points */ - BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); - - for_each_possible_cpu(cpu) { - int i; - u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; - - for (i = 0; i < distance_ref_points_depth; i++) - counts[i] = hypervisor_counts[i]; - } -} - -/* - * The hypervisor maintains a set of 8 associativity change counters in - * the VPA of each cpu that correspond to the associativity levels in the - * ibm,associativity-reference-points property. When an associativity - * level changes, the corresponding counter is incremented. - * - * Set a bit in cpu_associativity_changes_mask for each cpu whose home - * node associativity levels have changed. - * - * Returns the number of cpus with unhandled associativity changes. - */ -static int update_cpu_associativity_changes_mask(void) -{ - int cpu; - cpumask_t *changes = &cpu_associativity_changes_mask; - - for_each_possible_cpu(cpu) { - int i, changed = 0; - u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; - - for (i = 0; i < distance_ref_points_depth; i++) { - if (hypervisor_counts[i] != counts[i]) { - counts[i] = hypervisor_counts[i]; - changed = 1; - } - } - if (changed) { - cpumask_or(changes, changes, cpu_sibling_mask(cpu)); - cpu = cpu_last_thread_sibling(cpu); - } - } - - return cpumask_weight(changes); -} - -/* * Retrieve the new associativity information for a virtual processor's * home node. */ @@ -1249,7 +1142,6 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_SUCCESS: dbg("VPHN hcall succeeded. Reset polling...\n"); - timed_topology_update(0); goto out; case H_FUNCTION: @@ -1268,8 +1160,6 @@ static long vphn_get_associativity(unsigned long cpu, , rc); break; } - - stop_topology_update(); out: return rc; } @@ -1313,380 +1203,8 @@ int find_and_online_cpu_nid(int cpu) return new_nid; } -/* - * Update the CPU maps and sysfs entries for a single CPU when its NUMA - * characteristics change. This function doesn't perform any locking and is - * only safe to call from stop_machine(). - */ -static int update_cpu_topology(void *data) -{ - struct topology_update_data *update; - unsigned long cpu; - - if (!data) - return -EINVAL; - - cpu = smp_processor_id(); - - for (update = data; update; update = update->next) { - int new_nid = update->new_nid; - if (cpu != update->cpu) - continue; - - unmap_cpu_from_node(cpu); - map_cpu_to_node(cpu, new_nid); - set_cpu_numa_node(cpu, new_nid); - set_cpu_numa_mem(cpu, local_memory_node(new_nid)); - vdso_getcpu_init(); - } - - return 0; -} - -static int update_lookup_table(void *data) -{ - struct topology_update_data *update; - - if (!data) - return -EINVAL; - - /* - * Upon topology update, the numa-cpu lookup table needs to be updated - * for all threads in the core, including offline CPUs, to ensure that - * future hotplug operations respect the cpu-to-node associativity - * properly. - */ - for (update = data; update; update = update->next) { - int nid, base, j; - - nid = update->new_nid; - base = cpu_first_thread_sibling(update->cpu); - - for (j = 0; j < threads_per_core; j++) { - update_numa_cpu_lookup_table(base + j, nid); - } - } - - return 0; -} - -/* - * Update the node maps and sysfs entries for each cpu whose home node - * has changed. Returns 1 when the topology has changed, and 0 otherwise. - * - * cpus_locked says whether we already hold cpu_hotplug_lock. - */ -int numa_update_cpu_topology(bool cpus_locked) -{ - unsigned int cpu, sibling, changed = 0; - struct topology_update_data *updates, *ud; - cpumask_t updated_cpus; - struct device *dev; - int weight, new_nid, i = 0; - - if (!prrn_enabled && !vphn_enabled && topology_inited) - return 0; - - weight = cpumask_weight(&cpu_associativity_changes_mask); - if (!weight) - return 0; - - updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL); - if (!updates) - return 0; - - cpumask_clear(&updated_cpus); - - for_each_cpu(cpu, &cpu_associativity_changes_mask) { - /* - * If siblings aren't flagged for changes, updates list - * will be too short. Skip on this update and set for next - * update. - */ - if (!cpumask_subset(cpu_sibling_mask(cpu), - &cpu_associativity_changes_mask)) { - pr_info("Sibling bits not set for associativity " - "change, cpu%d\n", cpu); - cpumask_or(&cpu_associativity_changes_mask, - &cpu_associativity_changes_mask, - cpu_sibling_mask(cpu)); - cpu = cpu_last_thread_sibling(cpu); - continue; - } - - new_nid = find_and_online_cpu_nid(cpu); - - if (new_nid == numa_cpu_lookup_table[cpu]) { - cpumask_andnot(&cpu_associativity_changes_mask, - &cpu_associativity_changes_mask, - cpu_sibling_mask(cpu)); - dbg("Assoc chg gives same node %d for cpu%d\n", - new_nid, cpu); - cpu = cpu_last_thread_sibling(cpu); - continue; - } - - for_each_cpu(sibling, cpu_sibling_mask(cpu)) { - ud = &updates[i++]; - ud->next = &updates[i]; - ud->cpu = sibling; - ud->new_nid = new_nid; - ud->old_nid = numa_cpu_lookup_table[sibling]; - cpumask_set_cpu(sibling, &updated_cpus); - } - cpu = cpu_last_thread_sibling(cpu); - } - - /* - * Prevent processing of 'updates' from overflowing array - * where last entry filled in a 'next' pointer. - */ - if (i) - updates[i-1].next = NULL; - - pr_debug("Topology update for the following CPUs:\n"); - if (cpumask_weight(&updated_cpus)) { - for (ud = &updates[0]; ud; ud = ud->next) { - pr_debug("cpu %d moving from node %d " - "to %d\n", ud->cpu, - ud->old_nid, ud->new_nid); - } - } - - /* - * In cases where we have nothing to update (because the updates list - * is too short or because the new topology is same as the old one), - * skip invoking update_cpu_topology() via stop-machine(). This is - * necessary (and not just a fast-path optimization) since stop-machine - * can end up electing a random CPU to run update_cpu_topology(), and - * thus trick us into setting up incorrect cpu-node mappings (since - * 'updates' is kzalloc()'ed). - * - * And for the similar reason, we will skip all the following updating. - */ - if (!cpumask_weight(&updated_cpus)) - goto out; - - if (cpus_locked) - stop_machine_cpuslocked(update_cpu_topology, &updates[0], - &updated_cpus); - else - stop_machine(update_cpu_topology, &updates[0], &updated_cpus); - - /* - * Update the numa-cpu lookup table with the new mappings, even for - * offline CPUs. It is best to perform this update from the stop- - * machine context. - */ - if (cpus_locked) - stop_machine_cpuslocked(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); - else - stop_machine(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); - - for (ud = &updates[0]; ud; ud = ud->next) { - unregister_cpu_under_node(ud->cpu, ud->old_nid); - register_cpu_under_node(ud->cpu, ud->new_nid); - - dev = get_cpu_device(ud->cpu); - if (dev) - kobject_uevent(&dev->kobj, KOBJ_CHANGE); - cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); - changed = 1; - } - -out: - kfree(updates); - return changed; -} - -int arch_update_cpu_topology(void) -{ - return numa_update_cpu_topology(true); -} - -static void topology_work_fn(struct work_struct *work) -{ - rebuild_sched_domains(); -} -static DECLARE_WORK(topology_work, topology_work_fn); - -static void topology_schedule_update(void) -{ - schedule_work(&topology_work); -} - -static void topology_timer_fn(struct timer_list *unused) -{ - if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) - topology_schedule_update(); - else if (vphn_enabled) { - if (update_cpu_associativity_changes_mask() > 0) - topology_schedule_update(); - reset_topology_timer(); - } -} -static struct timer_list topology_timer; - -static void reset_topology_timer(void) -{ - if (vphn_enabled) - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); -} - -#ifdef CONFIG_SMP - -static int dt_update_callback(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct of_reconfig_data *update = data; - int rc = NOTIFY_DONE; - - switch (action) { - case OF_RECONFIG_UPDATE_PROPERTY: - if (of_node_is_type(update->dn, "cpu") && - !of_prop_cmp(update->prop->name, "ibm,associativity")) { - u32 core_id; - of_property_read_u32(update->dn, "reg", &core_id); - rc = dlpar_cpu_readd(core_id); - rc = NOTIFY_OK; - } - break; - } - - return rc; -} - -static struct notifier_block dt_update_nb = { - .notifier_call = dt_update_callback, -}; - -#endif - -/* - * Start polling for associativity changes. - */ -int start_topology_update(void) -{ - int rc = 0; - - if (!topology_updates_enabled) - return 0; - - if (firmware_has_feature(FW_FEATURE_PRRN)) { - if (!prrn_enabled) { - prrn_enabled = 1; -#ifdef CONFIG_SMP - rc = of_reconfig_notifier_register(&dt_update_nb); -#endif - } - } - if (firmware_has_feature(FW_FEATURE_VPHN) && - lppaca_shared_proc(get_lppaca())) { - if (!vphn_enabled) { - vphn_enabled = 1; - setup_cpu_associativity_change_counters(); - timer_setup(&topology_timer, topology_timer_fn, - TIMER_DEFERRABLE); - reset_topology_timer(); - } - } - - pr_info("Starting topology update%s%s\n", - (prrn_enabled ? " prrn_enabled" : ""), - (vphn_enabled ? " vphn_enabled" : "")); - - return rc; -} - -/* - * Disable polling for VPHN associativity changes. - */ -int stop_topology_update(void) -{ - int rc = 0; - - if (!topology_updates_enabled) - return 0; - - if (prrn_enabled) { - prrn_enabled = 0; -#ifdef CONFIG_SMP - rc = of_reconfig_notifier_unregister(&dt_update_nb); -#endif - } - if (vphn_enabled) { - vphn_enabled = 0; - rc = del_timer_sync(&topology_timer); - } - - pr_info("Stopping topology update\n"); - - return rc; -} - -int prrn_is_enabled(void) -{ - return prrn_enabled; -} - -static int topology_read(struct seq_file *file, void *v) -{ - if (vphn_enabled || prrn_enabled) - seq_puts(file, "on\n"); - else - seq_puts(file, "off\n"); - - return 0; -} - -static int topology_open(struct inode *inode, struct file *file) -{ - return single_open(file, topology_read, NULL); -} - -static ssize_t topology_write(struct file *file, const char __user *buf, - size_t count, loff_t *off) -{ - char kbuf[4]; /* "on" or "off" plus null. */ - int read_len; - - read_len = count < 3 ? count : 3; - if (copy_from_user(kbuf, buf, read_len)) - return -EINVAL; - - kbuf[read_len] = '\0'; - - if (!strncmp(kbuf, "on", 2)) { - topology_updates_enabled = true; - start_topology_update(); - } else if (!strncmp(kbuf, "off", 3)) { - stop_topology_update(); - topology_updates_enabled = false; - } else - return -EINVAL; - - return count; -} - -static const struct proc_ops topology_proc_ops = { - .proc_read = seq_read, - .proc_write = topology_write, - .proc_open = topology_open, - .proc_release = single_release, -}; - static int topology_update_init(void) { - start_topology_update(); - - if (vphn_enabled) - topology_schedule_update(); - - if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops)) - return -ENOMEM; - topology_inited = 1; return 0; } diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c index ee4bd6d38602..97ae4935da79 100644 --- a/arch/powerpc/mm/pgtable-frag.c +++ b/arch/powerpc/mm/pgtable-frag.c @@ -110,6 +110,9 @@ void pte_fragment_free(unsigned long *table, int kernel) { struct page *page = virt_to_page(table); + if (PageReserved(page)) + return free_reserved_page(page); + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); if (atomic_dec_and_test(&page->pt_frag_refcount)) { if (!kernel) diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index ff4b05a9e7f0..ad6df9a2e7c8 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -258,7 +258,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc) continue; for (j = 0; j < 4; j++) { if (HPTE_V_COMPARE(ptes[j].v, want_v) && diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index f7ba13c41d13..aca354fb670b 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -73,6 +73,10 @@ struct addr_marker { static struct addr_marker address_markers[] = { { 0, "Start of kernel VM" }, +#ifdef MODULES_VADDR + { 0, "modules start" }, + { 0, "modules end" }, +#endif { 0, "vmalloc() Area" }, { 0, "vmalloc() End" }, #ifdef CONFIG_PPC64 @@ -194,6 +198,24 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } +static void note_page_update_state(struct pg_state *st, unsigned long addr, + unsigned int level, u64 val, unsigned long page_size) +{ + u64 flag = val & pg_level[level].mask; + u64 pa = val & PTE_RPN_MASK; + + st->level = level; + st->current_flags = flag; + st->start_address = addr; + st->start_pa = pa; + st->page_size = page_size; + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } +} + static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val, unsigned long page_size) { @@ -202,13 +224,8 @@ static void note_page(struct pg_state *st, unsigned long addr, /* At first no level is set */ if (!st->level) { - st->level = level; - st->current_flags = flag; - st->start_address = addr; - st->start_pa = pa; - st->last_pa = pa; - st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + note_page_update_state(st, addr, level, val, page_size); /* * Dump the section of virtual memory when: * - the PTE flags from one entry to the next differs. @@ -240,19 +257,9 @@ static void note_page(struct pg_state *st, unsigned long addr, * Address indicates we have passed the end of the * current section of virtual memory */ - while (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } - st->start_address = addr; - st->start_pa = pa; - st->last_pa = pa; - st->page_size = page_size; - st->current_flags = flag; - st->level = level; - } else { - st->last_pa = pa; + note_page_update_state(st, addr, level, val, page_size); } + st->last_pa = pa; } static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) @@ -347,7 +354,15 @@ static void populate_markers(void) { int i = 0; +#ifdef CONFIG_PPC64 address_markers[i++].start_address = PAGE_OFFSET; +#else + address_markers[i++].start_address = TASK_SIZE; +#endif +#ifdef MODULES_VADDR + address_markers[i++].start_address = MODULES_VADDR; + address_markers[i++].start_address = MODULES_END; +#endif address_markers[i++].start_address = VMALLOC_START; address_markers[i++].start_address = VMALLOC_END; #ifdef CONFIG_PPC64 @@ -384,7 +399,7 @@ static int ptdump_show(struct seq_file *m, void *v) struct pg_state st = { .seq = m, .marker = address_markers, - .start_address = PAGE_OFFSET, + .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; #ifdef CONFIG_PPC64 @@ -428,7 +443,7 @@ void ptdump_check_wx(void) .seq = NULL, .marker = address_markers, .check_wx = true, - .start_address = PAGE_OFFSET, + .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 55d4377ccfae..d0a67a1bbaf1 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -11,6 +11,7 @@ #ifndef __ASSEMBLY__ #include <asm/types.h> +#include <asm/ppc-opcode.h> #ifdef PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 @@ -18,167 +19,10 @@ #define FUNCTION_DESCR_SIZE 0 #endif -/* - * 16-bit immediate helper macros: HA() is for use with sign-extending instrs - * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the - * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000). - */ -#define IMM_H(i) ((uintptr_t)(i)>>16) -#define IMM_HA(i) (((uintptr_t)(i)>>16) + \ - (((uintptr_t)(i) & 0x8000) >> 15)) -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) - #define PLANT_INSTR(d, idx, instr) \ do { if (d) { (d)[idx] = instr; } idx++; } while (0) #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) -#define PPC_NOP() EMIT(PPC_INST_NOP) -#define PPC_BLR() EMIT(PPC_INST_BLR) -#define PPC_BLRL() EMIT(PPC_INST_BLRL) -#define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r)) -#define PPC_BCTR() EMIT(PPC_INST_BCTR) -#define PPC_MTCTR(r) EMIT(PPC_INST_MTCTR | ___PPC_RT(r)) -#define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \ - ___PPC_RA(a) | IMM_L(i)) -#define PPC_MR(d, a) PPC_OR(d, a, a) -#define PPC_LI(r, i) PPC_ADDI(r, 0, i) -#define PPC_ADDIS(d, a, i) EMIT(PPC_INST_ADDIS | \ - ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) -#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) -#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STH(r, base, i) EMIT(PPC_INST_STH | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STB(r, base, i) EMIT(PPC_INST_STB | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) - -#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_LDBRX(r, base, b) EMIT(PPC_INST_LDBRX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) - -#define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define PPC_BPF_STWCX(s, a, b) EMIT(PPC_INST_STWCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPD(a, b) EMIT(PPC_INST_CMPD | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPLDI(a, i) EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPLD(a, b) EMIT(PPC_INST_CMPLD | ___PPC_RA(a) | \ - ___PPC_RB(b)) - -#define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \ - ___PPC_RB(a) | ___PPC_RA(b)) -#define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULD(d, a, b) EMIT(PPC_INST_MULLD | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULI(d, a, i) EMIT(PPC_INST_MULLI | ___PPC_RT(d) | \ - ___PPC_RA(a) | IMM_L(i)) -#define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DIVDU(d, a, b) EMIT(PPC_INST_DIVDU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_AND_DOT(d, a, b) EMIT(PPC_INST_ANDDOT | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_MR(d, a) PPC_OR(d, a, a) -#define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_XOR(d, a, b) EMIT(PPC_INST_XOR | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_XORI(d, a, i) EMIT(PPC_INST_XORI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_EXTSW(d, a) EMIT(PPC_INST_EXTSW | ___PPC_RA(d) | \ - ___PPC_RS(a)) -#define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SLD(d, a, s) EMIT(PPC_INST_SLD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAW(d, a, s) EMIT(PPC_INST_SRAW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAWI(d, a, i) EMIT(PPC_INST_SRAWI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i)) -#define PPC_SRD(d, a, s) EMIT(PPC_INST_SRD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i)) -#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(mb) | __PPC_ME(me)) -#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \ - ___PPC_RA(d) | ___PPC_RS(a) | \ - __PPC_SH(i) | __PPC_MB(mb) | \ - __PPC_ME(me)) -#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(mb) | __PPC_ME(me)) -#define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i) | \ - __PPC_MB64(mb)) -#define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i) | \ - __PPC_ME64(me)) - -/* slwi = rlwinm Rx, Ry, n, 0, 31-n */ -#define PPC_SLWI(d, a, i) PPC_RLWINM(d, a, i, 0, 31-(i)) -/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ -#define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31) -/* sldi = rldicr Rx, Ry, n, 63-n */ -#define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i)) -/* sldi = rldicl Rx, Ry, 64-n, n */ -#define PPC_SRDI(d, a, i) PPC_RLDICL(d, a, 64-(i), i) - -#define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) - /* Long jump; (unconditional 'branch') */ #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ (((dest) - (ctx->idx * 4)) & 0x03fffffc)) @@ -191,11 +35,11 @@ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ (int)(uintptr_t)(i) < 32768) \ - PPC_LI(d, i); \ + EMIT(PPC_RAW_LI(d, i)); \ else { \ - PPC_LIS(d, IMM_H(i)); \ + EMIT(PPC_RAW_LIS(d, IMM_H(i))); \ if (IMM_L(i)) \ - PPC_ORI(d, d, IMM_L(i)); \ + EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) #define PPC_LI64(d, i) do { \ @@ -204,19 +48,21 @@ PPC_LI32(d, i); \ else { \ if (!((uintptr_t)(i) & 0xffff800000000000ULL)) \ - PPC_LI(d, ((uintptr_t)(i) >> 32) & 0xffff); \ + EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) & \ + 0xffff)); \ else { \ - PPC_LIS(d, ((uintptr_t)(i) >> 48)); \ + EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \ if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \ - PPC_ORI(d, d, \ - ((uintptr_t)(i) >> 32) & 0xffff); \ + EMIT(PPC_RAW_ORI(d, d, \ + ((uintptr_t)(i) >> 32) & 0xffff)); \ } \ - PPC_SLDI(d, d, 32); \ + EMIT(PPC_RAW_SLDI(d, d, 32)); \ if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \ - PPC_ORIS(d, d, \ - ((uintptr_t)(i) >> 16) & 0xffff); \ + EMIT(PPC_RAW_ORIS(d, d, \ + ((uintptr_t)(i) >> 16) & 0xffff)); \ if ((uintptr_t)(i) & 0x000000000000ffffULL) \ - PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \ + EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ + 0xffff)); \ } } while (0) #ifdef CONFIG_PPC64 @@ -240,7 +86,7 @@ static inline bool is_nearbranch(int offset) #define PPC_BCC(cond, dest) do { \ if (is_nearbranch((dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ - PPC_NOP(); \ + EMIT(PPC_RAW_NOP()); \ } else { \ /* Flip the 'T or F' bit to invert comparison */ \ PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4); \ diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 4ec2a9f14f84..448dfd4d98e1 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -72,21 +72,21 @@ DECLARE_LOAD_FUNC(sk_load_half); DECLARE_LOAD_FUNC(sk_load_byte); DECLARE_LOAD_FUNC(sk_load_byte_msh); -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LBZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0) -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LD(r, r, IMM_L(i)); } } while(0) +#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0) -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LWZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0) -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LHZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0) #ifdef CONFIG_PPC64 #define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) @@ -107,20 +107,20 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); } while(0) #endif #else -#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0) +#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0) #endif #define PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0) + do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0) #ifdef __LITTLE_ENDIAN__ #define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) #else #define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) #endif -#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) +#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0) +#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0) +#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0) #define SEEN_DATAREF 0x10000 /* might call external helpers */ #define SEEN_XREG 0x20000 /* X reg is used */ diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index cf3a7e337f02..2e33c6673ff9 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -70,19 +70,21 @@ static const int b2p[] = { */ #define PPC_BPF_LL(r, base, i) do { \ if ((i) % 4) { \ - PPC_LI(b2p[TMP_REG_2], (i)); \ - PPC_LDX(r, base, b2p[TMP_REG_2]); \ + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ + EMIT(PPC_RAW_LDX(r, base, \ + b2p[TMP_REG_2])); \ } else \ - PPC_LD(r, base, i); \ + EMIT(PPC_RAW_LD(r, base, i)); \ } while(0) #define PPC_BPF_STL(r, base, i) do { \ if ((i) % 4) { \ - PPC_LI(b2p[TMP_REG_2], (i)); \ - PPC_STDX(r, base, b2p[TMP_REG_2]); \ + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ + EMIT(PPC_RAW_STDX(r, base, \ + b2p[TMP_REG_2])); \ } else \ - PPC_STD(r, base, i); \ + EMIT(PPC_RAW_STD(r, base, i)); \ } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0) +#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) #define SEEN_FUNC 0x1000 /* might call external helpers */ #define SEEN_STACK 0x2000 /* uses BPF stack */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 0acc9d5fb19e..16d09b36fe06 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -61,7 +61,7 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, data_len)); PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); - PPC_SUB(r_HL, r_HL, r_scratch1); + EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1)); PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); } @@ -70,12 +70,12 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, * TODO: Could also detect whether first instr. sets X and * avoid this (as below, with A). */ - PPC_LI(r_X, 0); + EMIT(PPC_RAW_LI(r_X, 0)); } /* make sure we dont leak kernel information to user */ if (bpf_needs_clear_a(&filter[0])) - PPC_LI(r_A, 0); + EMIT(PPC_RAW_LI(r_A, 0)); } static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) @@ -83,10 +83,10 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) int i; if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); + EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME)); if (ctx->seen & SEEN_DATAREF) { PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - PPC_MTLR(0); + EMIT(PPC_RAW_MTLR(0)); PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); } @@ -100,7 +100,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) } /* The RETs have left a return value in R3. */ - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } #define CHOOSE_LOAD_FUNC(K, func) \ @@ -134,124 +134,124 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /*** ALU ops ***/ case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ ctx->seen |= SEEN_XREG; - PPC_ADD(r_A, r_A, r_X); + EMIT(PPC_RAW_ADD(r_A, r_A, r_X)); break; case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ if (!K) break; - PPC_ADDI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K))); if (K >= 32768) - PPC_ADDIS(r_A, r_A, IMM_HA(K)); + EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K))); break; case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ ctx->seen |= SEEN_XREG; - PPC_SUB(r_A, r_A, r_X); + EMIT(PPC_RAW_SUB(r_A, r_A, r_X)); break; case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; - PPC_ADDI(r_A, r_A, IMM_L(-K)); + EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K))); if (K >= 32768) - PPC_ADDIS(r_A, r_A, IMM_HA(-K)); + EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K))); break; case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ ctx->seen |= SEEN_XREG; - PPC_MULW(r_A, r_A, r_X); + EMIT(PPC_RAW_MULW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K < 32768) - PPC_MULI(r_A, r_A, K); + EMIT(PPC_RAW_MULI(r_A, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_MULW(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1)); } break; case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ ctx->seen |= SEEN_XREG; - PPC_CMPWI(r_X, 0); + EMIT(PPC_RAW_CMPWI(r_X, 0)); if (ctx->pc_ret0 != -1) { PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - PPC_LI(r_ret, 0); + EMIT(PPC_RAW_LI(r_ret, 0)); PPC_JMP(exit_addr); } if (code == (BPF_ALU | BPF_MOD | BPF_X)) { - PPC_DIVWU(r_scratch1, r_A, r_X); - PPC_MULW(r_scratch1, r_X, r_scratch1); - PPC_SUB(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X)); + EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1)); + EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); } else { - PPC_DIVWU(r_A, r_A, r_X); + EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X)); } break; case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, K); - PPC_DIVWU(r_scratch1, r_A, r_scratch2); - PPC_MULW(r_scratch1, r_scratch2, r_scratch1); - PPC_SUB(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2)); + EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1)); + EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); break; case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; PPC_LI32(r_scratch1, K); - PPC_DIVWU(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1)); break; case BPF_ALU | BPF_AND | BPF_X: ctx->seen |= SEEN_XREG; - PPC_AND(r_A, r_A, r_X); + EMIT(PPC_RAW_AND(r_A, r_A, r_X)); break; case BPF_ALU | BPF_AND | BPF_K: if (!IMM_H(K)) - PPC_ANDI(r_A, r_A, K); + EMIT(PPC_RAW_ANDI(r_A, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_AND(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1)); } break; case BPF_ALU | BPF_OR | BPF_X: ctx->seen |= SEEN_XREG; - PPC_OR(r_A, r_A, r_X); + EMIT(PPC_RAW_OR(r_A, r_A, r_X)); break; case BPF_ALU | BPF_OR | BPF_K: if (IMM_L(K)) - PPC_ORI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K))); if (K >= 65536) - PPC_ORIS(r_A, r_A, IMM_H(K)); + EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K))); break; case BPF_ANC | SKF_AD_ALU_XOR_X: case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ ctx->seen |= SEEN_XREG; - PPC_XOR(r_A, r_A, r_X); + EMIT(PPC_RAW_XOR(r_A, r_A, r_X)); break; case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (IMM_L(K)) - PPC_XORI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K))); if (K >= 65536) - PPC_XORIS(r_A, r_A, IMM_H(K)); + EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K))); break; case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ ctx->seen |= SEEN_XREG; - PPC_SLW(r_A, r_A, r_X); + EMIT(PPC_RAW_SLW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_LSH | BPF_K: if (K == 0) break; else - PPC_SLWI(r_A, r_A, K); + EMIT(PPC_RAW_SLWI(r_A, r_A, K)); break; case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ ctx->seen |= SEEN_XREG; - PPC_SRW(r_A, r_A, r_X); + EMIT(PPC_RAW_SRW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; else - PPC_SRWI(r_A, r_A, K); + EMIT(PPC_RAW_SRWI(r_A, r_A, K)); break; case BPF_ALU | BPF_NEG: - PPC_NEG(r_A, r_A); + EMIT(PPC_RAW_NEG(r_A, r_A)); break; case BPF_RET | BPF_K: PPC_LI32(r_ret, K); @@ -277,24 +277,24 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, if (ctx->seen) PPC_JMP(exit_addr); else - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } break; case BPF_RET | BPF_A: - PPC_MR(r_ret, r_A); + EMIT(PPC_RAW_MR(r_ret, r_A)); if (i != flen - 1) { if (ctx->seen) PPC_JMP(exit_addr); else - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } break; case BPF_MISC | BPF_TAX: /* X = A */ - PPC_MR(r_X, r_A); + EMIT(PPC_RAW_MR(r_X, r_A)); break; case BPF_MISC | BPF_TXA: /* A = X */ ctx->seen |= SEEN_XREG; - PPC_MR(r_A, r_X); + EMIT(PPC_RAW_MR(r_A, r_X)); break; /*** Constant loads/M[] access ***/ @@ -305,19 +305,19 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LI32(r_X, K); break; case BPF_LD | BPF_MEM: /* A = mem[K] */ - PPC_MR(r_A, r_M + (K & 0xf)); + EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf))); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LDX | BPF_MEM: /* X = mem[K] */ - PPC_MR(r_X, r_M + (K & 0xf)); + EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf))); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_ST: /* mem[K] = A */ - PPC_MR(r_M + (K & 0xf), r_A); + EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_STX: /* mem[K] = X */ - PPC_MR(r_M + (K & 0xf), r_X); + EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X)); ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ @@ -346,13 +346,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, type) != 2); PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); - PPC_CMPDI(r_scratch1, 0); + EMIT(PPC_RAW_CMPDI(r_scratch1, 0)); if (ctx->pc_ret0 != -1) { PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { /* Exit, returning 0; first pass hits here. */ PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); - PPC_LI(r_ret, 0); + EMIT(PPC_RAW_LI(r_ret, 0)); PPC_JMP(exit_addr); } if (code == (BPF_ANC | SKF_AD_IFINDEX)) { @@ -383,9 +383,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); if (PKT_VLAN_PRESENT_BIT) - PPC_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT); + EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT)); if (PKT_VLAN_PRESENT_BIT < 7) - PPC_ANDI(r_A, r_A, 1); + EMIT(PPC_RAW_ANDI(r_A, r_A, 1)); break; case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(sizeof_field(struct sk_buff, @@ -395,8 +395,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ANC | SKF_AD_PKTTYPE: PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); - PPC_ANDI(r_A, r_A, PKT_TYPE_MAX); - PPC_SRWI(r_A, r_A, 5); + EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX)); + EMIT(PPC_RAW_SRWI(r_A, r_A, 5)); break; case BPF_ANC | SKF_AD_CPU: PPC_BPF_LOAD_CPU(r_A); @@ -414,9 +414,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /* Load from [K]. */ ctx->seen |= SEEN_DATAREF; PPC_FUNC_ADDR(r_scratch1, func); - PPC_MTLR(r_scratch1); + EMIT(PPC_RAW_MTLR(r_scratch1)); PPC_LI32(r_addr, K); - PPC_BLRL(); + EMIT(PPC_RAW_BLRL()); /* * Helper returns 'lt' condition on error, and an * appropriate return value in r3 @@ -440,11 +440,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ ctx->seen |= SEEN_DATAREF | SEEN_XREG; PPC_FUNC_ADDR(r_scratch1, func); - PPC_MTLR(r_scratch1); - PPC_ADDI(r_addr, r_X, IMM_L(K)); + EMIT(PPC_RAW_MTLR(r_scratch1)); + EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K))); if (K >= 32768) - PPC_ADDIS(r_addr, r_addr, IMM_HA(K)); - PPC_BLRL(); + EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K))); + EMIT(PPC_RAW_BLRL()); /* If error, cr0.LT set */ PPC_BCC(COND_LT, exit_addr); break; @@ -489,30 +489,30 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JEQ | BPF_X: ctx->seen |= SEEN_XREG; - PPC_CMPLW(r_A, r_X); + EMIT(PPC_RAW_CMPLW(r_A, r_X)); break; case BPF_JMP | BPF_JSET | BPF_X: ctx->seen |= SEEN_XREG; - PPC_AND_DOT(r_scratch1, r_A, r_X); + EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X)); break; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: if (K < 32768) - PPC_CMPLWI(r_A, K); + EMIT(PPC_RAW_CMPLWI(r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_CMPLW(r_A, r_scratch1); + EMIT(PPC_RAW_CMPLW(r_A, r_scratch1)); } break; case BPF_JMP | BPF_JSET | BPF_K: if (K < 32768) /* PPC_ANDI is /only/ dot-form */ - PPC_ANDI(r_scratch1, r_A, K); + EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_AND_DOT(r_scratch1, r_A, - r_scratch1); + EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, + r_scratch1)); } break; } diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index be3517ef0574..022103c6a201 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -95,12 +95,12 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) { - PPC_LI(b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); /* this goes in the redzone */ PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); } else { - PPC_NOP(); - PPC_NOP(); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); } #define BPF_TAILCALL_PROLOGUE_SIZE 8 @@ -129,8 +129,8 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, BPF_REG_FP)) - PPC_ADDI(b2p[BPF_REG_FP], 1, - STACK_FRAME_MIN_SIZE + ctx->stack_size); + EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + STACK_FRAME_MIN_SIZE + ctx->stack_size)); } static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) @@ -144,10 +144,10 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); + EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - PPC_MTLR(0); + EMIT(PPC_RAW_MTLR(0)); } } } @@ -157,9 +157,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - PPC_MR(3, b2p[BPF_REG_0]); + EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, @@ -171,7 +171,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, /* Load actual entry point from function descriptor */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); /* ... and move it to LR */ - PPC_MTLR(b2p[TMP_REG_1]); + EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); /* * Load TOC from function descriptor at offset 8. * We can clobber r2 since we get called through a @@ -182,9 +182,9 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, #else /* We can clobber r12 */ PPC_FUNC_ADDR(12, func); - PPC_MTLR(12); + EMIT(PPC_RAW_MTLR(12)); #endif - PPC_BLRL(); + EMIT(PPC_RAW_BLRL()); } static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, @@ -206,7 +206,7 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, * that PPC_LI64() can emit. */ for (i = ctx->idx - ctx_idx; i < 5; i++) - PPC_NOP(); + EMIT(PPC_RAW_NOP()); #ifdef PPC64_ELF_ABI_v1 /* @@ -220,8 +220,8 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, PPC_BPF_LL(12, 12, 0); #endif - PPC_MTLR(12); - PPC_BLRL(); + EMIT(PPC_RAW_MTLR(12)); + EMIT(PPC_RAW_BLRL()); } static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -239,9 +239,9 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * if (index >= array->map.max_entries) * goto out; */ - PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); - PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31); - PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); + EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); + EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); PPC_BCC(COND_GE, out); /* @@ -249,42 +249,42 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * goto out; */ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); - PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); + EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); PPC_BCC(COND_GT, out); /* * tail_call_cnt++; */ - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); /* prog = array->ptrs[index]; */ - PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); - PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); + EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); /* * if (prog == NULL) * goto out; */ - PPC_CMPLDI(b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); #ifdef PPC64_ELF_ABI_v1 /* skip past the function descriptor */ - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], + FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); #else - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); #endif - PPC_MTCTR(b2p[TMP_REG_1]); + EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); - PPC_BCTR(); + EMIT(PPC_RAW_BCTR()); /* out: */ } @@ -340,11 +340,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ - PPC_ADD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ - PPC_SUB(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -354,53 +354,53 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, imm = -imm; if (imm) { if (imm >= -32768 && imm < 32768) - PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); else { PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); } } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ if (BPF_CLASS(code) == BPF_ALU) - PPC_MULW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); else - PPC_MULD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ if (imm >= -32768 && imm < 32768) - PPC_MULI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { PPC_LI32(b2p[TMP_REG_1], imm); if (BPF_CLASS(code) == BPF_ALU) - PPC_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, + b2p[TMP_REG_1])); else - PPC_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, + b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg); - PPC_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1]); - PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } else - PPC_DIVWU(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg); - PPC_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1]); - PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } else - PPC_DIVDU(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ @@ -415,35 +415,37 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVWU(b2p[TMP_REG_2], dst_reg, - b2p[TMP_REG_1]); - PPC_MULW(b2p[TMP_REG_1], + EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], + dst_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], b2p[TMP_REG_1], - b2p[TMP_REG_2]); - PPC_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1]); + b2p[TMP_REG_2])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1])); } else - PPC_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, + b2p[TMP_REG_1])); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVDU(b2p[TMP_REG_2], dst_reg, - b2p[TMP_REG_1]); - PPC_MULD(b2p[TMP_REG_1], + EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], + dst_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], b2p[TMP_REG_1], - b2p[TMP_REG_2]); - PPC_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1]); + b2p[TMP_REG_2])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1])); } else - PPC_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, + b2p[TMP_REG_1])); break; } goto bpf_alu32_trunc; case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ case BPF_ALU64 | BPF_NEG: /* dst = -dst */ - PPC_NEG(dst_reg, dst_reg); + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); goto bpf_alu32_trunc; /* @@ -451,101 +453,101 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ - PPC_AND(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ if (!IMM_H(imm)) - PPC_ANDI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ - PPC_OR(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); } else { if (IMM_L(imm)) - PPC_ORI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); if (IMM_H(imm)) - PPC_ORIS(dst_reg, dst_reg, IMM_H(imm)); + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); } goto bpf_alu32_trunc; case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ - PPC_XOR(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); } else { if (IMM_L(imm)) - PPC_XORI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); if (IMM_H(imm)) - PPC_XORIS(dst_reg, dst_reg, IMM_H(imm)); + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); } goto bpf_alu32_trunc; case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ /* slw clears top 32 bits */ - PPC_SLW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); /* skip zero extension move, but set address map. */ if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ - PPC_SLD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ /* with imm 0, we still need to clear top 32 bits */ - PPC_SLWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ if (imm != 0) - PPC_SLDI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm)); break; case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ - PPC_SRW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ - PPC_SRD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ - PPC_SRWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ if (imm != 0) - PPC_SRDI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm)); break; case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ - PPC_SRAW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ - PPC_SRAD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ - PPC_SRAWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ if (imm != 0) - PPC_SRADI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm)); break; /* @@ -555,10 +557,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ if (imm == 1) { /* special mov32 for zext */ - PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; } - PPC_MR(dst_reg, src_reg); + EMIT(PPC_RAW_MR(dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ @@ -572,7 +574,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, bpf_alu32_trunc: /* Truncate to 32-bits */ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext) - PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; /* @@ -590,11 +592,11 @@ bpf_alu32_trunc: switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23); + EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - PPC_MR(dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 32: /* @@ -602,12 +604,12 @@ bpf_alu32_trunc: * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31); + EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23); - PPC_MR(dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 64: /* @@ -619,8 +621,8 @@ bpf_alu32_trunc: * same across all passes */ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); - PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); - PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); break; } break; @@ -629,14 +631,14 @@ emit_clear: switch (imm) { case 16: /* zero-extend 16 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 48); + EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case 32: if (!fp->aux->verifier_zext) /* zero-extend 32 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 32); + EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32)); break; case 64: /* nop */ @@ -650,18 +652,18 @@ emit_clear: case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); src_reg = b2p[TMP_REG_1]; } - PPC_STB(src_reg, dst_reg, off); + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); src_reg = b2p[TMP_REG_1]; } - PPC_STH(src_reg, dst_reg, off); + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ @@ -669,7 +671,7 @@ emit_clear: PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_STW(src_reg, dst_reg, off); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ @@ -686,24 +688,24 @@ emit_clear: /* *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: /* Get EA into TMP_REG_1 */ - PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); /* add value from src_reg into this */ - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); /* store result back */ - PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; - PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); + EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -712,19 +714,19 @@ emit_clear: */ /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - PPC_LBZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: - PPC_LHZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - PPC_LWZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; @@ -775,7 +777,7 @@ emit_clear: else bpf_jit_emit_func_call_rel(image, ctx, func_addr); /* move return value from r3 to BPF_REG_0 */ - PPC_MR(b2p[BPF_REG_0], 3); + EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); break; /* @@ -860,9 +862,9 @@ cond_branch: case BPF_JMP32 | BPF_JNE | BPF_X: /* unsigned comparison */ if (BPF_CLASS(code) == BPF_JMP32) - PPC_CMPLW(dst_reg, src_reg); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); else - PPC_CMPLD(dst_reg, src_reg); + EMIT(PPC_RAW_CMPLD(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSLT | BPF_X: @@ -874,21 +876,21 @@ cond_branch: case BPF_JMP32 | BPF_JSLE | BPF_X: /* signed comparison */ if (BPF_CLASS(code) == BPF_JMP32) - PPC_CMPW(dst_reg, src_reg); + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); else - PPC_CMPD(dst_reg, src_reg); + EMIT(PPC_RAW_CMPD(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg); + EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, + src_reg)); } else { int tmp_reg = b2p[TMP_REG_1]; - PPC_AND(tmp_reg, dst_reg, src_reg); - PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31); + EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, + 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -912,19 +914,19 @@ cond_branch: */ if (imm >= 0 && imm < 32768) { if (is_jmp32) - PPC_CMPLWI(dst_reg, imm); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); else - PPC_CMPLDI(dst_reg, imm); + EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ PPC_LI32(b2p[TMP_REG_1], imm); /* ... but unsigned comparison */ if (is_jmp32) - PPC_CMPLW(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPLW(dst_reg, + b2p[TMP_REG_1])); else - PPC_CMPLD(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPLD(dst_reg, + b2p[TMP_REG_1])); } break; } @@ -945,17 +947,17 @@ cond_branch: */ if (imm >= -32768 && imm < 32768) { if (is_jmp32) - PPC_CMPWI(dst_reg, imm); + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); else - PPC_CMPDI(dst_reg, imm); + EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { PPC_LI32(b2p[TMP_REG_1], imm); if (is_jmp32) - PPC_CMPW(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPW(dst_reg, + b2p[TMP_REG_1])); else - PPC_CMPD(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPD(dst_reg, + b2p[TMP_REG_1])); } break; } @@ -964,19 +966,19 @@ cond_branch: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm); + EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); else { int tmp_reg = b2p[TMP_REG_1]; PPC_LI32(tmp_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - PPC_AND_DOT(tmp_reg, dst_reg, - tmp_reg); + EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, + tmp_reg)); } else { - PPC_AND(tmp_reg, dst_reg, - tmp_reg); - PPC_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31); + EMIT(PPC_RAW_AND(tmp_reg, dst_reg, + tmp_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, + 0, 0, 31)); } } break; diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 53d614e98537..c02854dea2b2 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ isa207-common.o power8-pmu.o power9-pmu.o \ - generic-compat-pmu.o + generic-compat-pmu.o power10-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index 7a2cb9e1181a..ae24d4a00da6 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,7 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(void __user *ptr, void *buf, int nb); +int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -16,4 +16,27 @@ static inline bool invalid_user_sp(unsigned long sp) return (!sp || (sp & mask) || (sp > top)); } +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static inline int __read_user_stack(const void __user *ptr, void *ret, + size_t size) +{ + unsigned long addr = (unsigned long)ptr; + int rc; + + if (addr > TASK_SIZE - size || (addr & (size - 1))) + return -EFAULT; + + rc = copy_from_user_nofault(ret, ptr, size); + + if (IS_ENABLED(CONFIG_PPC64) && rc) + return read_user_stack_slow(ptr, ret, size); + + return rc; +} + #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index 542e68b8eae0..64e4013d8060 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -30,26 +30,9 @@ #endif /* CONFIG_PPC64 */ -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. - */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret) { - int rc; - - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); - - if (IS_ENABLED(CONFIG_PPC64) && rc) - return read_user_stack_slow(ptr, ret, 4); - - return rc; + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index fa2a1b83b9b0..fed90e827f3a 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -23,7 +23,7 @@ * interrupt context, so if the access faults, we read the page tables * to find which page (if any) is mapped and access it directly. */ -int read_user_stack_slow(void __user *ptr, void *buf, int nb) +int read_user_stack_slow(const void __user *ptr, void *buf, int nb) { unsigned long addr = (unsigned long) ptr; @@ -44,16 +44,9 @@ int read_user_stack_slow(void __user *ptr, void *buf, int nb) return -EFAULT; } -static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || - ((unsigned long)ptr & 7)) - return -EFAULT; - - if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) - return 0; - - return read_user_stack_slow(ptr, ret, 8); + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 01d70280d287..78fe34986594 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -37,12 +37,7 @@ struct cpu_hw_events { struct perf_event *event[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int flags[MAX_HWEVENTS]; - /* - * The order of the MMCR array is: - * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2 - * - 32-bit, MMCR0, MMCR1, MMCR2 - */ - unsigned long mmcr[4]; + struct mmcr_regs mmcr; struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; @@ -77,6 +72,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; /* * 32-bit doesn't have MMCRA but does have an MMCR2, * and a few other names are different. + * Also 32-bit doesn't have MMCR3, SIER2 and SIER3. + * Define them as zero knowing that any code path accessing + * these registers (via mtspr/mfspr) are done under ppmu flag + * check for PPMU_ARCH_31 and we will not enter that code path + * for 32-bit. */ #ifdef CONFIG_PPC32 @@ -90,7 +90,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_PMCC_U6 0 #define SPRN_MMCRA SPRN_MMCR2 +#define SPRN_MMCR3 0 +#define SPRN_SIER2 0 +#define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 +#define MMCRA_BHRB_DISABLE 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -121,7 +125,7 @@ static void ebb_event_add(struct perf_event *event) { } static void ebb_switch_out(unsigned long mmcr0) { } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - return cpuhw->mmcr[0]; + return cpuhw->mmcr.mmcr0; } static inline void power_pmu_bhrb_enable(struct perf_event *event) {} @@ -466,8 +470,11 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * addresses at this point. Check the privileges before * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) + * Incase of ISA v3.1, BHRB will capture only user-space + * addresses, hence include a check before filtering code */ - if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + if (!(ppmu->flags & PPMU_ARCH_31) && + is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -586,11 +593,16 @@ static void ebb_switch_out(unsigned long mmcr0) current->thread.sdar = mfspr(SPRN_SDAR); current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK; current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; + if (ppmu->flags & PPMU_ARCH_31) { + current->thread.mmcr3 = mfspr(SPRN_MMCR3); + current->thread.sier2 = mfspr(SPRN_SIER2); + current->thread.sier3 = mfspr(SPRN_SIER3); + } } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - unsigned long mmcr0 = cpuhw->mmcr[0]; + unsigned long mmcr0 = cpuhw->mmcr.mmcr0; if (!ebb) goto out; @@ -624,7 +636,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) * unfreeze counters, it should not set exclude_xxx in its events and * instead manage the MMCR2 entirely by itself. */ - mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) { + mtspr(SPRN_MMCR3, current->thread.mmcr3); + mtspr(SPRN_SIER2, current->thread.sier2); + mtspr(SPRN_SIER3, current->thread.sier3); + } out: return mmcr0; } @@ -845,6 +863,11 @@ void perf_event_print_debug(void) pr_info("EBBRR: %016lx BESCR: %016lx\n", mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR)); } + + if (ppmu->flags & PPMU_ARCH_31) { + pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n", + mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3)); + } #endif pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n", mfspr(SPRN_SIAR), sdar, sier); @@ -1196,7 +1219,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; - unsigned long flags, mmcr0, val; + unsigned long flags, mmcr0, val, mmcra; if (!ppmu) return; @@ -1229,12 +1252,24 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + val = mmcra = cpuhw->mmcr.mmcra; + /* * Disable instruction sampling if it was enabled */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) + val &= ~MMCRA_SAMPLE_ENABLE; + + /* Disable BHRB via mmcra (BHRBRD) for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCRA_BHRB_DISABLE; + + /* + * Write SPRN_MMCRA if mmcra has either disabled + * instruction sampling or BHRB. + */ + if (val != mmcra) { + mtspr(SPRN_MMCRA, mmcra); mb(); isync(); } @@ -1308,18 +1343,20 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). */ if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); goto out_enable; } /* * Clear all MMCR settings and recompute them for the new set of events. */ - memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); + memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - cpuhw->mmcr, cpuhw->event)) { + &cpuhw->mmcr, cpuhw->event)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; @@ -1333,11 +1370,11 @@ static void power_pmu_enable(struct pmu *pmu) */ event = cpuhw->event[0]; if (event->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; + cpuhw->mmcr.mmcr0 |= MMCR0_FCP; if (event->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_events_kernel; + cpuhw->mmcr.mmcr0 |= freeze_events_kernel; if (event->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; + cpuhw->mmcr.mmcr0 |= MMCR0_FCHV; } /* @@ -1346,12 +1383,15 @@ static void power_pmu_enable(struct pmu *pmu) * Then unfreeze the events. */ ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); if (ppmu->flags & PPMU_ARCH_207S) - mtspr(SPRN_MMCR2, cpuhw->mmcr[3]); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); /* * Read off any pre-existing events that need to move @@ -1402,7 +1442,7 @@ static void power_pmu_enable(struct pmu *pmu) perf_event_update_userpage(event); } cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE; out_enable: pmao_restore_workaround(ebb); @@ -1418,9 +1458,9 @@ static void power_pmu_enable(struct pmu *pmu) /* * Enable instruction sampling if necessary */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) { mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra); } out: @@ -1550,7 +1590,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) cpuhw->flags[i-1] = cpuhw->flags[i]; } --cpuhw->n_events; - ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); + ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr); if (event->hw.idx) { write_pmc(event->hw.idx, 0); event->hw.idx = 0; @@ -1571,7 +1611,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) } if (cpuhw->n_events == 0) { /* disable exceptions if no events are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE); } if (has_branch_stack(event)) @@ -1795,7 +1835,7 @@ static void hw_perf_event_destroy(struct perf_event *event) static int hw_perf_cache_event(u64 config, u64 *eventp) { unsigned long type, op, result; - int ev; + u64 ev; if (!ppmu->cache_events) return -EINVAL; @@ -2246,7 +2286,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) * XXX might want to use MSR.PM to keep the events frozen until * we get back out of this interrupt. */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); if (nmi) nmi_exit(); @@ -2268,7 +2308,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu) if (ppmu) { memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; + cpuhw->mmcr.mmcr0 = MMCR0_FC; } return 0; } @@ -2314,6 +2354,8 @@ static int __init init_ppc64_pmu(void) return 0; else if (!init_power9_pmu()) return 0; + else if (!init_power10_pmu()) + return 0; else if (!init_ppc970_pmu()) return 0; else diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index 5e5a54d5588e..eb8a6aaf4cc1 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -101,7 +101,7 @@ static int compat_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index db213eb7cb02..cdb7bfbd157e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -31,6 +31,8 @@ static int interface_version; /* Whether we have to aggregate result data for some domains. */ static bool aggregate_result_elements; +static cpumask_t hv_24x7_cpumask; + static bool domain_is_valid(unsigned domain) { switch (domain) { @@ -446,6 +448,12 @@ static ssize_t device_show_string(struct device *dev, return sprintf(buf, "%s\n", (char *)d->var); } +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask); +} + static ssize_t sockets_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1113,6 +1121,7 @@ static DEVICE_ATTR_RO(domains); static DEVICE_ATTR_RO(sockets); static DEVICE_ATTR_RO(chipspersocket); static DEVICE_ATTR_RO(coresperchip); +static DEVICE_ATTR_RO(cpumask); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, @@ -1126,6 +1135,7 @@ static struct attribute *if_attrs[] = { &dev_attr_sockets.attr, &dev_attr_chipspersocket.attr, &dev_attr_coresperchip.attr, + &dev_attr_cpumask.attr, NULL, }; @@ -1641,6 +1651,45 @@ static struct pmu h_24x7_pmu = { .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; +static int ppc_hv_24x7_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&hv_24x7_cpumask)) + cpumask_set_cpu(cpu, &hv_24x7_cpumask); + + return 0; +} + +static int ppc_hv_24x7_cpu_offline(unsigned int cpu) +{ + int target; + + /* Check if exiting cpu is used for collecting 24x7 events */ + if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask)) + return 0; + + /* Find a new cpu to collect 24x7 events */ + target = cpumask_last(cpu_active_mask); + + if (target < 0 || target >= nr_cpu_ids) { + pr_err("hv_24x7: CPU hotplug init failed\n"); + return -1; + } + + /* Migrate 24x7 events to the new target */ + cpumask_set_cpu(target, &hv_24x7_cpumask); + perf_pmu_migrate_context(&h_24x7_pmu, cpu, target); + + return 0; +} + +static int hv_24x7_cpu_hotplug_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, + "perf/powerpc/hv_24x7:online", + ppc_hv_24x7_cpu_online, + ppc_hv_24x7_cpu_offline); +} + static int hv_24x7_init(void) { int r; @@ -1685,6 +1734,11 @@ static int hv_24x7_init(void) if (r) return r; + /* init cpuhotplug */ + r = hv_24x7_cpu_hotplug_init(); + if (r) + return r; + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0edcfd0b491d..a45d694a5d5d 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1288,11 +1288,30 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->size = sizeof(*header) + event->header_size; header->misc = 0; - if (is_kernel_addr(data->ip)) - header->misc |= PERF_RECORD_MISC_KERNEL; - else - header->misc |= PERF_RECORD_MISC_USER; - + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + switch (IMC_TRACE_RECORD_VAL_HVPR(mem->val)) { + case 0:/* when MSR HV and PR not set in the trace-record */ + header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; + break; + case 1: /* MSR HV is 0 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_GUEST_USER; + break; + case 2: /* MSR HV is 1 and PR is 0 */ + header->misc |= PERF_RECORD_MISC_HYPERVISOR; + break; + case 3: /* MSR HV is 1 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_USER; + break; + default: + pr_info("IMC: Unable to set the flag based on MSR bits\n"); + break; + } + } else { + if (is_kernel_addr(data->ip)) + header->misc |= PERF_RECORD_MISC_KERNEL; + else + header->misc |= PERF_RECORD_MISC_USER; + } perf_event_header__init_id(header, data, event); return 0; diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h index f755c64da137..80bbf72bfec2 100644 --- a/arch/powerpc/perf/internal.h +++ b/arch/powerpc/perf/internal.h @@ -9,4 +9,5 @@ extern int init_power6_pmu(void); extern int init_power7_pmu(void); extern int init_power8_pmu(void); extern int init_power9_pmu(void); +extern int init_power10_pmu(void); extern int init_generic_compat_pmu(void); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4c86da5eb28a..964437adec18 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -55,7 +55,9 @@ static bool is_event_valid(u64 event) { u64 valid_mask = EVENT_VALID_MASK; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + valid_mask = p10_EVENT_VALID_MASK; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) valid_mask = p9_EVENT_VALID_MASK; return !(event & ~valid_mask); @@ -69,6 +71,14 @@ static inline bool is_event_marked(u64 event) return false; } +static unsigned long sdar_mod_val(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_SDAR_MODE(event); + + return p9_SDAR_MODE(event); +} + static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) { /* @@ -79,7 +89,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling * mode and will be un-changed when setting MMCRA[63] (Marked events). * - * Incase of Power9: + * Incase of Power9/power10: * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), * or if group already have any marked events. * For rest @@ -90,8 +100,8 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; - else if (p9_SDAR_MODE(event)) - *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (sdar_mod_val(event)) + *mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT; else *mmcra |= MMCRA_SDAR_MODE_DCACHE; } else @@ -134,7 +144,11 @@ static bool is_thresh_cmp_valid(u64 event) /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. + * Power10: thresh_cmp is replaced by l2_l3 event select. */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return false; + cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -251,7 +265,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + p10_EVENT_CACHE_SEL_MASK; + else + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + EVENT_CACHE_SEL_MASK; ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; if (pmc) { @@ -283,7 +302,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (unit >= 6 && unit <= 9) { - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + mask |= CNST_L2L3_GROUP_MASK; + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { mask |= CNST_CACHE_GROUP_MASK; value |= CNST_CACHE_GROUP_VAL(event & 0xff); @@ -363,10 +385,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; + unsigned long mmcr3; unsigned int pmc, pmc_inuse; int i; @@ -379,7 +402,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - mmcra = mmcr1 = mmcr2 = 0; + mmcra = mmcr1 = mmcr2 = mmcr3 = 0; + + /* + * Disable bhrb unless explicitly requested + * by setting MMCRA (BHRBRD) bit. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcra |= MMCRA_BHRB_DISABLE; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { @@ -438,8 +468,17 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_THR_CTL_SHIFT; val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; mmcra |= val << MMCRA_THR_SEL_SHIFT; - val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - mmcra |= thresh_cmp_val(val); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> EVENT_THR_CMP_SHIFT) & + EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); + } + } + + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + val = (event[i] >> p10_L2L3_EVENT_SHIFT) & + p10_EVENT_L2L3_SEL_MASK; + mmcr2 |= val << p10_L2L3_SEL_SHIFT; } if (event[i] & EVENT_WANTS_BHRB) { @@ -447,6 +486,11 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_IFM_SHIFT; } + /* set MMCRA (BHRBRD) to 0 if there is user request for BHRB */ + if (cpu_has_feature(CPU_FTR_ARCH_31) && + (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB))) + mmcra &= ~MMCRA_BHRB_DISABLE; + if (pevents[i]->attr.exclude_user) mmcr2 |= MMCR2_FCP(pmc); @@ -460,34 +504,43 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr2 |= MMCR2_FCS(pmc); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (pmc <= 4) { + val = (event[i] >> p10_EVENT_MMCR3_SHIFT) & + p10_EVENT_MMCR3_MASK; + mmcr3 |= val << MMCR3_SHIFT(pmc); + } + } + hwc[i] = pmc - 1; } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; /* pmc_inuse is 1-based */ if (pmc_inuse & 2) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x7c) - mmcr[0] |= MMCR0_PMCjCE; + mmcr->mmcr0 |= MMCR0_PMCjCE; /* If we're not using PMC 5 or 6, freeze them */ if (!(pmc_inuse & 0x60)) - mmcr[0] |= MMCR0_FC56; + mmcr->mmcr0 |= MMCR0_FC56; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - mmcr[3] = mmcr2; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; + mmcr->mmcr2 = mmcr2; + mmcr->mmcr3 = mmcr3; return 0; } -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size) diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 63fd4f3f6013..044de65e96b9 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -87,6 +87,31 @@ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) +/* Contants to support power10 raw encoding format */ +#define p10_SDAR_MODE_SHIFT 22 +#define p10_SDAR_MODE_MASK 0x3ull +#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \ + p10_SDAR_MODE_MASK) +#define p10_EVENT_L2L3_SEL_MASK 0x1f +#define p10_L2L3_SEL_SHIFT 3 +#define p10_L2L3_EVENT_SHIFT 40 +#define p10_EVENT_THRESH_MASK 0xffffull +#define p10_EVENT_CACHE_SEL_MASK 0x3ull +#define p10_EVENT_MMCR3_MASK 0x7fffull +#define p10_EVENT_MMCR3_SHIFT 45 + +#define p10_EVENT_VALID_MASK \ + ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ + (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ + (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_LINUX_MASK | \ + EVENT_PSEL_MASK)) /* * Layout of constraint bits: * @@ -135,6 +160,9 @@ #define CNST_CACHE_PMC4_VAL (1ull << 54) #define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL +#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) +#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. To achieve that we bias the @@ -191,7 +219,7 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) -/* MMCR1 Threshold Compare bit constant for power9 */ +/* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 /* Bits in MMCR2 for PowerISA v2.07 */ @@ -202,6 +230,9 @@ #define MAX_ALT 2 #define MAX_PMU_COUNTERS 6 +/* Bits in MMCR3 for PowerISA v3.10 */ +#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1))) + #define ISA207_SIER_TYPE_SHIFT 15 #define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT) @@ -217,9 +248,9 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]); -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]); +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr); int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index 4d5ef92511d1..1919e9df9165 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -257,7 +257,7 @@ static const u32 pmcsel_mask[N_COUNTER] = { * Compute MMCR0/1/2 values for a set of events. */ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], - unsigned long mmcr[], + struct mmcr_regs *mmcr, struct perf_event *pevents[]) { u8 event_index[N_CLASSES][N_COUNTER]; @@ -321,9 +321,16 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], mmcr0 |= MMCR0_PMCnCE; /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcr2; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcr2 = mmcr2; + /* + * 32-bit doesn't have an MMCRA and uses SPRN_MMCR2 to define + * SPRN_MMCRA. So assign mmcra of cpu_hw_events with `mmcr2` + * value to ensure that any write to this SPRN_MMCRA will + * use mmcr2 value. + */ + mmcr->mmcra = mmcr2; return 0; } @@ -331,12 +338,12 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], * Disable counting by a PMC. * Note that the pmc argument is 0-based here, not 1-based. */ -static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 1) - mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); else - mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); } static int mpc7450_generic_events[] = { @@ -354,7 +361,7 @@ static int mpc7450_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0x225 }, [C(OP_WRITE)] = { 0, 0x227 }, diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h new file mode 100644 index 000000000000..60c1b8111082 --- /dev/null +++ b/arch/powerpc/perf/power10-events-list.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +/* + * Power10 event codes. + */ +EVENT(PM_RUN_CYC, 0x600f4); +EVENT(PM_DISP_STALL_CYC, 0x100f8); +EVENT(PM_EXEC_STALL, 0x30008); +EVENT(PM_RUN_INST_CMPL, 0x500fa); +EVENT(PM_BR_CMPL, 0x4d05e); +EVENT(PM_BR_MPRED_CMPL, 0x400f6); + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100fc); +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1, 0x3e054); +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0); +/* L1 cache data prefetches */ +EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c); +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fc); +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080); +/* Instruction Demand sectors wriittent into IL1 */ +EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040); +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_REQ, 0x040a0); +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x01340000001c040); +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe); +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc); +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc); + +EVENT(PM_RUN_CYC_ALT, 0x0001e); +EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); + +/* + * Memory Access Events + * + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * To enable capturing of memory profiling, these MMCRA bits + * needs to be programmed and corresponding raw event format + * encoding. + * + * MMCRA bits encoding needed are + * SM (Sampling Mode) + * EM (Eligibility for Random Sampling) + * TECE (Threshold Event Counter Event) + * TS (Threshold Start Event) + * TE (Threshold End Event) + * + * Corresponding Raw Encoding bits: + * sample [EM,SM] + * thresh_sel (TECE) + * thresh start (TS) + * thresh end (TE) + */ + +EVENT(MEM_LOADS, 0x34340401e0); +EVENT(MEM_STORES, 0x343c0401e0); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c new file mode 100644 index 000000000000..f7cff7f36a1c --- /dev/null +++ b/arch/powerpc/perf/power10-pmu.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +#define pr_fmt(fmt) "power10-pmu: " fmt + +#include "isa207-common.h" +#include "internal.h" + +/* + * Raw event encoding for Power10: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ] + * | | | | | | + * | | *- IFM (Linux) | | thresh start/stop -* + * | *- BHRB (Linux) | src_sel + * *- EBB (Linux) *invert_bit + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] + * | | | | | | + * | | | | | *- mark + * | | | *- L1/L2/L3 cache_sel | + * | | sdar_mode | + * | *- sampling mode for marked events *- combine + * | + * *- thresh_sel + * + * Below uses IBM bit numbering. + * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] + * MMCR1[29] = pmc3combine[1] + * MMCR1[30] = pmc4combine[0] + * MMCR1[31] = pmc4combine[1] + * + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 + * MMCR1[20:27] = thresh_ctl + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 + * MMCR1[20:27] = thresh_ctl + * else + * MMCRA[48:55] = thresh_ctl (THRESH START/END) + * + * if thresh_sel: + * MMCRA[45:47] = thresh_sel + * + * if l2l3_sel: + * MMCR2[56:60] = l2l3_sel[0:4] + * + * MMCR1[16] = cache_sel[0] + * MMCR1[17] = cache_sel[1] + * + * if mark: + * MMCRA[63] = 1 (SAMPLE_ENABLE) + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) + * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * + * MMCRA[SDAR_MODE] = sdar_mode[0:1] + */ + +/* + * Some power10 event codes. + */ +#define EVENT(_name, _code) enum{_name = _code} + +#include "power10-events-list.h" + +#undef EVENT + +/* MMCRA IFM bits - POWER10 */ +#define POWER10_MMCRA_IFM1 0x0000000040000000UL +#define POWER10_MMCRA_IFM2 0x0000000080000000UL +#define POWER10_MMCRA_IFM3 0x00000000C0000000UL +#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL + +/* Table of alternatives, sorted by column 0 */ +static const unsigned int power10_event_alternatives[][MAX_ALT] = { + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, +}; + +static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(power10_event_alternatives), flags, + power10_event_alternatives); + + return num_alt; +} + +GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); +GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); +GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); +GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_LD_PREFETCH_CACHE_LINE_MISS); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power10_events_attr[] = { + GENERIC_EVENT_PTR(PM_RUN_CYC), + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_BR_CMPL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + GENERIC_EVENT_PTR(MEM_LOADS), + GENERIC_EVENT_PTR(MEM_STORES), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_REQ), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power10_pmu_events_group = { + .name = "events", + .attrs = power10_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-59"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:10-11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); +PMU_FORMAT_ATTR(cache_sel, "config:20-21"); +PMU_FORMAT_ATTR(sdar_mode, "config:22-23"); +PMU_FORMAT_ATTR(sample_mode, "config:24-28"); +PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); +PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); +PMU_FORMAT_ATTR(thresh_start, "config:36-39"); +PMU_FORMAT_ATTR(l2l3_sel, "config:40-44"); +PMU_FORMAT_ATTR(src_sel, "config:45-46"); +PMU_FORMAT_ATTR(invert_bit, "config:47"); +PMU_FORMAT_ATTR(src_mask, "config:48-53"); +PMU_FORMAT_ATTR(src_match, "config:54-59"); + +static struct attribute *power10_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + &format_attr_cache_sel.attr, + &format_attr_sdar_mode.attr, + &format_attr_sample_mode.attr, + &format_attr_thresh_sel.attr, + &format_attr_thresh_stop.attr, + &format_attr_thresh_start.attr, + &format_attr_l2l3_sel.attr, + &format_attr_src_sel.attr, + &format_attr_invert_bit.attr, + &format_attr_src_mask.attr, + &format_attr_src_match.attr, + NULL, +}; + +static struct attribute_group power10_pmu_format_group = { + .name = "format", + .attrs = power10_pmu_format_attr, +}; + +static const struct attribute_group *power10_pmu_attr_groups[] = { + &power10_pmu_format_group, + &power10_pmu_events_group, + NULL, +}; + +static int power10_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, +}; + +static u64 power10_bhrb_filter_map(u64 branch_sample_type) +{ + u64 pmu_bhrb_filter = 0; + + /* BHRB and regular PMU events share the same privilege state + * filter configuration. BHRB is always recorded along with a + * regular PMU event. As the privilege state filter is handled + * in the basic PMC configuration of the accompanying regular + * PMU event, we ignore any separate BHRB specific request. + */ + + /* No branch filter requested */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) + return pmu_bhrb_filter; + + /* Invalid branch filter options - HW does not support */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM2; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM3; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM1; + return pmu_bhrb_filter; + } + + /* Every thing else is unsupported */ + return -1; +} + +static void power10_config_bhrb(u64 pmu_bhrb_filter) +{ + pmu_bhrb_filter &= POWER10_MMCRA_BHRB_MASK; + + /* Enable BHRB filter in PMU */ + mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); +} + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_LD_REF_L1, + [C(RESULT_MISS)] = PM_LD_MISS_L1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ST_MISS_L1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, + [C(RESULT_MISS)] = 0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, + [C(RESULT_MISS)] = 0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = 0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_BR_CMPL, + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +#undef C + +static struct power_pmu power10_pmu = { + .name = "POWER10", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .group_constraint_mask = CNST_CACHE_PMC4_MASK, + .group_constraint_val = CNST_CACHE_PMC4_VAL, + .compute_mmcr = isa207_compute_mmcr, + .config_bhrb = power10_config_bhrb, + .bhrb_filter_map = power10_bhrb_filter_map, + .get_constraint = isa207_get_constraint, + .get_alternatives = power10_get_alternatives, + .get_mem_data_src = isa207_get_mem_data_src, + .get_mem_weight = isa207_get_mem_weight, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | + PPMU_ARCH_31, + .n_generic = ARRAY_SIZE(power10_generic_events), + .generic_events = power10_generic_events, + .cache_events = &power10_cache_events, + .attr_groups = power10_pmu_attr_groups, + .bhrb_nr = 32, +}; + +int init_power10_pmu(void) +{ + int rc; + + /* Comes from cpu_specs[] */ + if (!cur_cpu_spec->oprofile_cpu_type || + strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) + return -ENODEV; + + rc = register_power_pmu(&power10_pmu); + if (rc) + return rc; + + /* Tell userspace that EBB is supported */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + + return 0; +} diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index f8574547fc6b..a62b2cd7914f 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -448,7 +448,8 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; @@ -586,20 +587,20 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5p_generic_events[] = { @@ -618,7 +619,7 @@ static int power5p_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index da52ecab7c9a..8732b587cf71 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -379,7 +379,8 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -528,20 +529,20 @@ static int power5_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5_generic_events[] = { @@ -560,7 +561,7 @@ static int power5_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 3929cacf72ed..0e318cf87129 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -171,7 +171,7 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -243,13 +243,13 @@ static int p6_compute_mmcr(u64 event[], int n_ev, if (pmc < 4) mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); } - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0xe) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } @@ -457,11 +457,11 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return nalt; } -static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { /* Set PMCxSEL to 0 to disable PMCx */ if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power6_generic_events[] = { @@ -481,7 +481,7 @@ static int power6_generic_events[] = { * are event codes. * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. */ -static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x280030, 0x80080 }, [C(OP_WRITE)] = { 0x180032, 0x80088 }, diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index a137813a3076..5e0bf09cf077 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -242,7 +242,8 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -298,20 +299,20 @@ static int power7_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power7_generic_events[] = { @@ -332,7 +333,7 @@ static int power7_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0xc880, 0x400f0 }, [C(OP_WRITE)] = { 0, 0x300f0 }, diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 3a5fcc20ff31..5282e8415ddf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -253,7 +253,7 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 08c3ef796198..05dae38b969a 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -310,7 +310,7 @@ static void power9_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 4035d93d87ab..d35223fb112c 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -253,7 +253,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; @@ -393,27 +394,26 @@ static int p970_compute_mmcr(u64 event[], int n_ev, mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { - int shift, i; + int shift; + /* + * Setting the PMCxSEL field to 0x08 disables PMC x. + */ if (pmc <= 1) { shift = MMCR0_PMC1SEL_SH - 7 * pmc; - i = 0; + mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift); } else { shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); - i = 1; + mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift); } - /* - * Setting the PMCxSEL field to 0x08 disables PMC x. - */ - mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); } static int ppc970_generic_events[] = { @@ -432,7 +432,7 @@ static int ppc970_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x8810, 0x3810 }, [C(OP_WRITE)] = { 0x7810, 0x813 }, diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 70083649c9ea..11475c58ea43 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -56,7 +56,7 @@ lite5200_low_power: /* * save stuff BDI overwrites * 0xf0 (0xe0->0x100 gets overwritten when BDI connected; - * even when CONFIG_BDI* is disabled and MMU XLAT commented; heisenbug?)) + * even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?)) * WARNING: self-refresh doesn't seem to work when BDI2000 is connected, * possibly because BDI sets SDRAM registers before wakeup code does */ diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index fa3d29dcb57e..b77cbb0a50e1 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -160,7 +160,7 @@ config XES_MPC85xx computers from Extreme Engineering Solutions (X-ES) based on Freescale MPC85xx processors. Manufacturer: Extreme Engineering Solutions, Inc. - URL: <http://www.xes-inc.com/> + URL: <https://www.xes-inc.com/> config STX_GP3 bool "Silicon Turnkey Express GP3" diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 0f7c8241912b..f2ff359041ee 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -44,6 +44,7 @@ config SPU_FS tristate "SPU file system" default m depends on PPC_CELL + depends on COREDUMP select SPU_BASE help The SPU file system is used to access Synergistic Processing diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 3b75e8f60609..026c181a98c5 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -66,13 +66,20 @@ static int match_context(const void *v, struct file *file, unsigned fd) */ static struct spu_context *coredump_next_context(int *fd) { + struct spu_context *ctx; struct file *file; int n = iterate_fd(current->files, *fd, match_context, NULL); if (!n) return NULL; *fd = n - 1; + + rcu_read_lock(); file = fcheck(*fd); - return SPUFS_I(file_inode(file))->i_ctx; + ctx = SPUFS_I(file_inode(file))->i_ctx; + get_spu_context(ctx); + rcu_read_unlock(); + + return ctx; } int spufs_coredump_extra_notes_size(void) @@ -83,17 +90,23 @@ int spufs_coredump_extra_notes_size(void) fd = 0; while ((ctx = coredump_next_context(&fd)) != NULL) { rc = spu_acquire_saved(ctx); - if (rc) + if (rc) { + put_spu_context(ctx); break; + } + rc = spufs_ctx_note_size(ctx, fd); spu_release_saved(ctx); - if (rc < 0) + if (rc < 0) { + put_spu_context(ctx); break; + } size += rc; /* start searching the next fd next time */ fd++; + put_spu_context(ctx); } return size; @@ -105,7 +118,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, size_t sz = spufs_coredump_read[i].size; char fullname[80]; struct elf_note en; - size_t ret; + int ret; sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name); en.n_namesz = strlen(fullname) + 1; diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c index 1cd4ca14b08d..1bf65d02d3ba 100644 --- a/arch/powerpc/platforms/pasemi/misc.c +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -56,8 +56,7 @@ static int __init pasemi_register_i2c_devices(void) if (!adap_node) continue; - node = NULL; - while ((node = of_get_next_child(adap_node, node))) { + for_each_child_of_node(adap_node, node) { struct i2c_board_info info = {}; const u32 *addr; int len; diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 181caa3f6717..5c77b9a24c0e 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1465,7 +1465,7 @@ static long g5_i2s_enable(struct device_node *node, long param, long value) case 2: if (macio->type == macio_shasta) break; - /* fall through */ + fallthrough; default: return -ENODEV; } diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index bf4be4b53b44..f77a59b5c2e1 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -629,8 +629,7 @@ static void __init kw_i2c_probe(void) for (i = 0; i < chans; i++) kw_i2c_add(host, np, np, i); } else { - for (child = NULL; - (child = of_get_next_child(np, child)) != NULL;) { + for_each_child_of_node(np, child) { const u32 *reg = of_get_property(child, "reg", NULL); if (reg == NULL) @@ -1193,8 +1192,7 @@ static void pmac_i2c_devscan(void (*callback)(struct device_node *dev, * platform function instance */ list_for_each_entry(bus, &pmac_i2c_busses, link) { - for (np = NULL; - (np = of_get_next_child(bus->busnode, np)) != NULL;) { + for_each_child_of_node(bus->busnode, np) { struct whitelist_ent *p; /* If multibus, check if device is on that bus */ if (bus->flags & pmac_i2c_multibus) diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index 62311e84a423..f5422506d4b0 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -114,7 +114,7 @@ static void macio_gpio_init_one(struct macio_chip *macio) * Ok, got one, we dont need anything special to track them down, so * we just create them all */ - for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) { + for_each_child_of_node(gparent, gp) { const u32 *reg = of_get_property(gp, "reg", NULL); unsigned long offset; if (reg == NULL) @@ -133,7 +133,7 @@ static void macio_gpio_init_one(struct macio_chip *macio) macio->of_node); /* And now we run all the init ones */ - for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) + for_each_child_of_node(gparent, gp) pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL); /* Note: We do not at this point implement the "at sleep" or "at wake" diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index 6b61a18e8db3..f286bdfe8346 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -80,7 +80,7 @@ void udbg_scc_init(int force_scc) path = of_get_property(of_chosen, "linux,stdout-path", NULL); if (path != NULL) stdout = of_find_node_by_path(path); - for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) { + for_each_child_of_node(escc, ch) { if (ch == stdout) ch_def = of_node_get(ch); if (of_node_name_eq(ch, "ch-a")) diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index fe3f0fb5aeca..2eb6ae150d1f 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_FA_DUMP) += opal-fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o obj-$(CONFIG_OPAL_CORE) += opal-core.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o +obj-$(CONFIG_PCI_IOV) += pci-sriov.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 79409e005fcd..9af8c3b98853 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -338,6 +338,28 @@ static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) return 0; } +static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev) +{ + struct pci_controller *hose = pdev->bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pci_dev *parent = pdev->bus->self; + +#ifdef CONFIG_PCI_IOV + /* for VFs we use the PF's PE as the upstream PE */ + if (pdev->is_virtfn) + parent = pdev->physfn; +#endif + + /* otherwise use the PE of our parent bridge */ + if (parent) { + struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent); + + return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0); + } + + return NULL; +} + /** * pnv_eeh_probe - Do probe on PCI device * @pdev: pci_dev to probe @@ -350,6 +372,7 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) struct pci_controller *hose = pdn->phb; struct pnv_phb *phb = hose->private_data; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + struct eeh_pe *upstream_pe; uint32_t pcie_flags; int ret; int config_addr = (pdn->busno << 8) | (pdn->devfn); @@ -372,19 +395,18 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) } /* Skip for PCI-ISA bridge */ - if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) return NULL; eeh_edev_dbg(edev, "Probing device\n"); /* Initialize eeh device */ - edev->class_code = pdn->class_code; edev->mode &= 0xFFFFFF00; edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); - if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, @@ -399,8 +421,10 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) edev->pe_config_addr = phb->ioda.pe_rmap[config_addr]; + upstream_pe = pnv_eeh_get_upstream_pe(pdev); + /* Create PE */ - ret = eeh_add_to_parent_pe(edev); + ret = eeh_pe_tree_insert(edev, upstream_pe); if (ret) { eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret); return NULL; @@ -535,18 +559,6 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option) return 0; } -/** - * pnv_eeh_get_pe_addr - Retrieve PE address - * @pe: EEH PE - * - * Retrieve the PE address according to the given tranditional - * PCI BDF (Bus/Device/Function) address. - */ -static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) -{ - return pe->addr; -} - static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) { struct pnv_phb *phb = pe->phb->private_data; @@ -850,32 +862,32 @@ static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) case EEH_RESET_HOT: /* Don't report linkDown event */ if (aer) { - eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, &ctrl); ctrl |= PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, ctrl); } - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_SETTLE_TIME); /* Continue reporting linkDown event */ if (aer) { - eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, &ctrl); ctrl &= ~PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, ctrl); } @@ -944,11 +956,12 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, int pos, u16 mask) { + struct eeh_dev *edev = pdn->edev; int i, status = 0; /* Wait for Transaction Pending bit to be cleared */ for (i = 0; i < 4; i++) { - eeh_ops->read_config(pdn, pos, 2, &status); + eeh_ops->read_config(edev, pos, 2, &status); if (!(status & mask)) return; @@ -969,7 +982,7 @@ static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) if (WARN_ON(!edev->pcie_cap)) return -ENOTTY; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); if (!(reg & PCI_EXP_DEVCAP_FLR)) return -ENOTTY; @@ -979,18 +992,18 @@ static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) pnv_eeh_wait_for_pending(pdn, "", edev->pcie_cap + PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_TRPND); - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, ®); reg |= PCI_EXP_DEVCTL_BCR_FLR; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, reg); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, ®); reg &= ~PCI_EXP_DEVCTL_BCR_FLR; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, reg); msleep(EEH_PE_RST_SETTLE_TIME); break; @@ -1007,7 +1020,7 @@ static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) if (WARN_ON(!edev->af_cap)) return -ENOTTY; - eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap); if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) return -ENOTTY; @@ -1022,12 +1035,12 @@ static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) pnv_eeh_wait_for_pending(pdn, "AF", edev->af_cap + PCI_AF_CTRL, PCI_AF_STATUS_TP << 8); - eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, PCI_AF_CTRL_FLR); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0); msleep(EEH_PE_RST_SETTLE_TIME); break; } @@ -1261,9 +1274,11 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) return false; } -static int pnv_eeh_read_config(struct pci_dn *pdn, +static int pnv_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + if (!pdn) return PCIBIOS_DEVICE_NOT_FOUND; @@ -1275,9 +1290,11 @@ static int pnv_eeh_read_config(struct pci_dn *pdn, return pnv_pci_cfg_read(pdn, where, size, val); } -static int pnv_eeh_write_config(struct pci_dn *pdn, +static int pnv_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + if (!pdn) return PCIBIOS_DEVICE_NOT_FOUND; @@ -1631,34 +1648,24 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } -static int pnv_eeh_restore_config(struct pci_dn *pdn) +static int pnv_eeh_restore_config(struct eeh_dev *edev) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; s64 ret = 0; - int config_addr = (pdn->busno << 8) | (pdn->devfn); if (!edev) return -EEXIST; - /* - * We have to restore the PCI config space after reset since the - * firmware can't see SRIOV VFs. - * - * FIXME: The MPS, error routing rules, timeout setting are worthy - * to be exported by firmware in extendible way. - */ - if (edev->physfn) { - ret = eeh_restore_vf_config(pdn); - } else { - phb = pdn->phb->private_data; - ret = opal_pci_reinit(phb->opal_id, - OPAL_REINIT_PCI_DEV, config_addr); - } + if (edev->physfn) + return 0; + + phb = edev->controller->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->bdfn); if (ret) { pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", - __func__, config_addr, ret); + __func__, edev->bdfn, ret); return -EIO; } @@ -1670,7 +1677,6 @@ static struct eeh_ops pnv_eeh_ops = { .init = pnv_eeh_init, .probe = pnv_eeh_probe, .set_option = pnv_eeh_set_option, - .get_pe_addr = pnv_eeh_get_pe_addr, .get_state = pnv_eeh_get_state, .reset = pnv_eeh_reset, .get_log = pnv_eeh_get_log, diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2dd467383a88..77513a80cef9 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -48,7 +48,7 @@ static bool default_stop_found; * First stop state levels when SPR and TB loss can occur. */ static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; -static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1; +static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; /* * psscr value and mask of the deepest stop idle state. @@ -73,9 +73,6 @@ static int pnv_save_sprs_for_deep_states(void) */ uint64_t lpcr_val = mfspr(SPRN_LPCR); uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); uint64_t hmeer_val = mfspr(SPRN_HMEER); uint64_t msr_val = MSR_IDLE; uint64_t psscr_val = pnv_deepest_stop_psscr_val; @@ -117,6 +114,9 @@ static int pnv_save_sprs_for_deep_states(void) /* Only p8 needs to set extra HID regiters */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); if (rc != 0) @@ -611,6 +611,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) unsigned long srr1; unsigned long pls; unsigned long mmcr0 = 0; + unsigned long mmcra = 0; struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ bool sprs_saved = false; @@ -657,7 +658,22 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) */ mmcr0 = mfspr(SPRN_MMCR0); } - if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) { + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* + * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit. + * If the user hasn't asked for the BHRB to be + * written, the value of MMCRA[BHRBRD] is 1. + * On wakeup from stop, MMCRA[BHRBD] will be 0, + * since it is previleged resource and will be lost. + * Thus, if we do not save and restore the MMCRA[BHRBD], + * hardware will be needlessly writing to the BHRB + * in problem mode. + */ + mmcra = mfspr(SPRN_MMCRA); + } + + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { sprs.lpcr = mfspr(SPRN_LPCR); sprs.hfscr = mfspr(SPRN_HFSCR); sprs.fscr = mfspr(SPRN_FSCR); @@ -700,8 +716,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { - unsigned long mmcra; - /* * We don't need an isync after the mtsprs here because the * upcoming mtmsrd is execution synchronizing. @@ -721,6 +735,10 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_MMCR0, mmcr0); } + /* Reload MMCRA to restore BHRB disable bit for POWER10 */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mtspr(SPRN_MMCRA, mmcra); + /* * DD2.2 and earlier need to set then clear bit 60 in MMCRA * to ensure the PMU starts running. @@ -741,7 +759,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) * just always test PSSCR for SPR/TB state loss. */ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; - if (likely(pls < pnv_first_spr_loss_level)) { + if (likely(pls < deep_spr_loss_state)) { if (sprs_saved) atomic_stop_thread_idle(); goto out; @@ -1088,7 +1106,7 @@ static void __init pnv_power9_idle_init(void) * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. */ pnv_first_tb_loss_level = MAX_STOP_STATE + 1; - pnv_first_spr_loss_level = MAX_STOP_STATE + 1; + deep_spr_loss_state = MAX_STOP_STATE + 1; for (i = 0; i < nr_pnv_idle_states; i++) { int err; struct pnv_idle_states_t *state = &pnv_idle_states[i]; @@ -1099,8 +1117,8 @@ static void __init pnv_power9_idle_init(void) pnv_first_tb_loss_level = psscr_rl; if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && - (pnv_first_spr_loss_level > psscr_rl)) - pnv_first_spr_loss_level = psscr_rl; + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; /* * The idle code does not deal with TB loss occurring @@ -1111,8 +1129,8 @@ static void __init pnv_power9_idle_init(void) * compatibility. */ if ((state->flags & OPAL_PM_TIMEBASE_STOP) && - (pnv_first_spr_loss_level > psscr_rl)) - pnv_first_spr_loss_level = psscr_rl; + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; err = validate_psscr_val_mask(&state->psscr_val, &state->psscr_mask, @@ -1158,7 +1176,7 @@ static void __init pnv_power9_idle_init(void) } pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", - pnv_first_spr_loss_level); + deep_spr_loss_state); pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", pnv_first_tb_loss_level); @@ -1205,7 +1223,7 @@ static void __init pnv_probe_idle_states(void) return; } - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (pvr_version_is(PVR_POWER9)) pnv_power9_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 1656e8965d6b..c094fdf5825c 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -104,7 +104,7 @@ static int __opal_async_release_token(int token) */ case ASYNC_TOKEN_DISPATCHED: opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED; - /* Fall through */ + fallthrough; default: rc = 1; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index f923359d8afc..5218f5da2737 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -166,7 +166,7 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, if (!ptce) { ptce = pnv_tce(tbl, false, idx, alloc); if (!ptce) - return alloc ? H_HARDWARE : H_TOO_HARD; + return -ENOMEM; } if (newtce & TCE_PCI_WRITE) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 73a63efcf855..c9c25fb0783c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -115,32 +115,13 @@ static int __init pci_reset_phbs_setup(char *str) early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); -static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) -{ - /* - * WARNING: We cannot rely on the resource flags. The Linux PCI - * allocation code sometimes decides to put a 64-bit prefetchable - * BAR in the 32-bit window, so we have to compare the addresses. - * - * For simplicity we only test resource start. - */ - return (r->start >= phb->ioda.m64_base && - r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); -} - -static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) -{ - unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); - - return (resource_flags & flags) == flags; -} - static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) { s64 rc; phb->ioda.pe_array[pe_no].phb = phb; phb->ioda.pe_array[pe_no].pe_number = pe_no; + phb->ioda.pe_array[pe_no].dma_setup_done = false; /* * Clear the PE frozen state as it might be put into frozen state @@ -164,26 +145,48 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) return; } + mutex_lock(&phb->ioda.pe_alloc_mutex); if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) pr_debug("%s: PE %x was reserved on PHB#%x\n", __func__, pe_no, phb->hose->global_number); + mutex_unlock(&phb->ioda.pe_alloc_mutex); pnv_ioda_init_pe(phb, pe_no); } -static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count) { - long pe; + struct pnv_ioda_pe *ret = NULL; + int run = 0, pe, i; + + mutex_lock(&phb->ioda.pe_alloc_mutex); + /* scan backwards for a run of @count cleared bits */ for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { - if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) - return pnv_ioda_init_pe(phb, pe); + if (test_bit(pe, phb->ioda.pe_alloc)) { + run = 0; + continue; + } + + run++; + if (run == count) + break; + } + if (run != count) + goto out; + + for (i = pe; i < pe + count; i++) { + set_bit(i, phb->ioda.pe_alloc); + pnv_ioda_init_pe(phb, i); } + ret = &phb->ioda.pe_array[pe]; - return NULL; +out: + mutex_unlock(&phb->ioda.pe_alloc_mutex); + return ret; } -static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { struct pnv_phb *phb = pe->phb; unsigned int pe_num = pe->pe_number; @@ -192,7 +195,10 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ kfree(pe->npucomp); memset(pe, 0, sizeof(struct pnv_ioda_pe)); + + mutex_lock(&phb->ioda.pe_alloc_mutex); clear_bit(pe_num, phb->ioda.pe_alloc); + mutex_unlock(&phb->ioda.pe_alloc_mutex); } /* The default M64 BAR is shared by all PEs */ @@ -252,8 +258,7 @@ fail: static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, unsigned long *pe_bitmap) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct resource *r; resource_size_t base, sgsz, start, end; int segno, i; @@ -311,6 +316,28 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) } } + for (index = 0; index < phb->ioda.total_pe_num; index++) { + int64_t rc; + + /* + * P7IOC supports M64DT, which helps mapping M64 segment + * to one particular PE#. However, PHB3 has fixed mapping + * between M64 segment and PE#. In order to have same logic + * for P7IOC and PHB3, we enforce fixed mapping between M64 + * segment and PE# on P7IOC. + */ + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + index, OPAL_M64_WINDOW_TYPE, + index / PNV_IODA1_M64_SEGS, + index % PNV_IODA1_M64_SEGS); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + index); + goto fail; + } + } + /* * Exclude the segments for reserved and root bus PE, which * are first or last two PEs. @@ -351,8 +378,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *master_pe, *pe; unsigned long size, *pe_alloc; int i; @@ -403,26 +429,6 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) pe->master = master_pe; list_add_tail(&pe->list, &master_pe->slaves); } - - /* - * P7IOC supports M64DT, which helps mapping M64 segment - * to one particular PE#. However, PHB3 has fixed mapping - * between M64 segment and PE#. In order to have same logic - * for P7IOC and PHB3, we enforce fixed mapping between M64 - * segment and PE# on P7IOC. - */ - if (phb->type == PNV_PHB_IODA1) { - int64_t rc; - - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe->pe_number, OPAL_M64_WINDOW_TYPE, - pe->pe_number / PNV_IODA1_M64_SEGS, - pe->pe_number % PNV_IODA1_M64_SEGS); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", - __func__, rc, phb->hose->global_number, - pe->pe_number); - } } kfree(pe_alloc); @@ -673,8 +679,7 @@ struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn) struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); if (!pdn) @@ -816,7 +821,7 @@ static void pnv_ioda_unset_peltv(struct pnv_phb *phb, pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); } -static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; @@ -887,7 +892,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return 0; } -static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; @@ -982,95 +987,9 @@ out: return 0; } -#ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) -{ - struct pci_dn *pdn = pci_get_pdn(dev); - int i; - struct resource *res, res2; - resource_size_t size; - u16 num_vfs; - - if (!dev->is_physfn) - return -EINVAL; - - /* - * "offset" is in VFs. The M64 windows are sized so that when they - * are segmented, each segment is the same size as the IOV BAR. - * Each segment is in a separate PE, and the high order bits of the - * address are the PE number. Therefore, each VF's BAR is in a - * separate PE, and changing the IOV BAR start address changes the - * range of PEs the VFs are in. - */ - num_vfs = pdn->num_vfs; - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - /* - * The actual IOV BAR range is determined by the start address - * and the actual size for num_vfs VFs BAR. This check is to - * make sure that after shifting, the range will not overlap - * with another device. - */ - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); - res2.flags = res->flags; - res2.start = res->start + (size * offset); - res2.end = res2.start + (size * num_vfs) - 1; - - if (res2.end > res->end) { - dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", - i, &res2, res, num_vfs, offset); - return -EBUSY; - } - } - - /* - * Since M64 BAR shares segments among all possible 256 PEs, - * we have to shift the beginning of PF IOV BAR to make it start from - * the segment which belongs to the PE number assigned to the first VF. - * This creates a "hole" in the /proc/iomem which could be used for - * allocating other resources so we reserve this area below and - * release when IOV is released. - */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); - res2 = *res; - res->start += size * offset; - - dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", - i, &res2, res, (offset > 0) ? "En" : "Dis", - num_vfs, offset); - - if (offset < 0) { - devm_release_resource(&dev->dev, &pdn->holes[i]); - memset(&pdn->holes[i], 0, sizeof(pdn->holes[i])); - } - - pci_update_resource(dev, i + PCI_IOV_RESOURCES); - - if (offset > 0) { - pdn->holes[i].start = res2.start; - pdn->holes[i].end = res2.start + size * offset - 1; - pdn->holes[i].flags = IORESOURCE_BUS; - pdn->holes[i].name = "pnv_iov_reserved"; - devm_request_resource(&dev->dev, res->parent, - &pdn->holes[i]); - } - } - return 0; -} -#endif /* CONFIG_PCI_IOV */ - static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); struct pnv_ioda_pe *pe; @@ -1082,7 +1001,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) if (pdn->pe_number != IODA_INVALID_PE) return NULL; - pe = pnv_ioda_alloc_pe(phb); + pe = pnv_ioda_alloc_pe(phb, 1); if (!pe) { pr_warn("%s: Not enough PE# available, disabling device\n", pci_name(dev)); @@ -1129,8 +1048,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) */ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *pe = NULL; unsigned int pe_num; @@ -1154,7 +1072,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) /* The PE number isn't pinned by M64 */ if (!pe) - pe = pnv_ioda_alloc_pe(phb); + pe = pnv_ioda_alloc_pe(phb, 1); if (!pe) { pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", @@ -1196,8 +1114,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) struct pnv_ioda_pe *pe; struct pci_dev *gpu_pdev; struct pci_dn *npu_pdn; - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus); /* * Intentionally leak a reference on the npu device (for @@ -1297,446 +1214,12 @@ static void pnv_pci_ioda_setup_nvlink(void) #endif } -#ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pci_dn *pdn; - int i, j; - int m64_bars; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (pdn->m64_single_mode) - m64_bars = num_vfs; - else - m64_bars = 1; - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < m64_bars; j++) { - if (pdn->m64_map[j][i] == IODA_INVALID_M64) - continue; - opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); - clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); - pdn->m64_map[j][i] = IODA_INVALID_M64; - } - - kfree(pdn->m64_map); - return 0; -} - -static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pci_dn *pdn; - unsigned int win; - struct resource *res; - int i, j; - int64_t rc; - int total_vfs; - resource_size_t size, start; - int pe_num; - int m64_bars; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - total_vfs = pci_sriov_get_totalvfs(pdev); - - if (pdn->m64_single_mode) - m64_bars = num_vfs; - else - m64_bars = 1; - - pdn->m64_map = kmalloc_array(m64_bars, - sizeof(*pdn->m64_map), - GFP_KERNEL); - if (!pdn->m64_map) - return -ENOMEM; - /* Initialize the m64_map to IODA_INVALID_M64 */ - for (i = 0; i < m64_bars ; i++) - for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) - pdn->m64_map[i][j] = IODA_INVALID_M64; - - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - for (j = 0; j < m64_bars; j++) { - do { - win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, - phb->ioda.m64_bar_idx + 1, 0); - - if (win >= phb->ioda.m64_bar_idx + 1) - goto m64_failed; - } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); - - pdn->m64_map[j][i] = win; - - if (pdn->m64_single_mode) { - size = pci_iov_resource_size(pdev, - PCI_IOV_RESOURCES + i); - start = res->start + size * j; - } else { - size = resource_size(res); - start = res->start; - } - - /* Map the M64 here */ - if (pdn->m64_single_mode) { - pe_num = pdn->pe_num_map[j]; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe_num, OPAL_M64_WINDOW_TYPE, - pdn->m64_map[j][i], 0); - } - - rc = opal_pci_set_phb_mem_window(phb->opal_id, - OPAL_M64_WINDOW_TYPE, - pdn->m64_map[j][i], - start, - 0, /* unused */ - size); - - - if (rc != OPAL_SUCCESS) { - dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", - win, rc); - goto m64_failed; - } - - if (pdn->m64_single_mode) - rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); - else - rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); - - if (rc != OPAL_SUCCESS) { - dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", - win, rc); - goto m64_failed; - } - } - } - return 0; - -m64_failed: - pnv_pci_vf_release_m64(pdev, num_vfs); - return -EBUSY; -} - -static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, - int num); - -static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) -{ - struct iommu_table *tbl; - int64_t rc; - - tbl = pe->table_group.tables[0]; - rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); - if (rc) - pe_warn(pe, "OPAL error %lld release DMA window\n", rc); - - pnv_pci_ioda2_set_bypass(pe, false); - if (pe->table_group.group) { - iommu_group_put(pe->table_group.group); - BUG_ON(pe->table_group.group); - } - iommu_tce_table_put(tbl); -} - -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe, *pe_n; - struct pci_dn *pdn; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (!pdev->is_physfn) - return; - - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { - if (pe->parent_dev != pdev) - continue; - - pnv_pci_ioda2_release_dma_pe(pdev, pe); - - /* Remove from list */ - mutex_lock(&phb->ioda.pe_list_mutex); - list_del(&pe->list); - mutex_unlock(&phb->ioda.pe_list_mutex); - - pnv_ioda_deconfigure_pe(phb, pe); - - pnv_ioda_free_pe(pe); - } -} - -void pnv_pci_sriov_disable(struct pci_dev *pdev) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - u16 num_vfs, i; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - num_vfs = pdn->num_vfs; - - /* Release VF PEs */ - pnv_ioda_release_vf_PE(pdev); - - if (phb->type == PNV_PHB_IODA2) { - if (!pdn->m64_single_mode) - pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); - - /* Release M64 windows */ - pnv_pci_vf_release_m64(pdev, num_vfs); - - /* Release PE numbers */ - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] == IODA_INVALID_PE) - continue; - - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; - pnv_ioda_free_pe(pe); - } - } else - bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - /* Releasing pe_num_map */ - kfree(pdn->pe_num_map); - } -} - -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); -static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - int pe_num; - u16 vf_index; - struct pci_dn *pdn; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (!pdev->is_physfn) - return; - - /* Reserve PE for each VF */ - for (vf_index = 0; vf_index < num_vfs; vf_index++) { - int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); - int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); - struct pci_dn *vf_pdn; - - if (pdn->m64_single_mode) - pe_num = pdn->pe_num_map[vf_index]; - else - pe_num = *pdn->pe_num_map + vf_index; - - pe = &phb->ioda.pe_array[pe_num]; - pe->pe_number = pe_num; - pe->phb = phb; - pe->flags = PNV_IODA_PE_VF; - pe->pbus = NULL; - pe->parent_dev = pdev; - pe->mve_number = -1; - pe->rid = (vf_bus << 8) | vf_devfn; - - pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", - hose->global_number, pdev->bus->number, - PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); - - if (pnv_ioda_configure_pe(phb, pe)) { - /* XXX What do we do here ? */ - pnv_ioda_free_pe(pe); - pe->pdev = NULL; - continue; - } - - /* Put PE to the list */ - mutex_lock(&phb->ioda.pe_list_mutex); - list_add_tail(&pe->list, &phb->ioda.pe_list); - mutex_unlock(&phb->ioda.pe_list_mutex); - - /* associate this pe to it's pdn */ - list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { - if (vf_pdn->busno == vf_bus && - vf_pdn->devfn == vf_devfn) { - vf_pdn->pe_number = pe_num; - break; - } - } - - pnv_pci_ioda2_setup_dma_pe(phb, pe); - } -} - -int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - int ret; - u16 i; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (phb->type == PNV_PHB_IODA2) { - if (!pdn->vfs_expanded) { - dev_info(&pdev->dev, "don't support this SRIOV device" - " with non 64bit-prefetchable IOV BAR\n"); - return -ENOSPC; - } - - /* - * When M64 BARs functions in Single PE mode, the number of VFs - * could be enabled must be less than the number of M64 BARs. - */ - if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { - dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); - return -EBUSY; - } - - /* Allocating pe_num_map */ - if (pdn->m64_single_mode) - pdn->pe_num_map = kmalloc_array(num_vfs, - sizeof(*pdn->pe_num_map), - GFP_KERNEL); - else - pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); - - if (!pdn->pe_num_map) - return -ENOMEM; - - if (pdn->m64_single_mode) - for (i = 0; i < num_vfs; i++) - pdn->pe_num_map[i] = IODA_INVALID_PE; - - /* Calculate available PE for required VFs */ - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - pe = pnv_ioda_alloc_pe(phb); - if (!pe) { - ret = -EBUSY; - goto m64_failed; - } - - pdn->pe_num_map[i] = pe->pe_number; - } - } else { - mutex_lock(&phb->ioda.pe_alloc_mutex); - *pdn->pe_num_map = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, phb->ioda.total_pe_num, - 0, num_vfs, 0); - if (*pdn->pe_num_map >= phb->ioda.total_pe_num) { - mutex_unlock(&phb->ioda.pe_alloc_mutex); - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); - kfree(pdn->pe_num_map); - return -EBUSY; - } - bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - mutex_unlock(&phb->ioda.pe_alloc_mutex); - } - pdn->num_vfs = num_vfs; - - /* Assign M64 window accordingly */ - ret = pnv_pci_vf_assign_m64(pdev, num_vfs); - if (ret) { - dev_info(&pdev->dev, "Not enough M64 window resources\n"); - goto m64_failed; - } - - /* - * When using one M64 BAR to map one IOV BAR, we need to shift - * the IOV BAR according to the PE# allocated to the VFs. - * Otherwise, the PE# for the VF will conflict with others. - */ - if (!pdn->m64_single_mode) { - ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); - if (ret) - goto m64_failed; - } - } - - /* Setup VF PEs */ - pnv_ioda_setup_vf_PE(pdev, num_vfs); - - return 0; - -m64_failed: - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] == IODA_INVALID_PE) - continue; - - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; - pnv_ioda_free_pe(pe); - } - } else - bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - - /* Releasing pe_num_map */ - kfree(pdn->pe_num_map); - - return ret; -} - -int pnv_pcibios_sriov_disable(struct pci_dev *pdev) -{ - pnv_pci_sriov_disable(pdev); - - /* Release PCI data */ - remove_sriov_vf_pdns(pdev); - return 0; -} - -int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) -{ - /* Allocate PCI data */ - add_sriov_vf_pdns(pdev); - - return pnv_pci_sriov_enable(pdev, num_vfs); -} -#endif /* CONFIG_PCI_IOV */ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; @@ -1762,6 +1245,24 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number); } + /* + * We assume that bridges *probably* don't need to do any DMA so we can + * skip allocating a TCE table, etc unless we get a non-bridge device. + */ + if (!pe->dma_setup_done && !pci_is_bridge(pdev)) { + switch (phb->type) { + case PNV_PHB_IODA1: + pnv_pci_ioda1_setup_dma_pe(phb, pe); + break; + case PNV_PHB_IODA2: + pnv_pci_ioda2_setup_dma_pe(phb, pe); + break; + default: + pr_warn("%s: No DMA for PHB#%x (type %d)\n", + __func__, phb->hose->global_number, phb->type); + } + } + if (pdn) pdn->pe_number = pe->pe_number; pe->device_count++; @@ -1847,8 +1348,7 @@ err: static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, u64 dma_mask) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; @@ -1885,19 +1385,6 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, return false; } -static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); - dev->dev.archdata.dma_offset = pe->tce_bypass_base; - - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); - } -} - static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb, bool real_mode) { @@ -2285,6 +1772,7 @@ found: pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node, 0, 0); + pe->dma_setup_done = true; return; fail: /* XXX Failure: Try to fallback to 64-bit only ? */ @@ -2474,7 +1962,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) return 0; } -#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV) static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, int num) { @@ -2498,7 +1985,6 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, return ret; } -#endif #ifdef CONFIG_IOMMU_API unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, @@ -2547,6 +2033,19 @@ static long pnv_pci_ioda2_create_table_userspace( return ret; } +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); + dev->dev.archdata.dma_offset = pe->tce_bypass_base; + + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, @@ -2583,14 +2082,11 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { }; #endif -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) { int64_t rc; - if (!pnv_pci_ioda_pe_dma_weight(pe)) - return; - /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; @@ -2615,6 +2111,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); #endif + pe->dma_setup_done = true; } int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) @@ -2763,118 +2260,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) count, phb->msi_base); } -#ifdef CONFIG_PCI_IOV -static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - const resource_size_t gate = phb->ioda.m64_segsize >> 2; - struct resource *res; - int i; - resource_size_t size, total_vf_bar_sz; - struct pci_dn *pdn; - int mul, total_vfs; - - pdn = pci_get_pdn(pdev); - pdn->vfs_expanded = 0; - pdn->m64_single_mode = false; - - total_vfs = pci_sriov_get_totalvfs(pdev); - mul = phb->ioda.total_pe_num; - total_vf_bar_sz = 0; - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || res->parent) - continue; - if (!pnv_pci_is_m64_flags(res->flags)) { - dev_warn(&pdev->dev, "Don't support SR-IOV with" - " non M64 VF BAR%d: %pR. \n", - i, res); - goto truncate_iov; - } - - total_vf_bar_sz += pci_iov_resource_size(pdev, - i + PCI_IOV_RESOURCES); - - /* - * If bigger than quarter of M64 segment size, just round up - * power of two. - * - * Generally, one M64 BAR maps one IOV BAR. To avoid conflict - * with other devices, IOV BAR size is expanded to be - * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64 - * segment size , the expanded size would equal to half of the - * whole M64 space size, which will exhaust the M64 Space and - * limit the system flexibility. This is a design decision to - * set the boundary to quarter of the M64 segment size. - */ - if (total_vf_bar_sz > gate) { - mul = roundup_pow_of_two(total_vfs); - dev_info(&pdev->dev, - "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n", - total_vf_bar_sz, gate, mul); - pdn->m64_single_mode = true; - break; - } - } - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || res->parent) - continue; - - size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); - /* - * On PHB3, the minimum size alignment of M64 BAR in single - * mode is 32MB. - */ - if (pdn->m64_single_mode && (size < SZ_32M)) - goto truncate_iov; - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); - res->end = res->start + size * mul - 1; - dev_dbg(&pdev->dev, " %pR\n", res); - dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", - i, res, mul); - } - pdn->vfs_expanded = mul; - - return; - -truncate_iov: - /* To save MMIO space, IOV BAR is truncated. */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - res->flags = 0; - res->end = res->start - 1; - } -} - -static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) -{ - if (WARN_ON(pci_dev_is_added(pdev))) - return; - - if (pdev->is_virtfn) { - struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); - - /* - * VF PEs are single-device PEs so their pdev pointer needs to - * be set. The pdev doesn't exist when the PE is allocated (in - * (pcibios_sriov_enable()) so we fix it up here. - */ - pe->pdev = pdev; - WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); - } else if (pdev->is_physfn) { - /* - * For PFs adjust their allocated IOV resources to match what - * the PHB can support using it's M64 BAR table. - */ - pnv_pci_ioda_fixup_iov_resources(pdev); - } -} -#endif /* CONFIG_PCI_IOV */ - static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, struct resource *res) { @@ -3101,10 +2486,9 @@ static void pnv_pci_ioda_fixup(void) static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, unsigned long type) { - struct pci_dev *bridge; - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); int num_pci_bridges = 0; + struct pci_dev *bridge; bridge = bus->self; while (bridge) { @@ -3190,8 +2574,6 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, static void pnv_pci_configure_bus(struct pci_bus *bus) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; struct pci_dev *bridge = bus->self; struct pnv_ioda_pe *pe; bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); @@ -3215,17 +2597,6 @@ static void pnv_pci_configure_bus(struct pci_bus *bus) return; pnv_ioda_setup_pe_seg(pe); - switch (phb->type) { - case PNV_PHB_IODA1: - pnv_pci_ioda1_setup_dma_pe(phb, pe); - break; - case PNV_PHB_IODA2: - pnv_pci_ioda2_setup_dma_pe(phb, pe); - break; - default: - pr_warn("%s: No DMA for PHB#%x (type %d)\n", - __func__, phb->hose->global_number, phb->type); - } } static resource_size_t pnv_pci_default_alignment(void) @@ -3233,49 +2604,12 @@ static resource_size_t pnv_pci_default_alignment(void) return PAGE_SIZE; } -#ifdef CONFIG_PCI_IOV -static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, - int resno) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); - resource_size_t align; - - /* - * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the - * SR-IOV. While from hardware perspective, the range mapped by M64 - * BAR should be size aligned. - * - * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra - * powernv-specific hardware restriction is gone. But if just use the - * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with - * in one segment of M64 #15, which introduces the PE conflict between - * PF and VF. Based on this, the minimum alignment of an IOV BAR is - * m64_segsize. - * - * This function returns the total IOV BAR size if M64 BAR is in - * Shared PE mode or just VF BAR size if not. - * If the M64 BAR is in Single PE mode, return the VF BAR size or - * M64 segment size if IOV BAR size is less. - */ - align = pci_iov_resource_size(pdev, resno); - if (!pdn->vfs_expanded) - return align; - if (pdn->m64_single_mode) - return max(align, (resource_size_t)phb->ioda.m64_segsize); - - return pdn->vfs_expanded * align; -} -#endif /* CONFIG_PCI_IOV */ - /* Prevent enabling devices for which we couldn't properly * assign a PE */ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn; /* The function is probably called while the PEs have @@ -3346,11 +2680,10 @@ static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) { - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); struct iommu_table *tbl = pe->table_group.tables[0]; int64_t rc; - if (!weight) + if (!pe->dma_setup_done) return; rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0); @@ -3367,22 +2700,17 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) iommu_tce_table_put(tbl); } -static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) { struct iommu_table *tbl = pe->table_group.tables[0]; - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); -#ifdef CONFIG_IOMMU_API int64_t rc; -#endif - if (!weight) + if (pe->dma_setup_done) return; -#ifdef CONFIG_IOMMU_API rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) pe_warn(pe, "OPAL error %lld release DMA window\n", rc); -#endif pnv_pci_ioda2_set_bypass(pe, false); if (pe->table_group.group) { @@ -3405,14 +2733,8 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, if (map[idx] != pe->pe_number) continue; - if (win == OPAL_M64_WINDOW_TYPE) - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - phb->ioda.reserved_pe_idx, win, - idx / PNV_IODA1_M64_SEGS, - idx % PNV_IODA1_M64_SEGS); - else - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - phb->ioda.reserved_pe_idx, win, 0, idx); + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, 0, idx); if (rc != OPAL_SUCCESS) pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n", @@ -3431,8 +2753,7 @@ static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe) phb->ioda.io_segmap); pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap); - pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE, - phb->ioda.m64_segmap); + /* M64 is pre-configured by pnv_ioda1_init_m64() */ } else if (phb->type == PNV_PHB_IODA2) { pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap); @@ -3488,17 +2809,27 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) static void pnv_pci_release_device(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; + /* The VF PE state is torn down when sriov_disable() is called */ if (pdev->is_virtfn) return; if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; +#ifdef CONFIG_PCI_IOV + /* + * FIXME: Try move this to sriov_disable(). It's here since we allocate + * the iov state at probe time since we need to fiddle with the IOV + * resources. + */ + if (pdev->is_physfn) + kfree(pdev->dev.archdata.iov_data); +#endif + /* * PCI hotplug can happen as part of EEH error recovery. The @pdn * isn't removed and added afterwards in this scenario. We should @@ -3534,8 +2865,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) { - struct pci_controller *hose = bus->sysdata; - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *pe; list_for_each_entry(pe, &phb->ioda.pe_list, list) { @@ -3760,7 +3090,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); } else { /* otherwise just allocate one */ - root_pe = pnv_ioda_alloc_pe(phb); + root_pe = pnv_ioda_alloc_pe(phb, 1); phb->ioda.root_pe_idx = root_pe->pe_number; } @@ -3873,8 +3203,7 @@ void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); if (!machine_is(powernv)) return; diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c new file mode 100644 index 000000000000..c4434f20f42f --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -0,0 +1,766 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/kernel.h> +#include <linux/ioport.h> +#include <linux/bitmap.h> +#include <linux/pci.h> + +#include <asm/opal.h> + +#include "pci.h" + +/* for pci_dev_is_added() */ +#include "../../../../drivers/pci/pci.h" + +/* + * The majority of the complexity in supporting SR-IOV on PowerNV comes from + * the need to put the MMIO space for each VF into a separate PE. Internally + * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table". + * The MBT historically only applied to the 64bit MMIO window of the PHB + * so it's common to see it referred to as the "M64BT". + * + * An MBT entry stores the mapped range as an <base>,<mask> pair. This forces + * the address range that we want to map to be power-of-two sized and aligned. + * For conventional PCI devices this isn't really an issue since PCI device BARs + * have the same requirement. + * + * For a SR-IOV BAR things are a little more awkward since size and alignment + * are not coupled. The alignment is set based on the the per-VF BAR size, but + * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs + * isn't necessarily a power of two, so neither is the total size. To fix that + * we need to finesse (read: hack) the Linux BAR allocator so that it will + * allocate the SR-IOV BARs in a way that lets us map them using the MBT. + * + * The changes to size and alignment that we need to do depend on the "mode" + * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above, + * so as a baseline we can assume that we have the following BAR modes + * available: + * + * NB: $PE_COUNT is the number of PEs that the PHB supports. + * + * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized + * segments. The n'th segment is mapped to the n'th PE. + * b) An un-segmented BAR that maps the whole address range to a specific PE. + * + * + * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR + * For comparison b) requires one entry per-VF per-BAR, or: + * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment + * to equal the size of the per-VF BAR area. So: + * + * new_size = per-vf-size * number-of-PEs + * + * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size + * to "new_size", calculated above. Implementing this is a convoluted process + * which requires several hooks in the PCI core: + * + * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov(). + * + * At this point the device has been probed and the device's BARs are sized, + * but no resource allocations have been done. The SR-IOV BARs are sized + * based on the maximum number of VFs supported by the device and we need + * to increase that to new_size. + * + * 2. Later, when Linux actually assigns resources it tries to make the resource + * allocations for each PCI bus as compact as possible. As a part of that it + * sorts the BARs on a bus by their required alignment, which is calculated + * using pci_resource_alignment(). + * + * For IOV resources this goes: + * pci_resource_alignment() + * pci_sriov_resource_alignment() + * pcibios_sriov_resource_alignment() + * pnv_pci_iov_resource_alignment() + * + * Our hook overrides the default alignment, equal to the per-vf-size, with + * new_size computed above. + * + * 3. When userspace enables VFs for a device: + * + * sriov_enable() + * pcibios_sriov_enable() + * pnv_pcibios_sriov_enable() + * + * This is where we actually allocate PE numbers for each VF and setup the + * MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena" + * where each MBT segment is equal in size to the VF BAR so we can shift + * around the actual SR-IOV BAR location within this arena. We need this + * ability because the PE space is shared by all devices on the same PHB. + * When using mode a) described above segment 0 in maps to PE#0 which might + * be already being used by another device on the PHB. + * + * As a result we need allocate a contigious range of PE numbers, then shift + * the address programmed into the SR-IOV BAR of the PF so that the address + * of VF0 matches up with the segment corresponding to the first allocated + * PE number. This is handled in pnv_pci_vf_resource_shift(). + * + * Once all that is done we return to the PCI core which then enables VFs, + * scans them and creates pci_devs for each. The init process for a VF is + * largely the same as a normal device, but the VF is inserted into the IODA + * PE that we allocated for it rather than the PE associated with the bus. + * + * 4. When userspace disables VFs we unwind the above in + * pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since + * we don't need to validate anything, just tear down the mappings and + * move SR-IOV resource back to its "proper" location. + * + * That's how mode a) works. In theory mode b) (single PE mapping) is less work + * since we can map each individual VF with a separate BAR. However, there's a + * few limitations: + * + * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes + * it only usable for devices with very large per-VF BARs. Such devices are + * similar to Big Foot. They definitely exist, but I've never seen one. + * + * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only + * 16 total and some are needed for. Most SR-IOV capable network cards can support + * more than 16 VFs on each port. + * + * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO + * window of the PHB. + * + * + * + * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It + * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows + * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since + * the Linux BAR allocation will place any BAR marked as non-prefetchable into + * the non-prefetchable bridge window, which is 32bit only. It also added two + * new modes: + * + * c) A segmented BAR similar to a), but each segment can be individually + * mapped to any PE. This is matches how the 32bit MMIO window worked on + * IODA1&2. + * + * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a), + * but with fewer segments and configurable base PE. + * + * i.e. The n'th segment maps to the (n + base)'th PE. + * + * The base PE is also required to be a multiple of the window size. + * + * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us + * to exploit any of the IODA3 features. + */ + +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct resource *res; + int i; + resource_size_t vf_bar_sz; + struct pnv_iov_data *iov; + int mul; + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + goto disable_iov; + pdev->dev.archdata.iov_data = iov; + mul = phb->ioda.total_pe_num; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_m64_flags(res->flags)) { + dev_warn(&pdev->dev, "Don't support SR-IOV with non M64 VF BAR%d: %pR. \n", + i, res); + goto disable_iov; + } + + vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + + /* + * Generally, one segmented M64 BAR maps one IOV BAR. However, + * if a VF BAR is too large we end up wasting a lot of space. + * If each VF needs more than 1/4 of the default m64 segment + * then each VF BAR should be mapped in single-PE mode to reduce + * the amount of space required. This does however limit the + * number of VFs we can support. + * + * The 1/4 limit is arbitrary and can be tweaked. + */ + if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) { + /* + * On PHB3, the minimum size alignment of M64 BAR in + * single mode is 32MB. If this VF BAR is smaller than + * 32MB, but still too large for a segmented window + * then we can't map it and need to disable SR-IOV for + * this device. + */ + if (vf_bar_sz < SZ_32M) { + pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n", + i, res); + goto disable_iov; + } + + iov->m64_single_mode[i] = true; + continue; + } + + /* + * This BAR can be mapped with one segmented window, so adjust + * te resource size to accommodate. + */ + pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res); + res->end = res->start + vf_bar_sz * mul - 1; + pci_dbg(pdev, " %pR\n", res); + + pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", + i, res, mul); + + iov->need_shift = true; + } + + return; + +disable_iov: + /* Save ourselves some MMIO space by disabling the unusable BARs */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + res->flags = 0; + res->end = res->start - 1; + } + + pdev->dev.archdata.iov_data = NULL; + kfree(iov); +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (WARN_ON(pci_dev_is_added(pdev))) + return; + + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * (pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using it's M64 BAR table. + */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} + +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + resource_size_t align = pci_iov_resource_size(pdev, resno); + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pnv_iov_data *iov = pnv_iov_get(pdev); + + /* + * iov can be null if we have an SR-IOV device with IOV BAR that can't + * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch). + * In that case we don't allow VFs to be enabled since one of their + * BARs would not be placed in the correct PE. + */ + if (!iov) + return align; + + /* + * If we're using single mode then we can just use the native VF BAR + * alignment. We validated that it's possible to use a single PE + * window above when we did the fixup. + */ + if (iov->m64_single_mode[resno - PCI_IOV_RESOURCES]) + return align; + + /* + * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the + * SR-IOV. While from hardware perspective, the range mapped by M64 + * BAR should be size aligned. + * + * This function returns the total IOV BAR size if M64 BAR is in + * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. + */ + return phb->ioda.total_pe_num * align; +} + +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int window_id; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for_each_set_bit(window_id, iov->used_m64_bar_mask, MAX_M64_BARS) { + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + + clear_bit(window_id, &phb->ioda.m64_bar_alloc); + } + + return 0; +} + + +/* + * PHB3 and beyond support segmented windows. The window's address range + * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1 + * mapping between PEs and segments. + */ +static int64_t pnv_ioda_map_m64_segmented(struct pnv_phb *phb, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* unused */ + size); + if (rc) + goto out; + + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_SPLIT); +out: + if (rc) + pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc); + + return rc; +} + +static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb, + int pe_num, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + /* + * The API for setting up m64 mmio windows seems to have been designed + * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed + * split of 8 equally sized segments each of which could individually + * assigned to a PE. + * + * The problem with this is that the API doesn't have any way to + * communicate the number of segments we want on a BAR. This wasn't + * a problem for p7-ioc since you didn't have a choice, but the + * single PE windows added in PHB3 don't map cleanly to this API. + * + * As a result we've got this slightly awkward process where we + * call opal_pci_map_pe_mmio_window() to put the single in single + * PE mode, and set the PE for the window before setting the address + * bounds. We need to do it this way because the single PE windows + * for PHB3 have different alignment requirements on PHB3. + */ + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe_num, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + if (rc) + goto out; + + /* + * NB: In single PE mode the window needs to be aligned to 32MB + */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* ignored by FW, m64 is 1-1 */ + size); + if (rc) + goto out; + + /* + * Now actually enable it. We specified the BAR should be in "non-split" + * mode so FW will validate that the BAR is in single PE mode. + */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_NON_SPLIT); +out: + if (rc) + pr_err("Error mapping single PE BAR\n"); + + return rc; +} + +static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov) +{ + int win; + + do { + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, + phb->ioda.m64_bar_idx + 1, 0); + + if (win >= phb->ioda.m64_bar_idx + 1) + return -1; + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); + + set_bit(win, iov->used_m64_bar_mask); + + return win; +} + +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + unsigned int win; + struct resource *res; + int i, j; + int64_t rc; + resource_size_t size, start; + int base_pe_num; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + /* don't need single mode? map everything in one go! */ + if (!iov->m64_single_mode[i]) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + size = resource_size(res); + start = res->start; + + rc = pnv_ioda_map_m64_segmented(phb, win, start, size); + if (rc) + goto m64_failed; + + continue; + } + + /* otherwise map each VF with single PE BARs */ + size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); + base_pe_num = iov->vf_pe_arr[0].pe_number; + + for (j = 0; j < num_vfs; j++) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + start = res->start + size * j; + rc = pnv_ioda_map_m64_single(phb, win, + base_pe_num + j, + start, + size); + if (rc) + goto m64_failed; + } + } + return 0; + +m64_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + return -EBUSY; +} + +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *pe_n; + + phb = pci_bus_to_pnvhb(pdev->bus); + + if (!pdev->is_physfn) + return; + + /* FIXME: Use pnv_ioda_release_pe()? */ + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { + if (pe->parent_dev != pdev) + continue; + + pnv_pci_ioda2_release_pe_dma(pe); + + /* Remove from list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_ioda_deconfigure_pe(phb, pe); + + pnv_ioda_free_pe(pe); + } +} + +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) +{ + struct resource *res, res2; + struct pnv_iov_data *iov; + resource_size_t size; + u16 num_vfs; + int i; + + if (!dev->is_physfn) + return -EINVAL; + iov = pnv_iov_get(dev); + + /* + * "offset" is in VFs. The M64 windows are sized so that when they + * are segmented, each segment is the same size as the IOV BAR. + * Each segment is in a separate PE, and the high order bits of the + * address are the PE number. Therefore, each VF's BAR is in a + * separate PE, and changing the IOV BAR start address changes the + * range of PEs the VFs are in. + */ + num_vfs = iov->num_vfs; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + /* + * The actual IOV BAR range is determined by the start address + * and the actual size for num_vfs VFs BAR. This check is to + * make sure that after shifting, the range will not overlap + * with another device. + */ + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2.flags = res->flags; + res2.start = res->start + (size * offset); + res2.end = res2.start + (size * num_vfs) - 1; + + if (res2.end > res->end) { + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + return -EBUSY; + } + } + + /* + * Since M64 BAR shares segments among all possible 256 PEs, + * we have to shift the beginning of PF IOV BAR to make it start from + * the segment which belongs to the PE number assigned to the first VF. + * This creates a "hole" in the /proc/iomem which could be used for + * allocating other resources so we reserve this area below and + * release when IOV is released. + */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2 = *res; + res->start += size * offset; + + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", + i, &res2, res, (offset > 0) ? "En" : "Dis", + num_vfs, offset); + + if (offset < 0) { + devm_release_resource(&dev->dev, &iov->holes[i]); + memset(&iov->holes[i], 0, sizeof(iov->holes[i])); + } + + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + + if (offset > 0) { + iov->holes[i].start = res2.start; + iov->holes[i].end = res2.start + size * offset - 1; + iov->holes[i].flags = IORESOURCE_BUS; + iov->holes[i].name = "pnv_iov_reserved"; + devm_request_resource(&dev->dev, res->parent, + &iov->holes[i]); + } + } + return 0; +} + +static void pnv_pci_sriov_disable(struct pci_dev *pdev) +{ + u16 num_vfs, base_pe; + struct pnv_iov_data *iov; + + iov = pnv_iov_get(pdev); + num_vfs = iov->num_vfs; + base_pe = iov->vf_pe_arr[0].pe_number; + + if (WARN_ON(!iov)) + return; + + /* Release VF PEs */ + pnv_ioda_release_vf_PE(pdev); + + /* Un-shift the IOV BARs if we need to */ + if (iov->need_shift) + pnv_pci_vf_resource_shift(pdev, -base_pe); + + /* Release M64 windows */ + pnv_pci_vf_release_m64(pdev, num_vfs); +} + +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + int pe_num; + u16 vf_index; + struct pnv_iov_data *iov; + struct pci_dn *pdn; + + if (!pdev->is_physfn) + return; + + phb = pci_bus_to_pnvhb(pdev->bus); + pdn = pci_get_pdn(pdev); + iov = pnv_iov_get(pdev); + + /* Reserve PE for each VF */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + + pe = &iov->vf_pe_arr[vf_index]; + pe->phb = phb; + pe->flags = PNV_IODA_PE_VF; + pe->pbus = NULL; + pe->parent_dev = pdev; + pe->mve_number = -1; + pe->rid = (vf_bus << 8) | vf_devfn; + + pe_num = pe->pe_number; + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", + pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? */ + pnv_ioda_free_pe(pe); + pe->pdev = NULL; + continue; + } + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } +} + +static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_ioda_pe *base_pe; + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int ret; + u16 i; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + /* + * There's a calls to IODA2 PE setup code littered throughout. We could + * probably fix that, but we'd still have problems due to the + * restriction inherent on IODA1 PHBs. + * + * NB: We class IODA3 as IODA2 since they're very similar. + */ + if (phb->type != PNV_PHB_IODA2) { + pci_err(pdev, "SR-IOV is not supported on this PHB\n"); + return -ENXIO; + } + + if (!iov) { + dev_info(&pdev->dev, "don't support this SRIOV device with non 64bit-prefetchable IOV BAR\n"); + return -ENOSPC; + } + + /* allocate a contigious block of PEs for our VFs */ + base_pe = pnv_ioda_alloc_pe(phb, num_vfs); + if (!base_pe) { + pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs); + return -EBUSY; + } + + iov->vf_pe_arr = base_pe; + iov->num_vfs = num_vfs; + + /* Assign M64 window accordingly */ + ret = pnv_pci_vf_assign_m64(pdev, num_vfs); + if (ret) { + dev_info(&pdev->dev, "Not enough M64 window resources\n"); + goto m64_failed; + } + + /* + * When using one M64 BAR to map one IOV BAR, we need to shift + * the IOV BAR according to the PE# allocated to the VFs. + * Otherwise, the PE# for the VF will conflict with others. + */ + if (iov->need_shift) { + ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number); + if (ret) + goto shift_failed; + } + + /* Setup VF PEs */ + pnv_ioda_setup_vf_PE(pdev, num_vfs); + + return 0; + +shift_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + +m64_failed: + for (i = 0; i < num_vfs; i++) + pnv_ioda_free_pe(&iov->vf_pe_arr[i]); + + return ret; +} + +int pnv_pcibios_sriov_disable(struct pci_dev *pdev) +{ + pnv_pci_sriov_disable(pdev); + + /* Release PCI data */ + remove_sriov_vf_pdns(pdev); + return 0; +} + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_sriov_vf_pdns(pdev); + + return pnv_pci_sriov_enable(pdev, num_vfs); +} diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 091fe1cf386b..9b9bca169275 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -162,8 +162,7 @@ EXPORT_SYMBOL_GPL(pnv_pci_set_power_state); int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct msi_desc *entry; struct msi_msg msg; int hwirq; @@ -211,8 +210,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) void pnv_teardown_msi_irqs(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct msi_desc *entry; irq_hw_number_t hwirq; @@ -824,10 +822,9 @@ EXPORT_SYMBOL(pnv_pci_get_phb_node); int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) { - __be64 val; - struct pci_controller *hose; - struct pnv_phb *phb; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); u64 tunnel_bar; + __be64 val; int rc; if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) @@ -835,9 +832,6 @@ int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) return -ENXIO; - hose = pci_bus_to_host(dev->bus); - phb = hose->private_data; - mutex_lock(&tunnel_mutex); rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); if (rc != OPAL_SUCCESS) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 51c254f2f3cb..739a0b3b72e1 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -87,7 +87,14 @@ struct pnv_ioda_pe { bool tce_bypass_enabled; uint64_t tce_bypass_base; - /* MSIs. MVE index is identical for for 32 and 64 bit MSI + /* + * Used to track whether we've done DMA setup for this PE or not. We + * want to defer allocating TCE tables, etc until we've added a + * non-bridge device to the PE. + */ + bool dma_setup_done; + + /* MSIs. MVE index is identical for 32 and 64 bit MSI * and -1 if not supported. (It's actually identical to the * PE number) */ @@ -147,6 +154,7 @@ struct pnv_phb { unsigned long m64_size; unsigned long m64_segsize; unsigned long m64_base; +#define MAX_M64_BARS 64 unsigned long m64_bar_alloc; /* IO ports */ @@ -187,6 +195,89 @@ struct pnv_phb { u8 *diag_data; }; + +/* IODA PE management */ + +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) +{ + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. + */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); +} + +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; +} + +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); + +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe); + +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count); +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe); + +#ifdef CONFIG_PCI_IOV +/* + * For SR-IOV we want to put each VF's MMIO resource in to a separate PE. + * This requires a bit of acrobatics with the MMIO -> PE configuration + * and this structure is used to keep track of it all. + */ +struct pnv_iov_data { + /* number of VFs enabled */ + u16 num_vfs; + + /* pointer to the array of VF PEs. num_vfs long*/ + struct pnv_ioda_pe *vf_pe_arr; + + /* Did we map the VF BAR with single-PE IODA BARs? */ + bool m64_single_mode[PCI_SRIOV_NUM_BARS]; + + /* + * True if we're using any segmented windows. In that case we need + * shift the start of the IOV resource the segment corresponding to + * the allocated PE. + */ + bool need_shift; + + /* + * Bit mask used to track which m64 windows are used to map the + * SR-IOV BARs for this device. + */ + DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS); + + /* + * If we map the SR-IOV BARs with a segmented window then + * parts of that window will be "claimed" by other PEs. + * + * "holes" here is used to reserve the leading portion + * of the window that is used by other (non VF) PEs. + */ + struct resource holes[PCI_SRIOV_NUM_BARS]; +}; + +static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev) +{ + return pdev->dev.archdata.iov_data; +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev); +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno); + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs); +int pnv_pcibios_sriov_disable(struct pci_dev *pdev); +#endif /* CONFIG_PCI_IOV */ + extern struct pci_ops pnv_pci_ops; void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, @@ -260,4 +351,14 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); +static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + + if (hose) + return hose->private_data; + + return NULL; +} + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3bc188da82ba..7fcb88623081 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -399,7 +399,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) { - return 256UL * 1024 * 1024; + /* + * We map the kernel linear region with 1GB large pages on radix. For + * memory hot unplug to work our memory block size must be at least + * this size. + */ + if (radix_enabled()) + return radix_mem_block_size; + else + return 256UL * 1024 * 1024; } #endif diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 24c18362e5ea..5e037df2a3a1 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -25,15 +25,22 @@ config PPC_PSERIES select SWIOTLB default y +config PARAVIRT_SPINLOCKS + bool + config PPC_SPLPAR - depends on PPC_PSERIES bool "Support for shared-processor logical partitions" + depends on PPC_PSERIES + select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS + default y help Enabling this option will make the kernel run more efficiently on logically-partitioned pSeries systems which use shared processors, that is, which share physical processors between two or more partitions. + Say Y if you are unsure. + config DTL bool "Dispatch Trace Log" depends on PPC_SPLPAR && DEBUG_FS diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ace117f99d94..cb2d9a970b7b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -24,6 +24,7 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/spinlock.h> +#include <linux/crash_dump.h> #include <asm/eeh.h> #include <asm/eeh_event.h> @@ -32,6 +33,8 @@ #include <asm/ppc-pci.h> #include <asm/rtas.h> +static int pseries_eeh_get_pe_addr(struct pci_dn *pdn); + /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; @@ -52,8 +55,6 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) dev_dbg(&pdev->dev, "EEH: Setting up device\n"); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { - struct pci_dn *physfn_pdn; - pdn->device_id = pdev->device; pdn->vendor_id = pdev->vendor; pdn->class_code = pdev->class; @@ -63,23 +64,172 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) * completion from platform. */ pdn->last_allow_rc = 0; - physfn_pdn = pci_get_pdn(pdev->physfn); - pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; } #endif pseries_eeh_init_edev(pdn); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { + /* + * FIXME: This really should be handled by choosing the right + * parent PE in in pseries_eeh_init_edev(). + */ + struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8); - eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */ - eeh_add_to_parent_pe(edev); /* Add as VF PE type */ + eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */ + eeh_pe_tree_insert(edev, physfn_pe); /* Add as VF PE type */ } #endif eeh_probe_device(pdev); } + +/** + * pseries_eeh_get_config_addr - Retrieve config address + * + * Retrieve the assocated config address. Actually, there're 2 RTAS + * function calls dedicated for the purpose. We need implement + * it through the new function and then the old one. Besides, + * you should make sure the config address is figured out from + * FDT node before calling the function. + * + * It's notable that zero'ed return value means invalid PE config + * address. + */ +static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr) +{ + int ret = 0; + int rets[3]; + + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { + /* + * First of all, we need to make sure there has one PE + * associated with the device. Otherwise, PE address is + * meaningless. + */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 1); + if (ret || (rets[0] == 0)) + return 0; + + /* Retrieve the associated PE config address */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return 0; + } + + return rets[0]; + } + + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return 0; + } + + return rets[0]; + } + + return ret; +} + +/** + * pseries_eeh_phb_reset - Reset the specified PHB + * @phb: PCI controller + * @config_adddr: the associated config address + * @option: reset option + * + * Reset the specified PHB/PE + */ +static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option) +{ + int ret; + + /* Reset PE through RTAS call */ + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + + /* If fundamental-reset not supported, try hot-reset */ + if (option == EEH_RESET_FUNDAMENTAL && + ret == -8) { + option = EEH_RESET_HOT; + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + } + + /* We need reset hold or settlement delay */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + msleep(EEH_PE_RST_HOLD_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + + return ret; +} + +/** + * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE + * @phb: PCI controller + * @config_adddr: the associated config address + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr) +{ + int ret; + /* Waiting 0.2s maximum before skipping configuration */ + int max_wait = 200; + + while (max_wait > 0) { + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid)); + + if (!ret) + return ret; + if (ret < 0) + break; + + /* + * If RTAS returns a delay value that's above 100ms, cut it + * down to 100ms in case firmware made a mistake. For more + * on how these delay values work see rtas_busy_delay_time + */ + if (ret > RTAS_EXTENDED_DELAY_MIN+2 && + ret <= RTAS_EXTENDED_DELAY_MAX) + ret = RTAS_EXTENDED_DELAY_MIN+2; + + max_wait -= rtas_busy_delay_time(ret); + + if (max_wait < 0) + break; + + rtas_busy_delay(ret); + } + + pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", + __func__, phb->global_number, config_addr, ret); + /* PAPR defines -3 as "Parameter Error" for this function: */ + if (ret == -3) + return -EINVAL; + else + return -EIO; +} + /* * Buffer for reporting slot-error-detail rtas calls. Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -96,6 +246,10 @@ static int eeh_error_buf_size; */ static int pseries_eeh_init(void) { + struct pci_controller *phb; + struct pci_dn *pdn; + int addr, config_addr; + /* figure out EEH RTAS function call tokens */ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); @@ -148,6 +302,22 @@ static int pseries_eeh_init(void) /* Set EEH machine dependent code */ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; + if (is_kdump_kernel() || reset_devices) { + pr_info("Issue PHB reset ...\n"); + list_for_each_entry(phb, &hose_list, list_node) { + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); + addr = (pdn->busno << 16) | (pdn->devfn << 8); + config_addr = pseries_eeh_get_config_addr(phb, addr); + /* invalid PE config addr */ + if (config_addr == 0) + continue; + + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); + pseries_eeh_phb_configure_bridge(phb, config_addr); + } + } + return 0; } @@ -221,6 +391,43 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) } /** + * pseries_eeh_pe_get_parent - Retrieve the parent PE + * @edev: EEH device + * + * The whole PEs existing in the system are organized as hierarchy + * tree. The function is used to retrieve the parent PE according + * to the parent EEH device. + */ +static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev) +{ + struct eeh_dev *parent; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + /* + * It might have the case for the indirect parent + * EEH device already having associated PE, but + * the direct parent EEH device doesn't have yet. + */ + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; + while (pdn) { + /* We're poking out of PCI territory */ + parent = pdn_to_eeh_dev(pdn); + if (!parent) + return NULL; + + if (parent->pe) + return parent->pe; + + pdn = pdn->parent; + } + + return NULL; +} + +/** * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn * * @pdn: PCI device node @@ -275,12 +482,11 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) * correctly reflects that current device is root port * or PCIe switch downstream port. */ - edev->class_code = pdn->class_code; edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP); edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); edev->mode &= 0xFFFFFF00; - if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, @@ -304,8 +510,10 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) if (ret) { eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret); } else { + struct eeh_pe *parent; + /* Retrieve PE address */ - edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); + edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn); pe.addr = edev->pe_config_addr; /* Some older systems (Power4) allow the ibm,set-eeh-option @@ -316,16 +524,19 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) enable = 1; - if (enable) { + /* + * This device doesn't support EEH, but it may have an + * EEH parent. In this case any error on the device will + * freeze the PE of it's upstream bridge, so added it to + * the upstream PE. + */ + parent = pseries_eeh_pe_get_parent(edev); + if (parent && !enable) + edev->pe_config_addr = parent->addr; + + if (enable || parent) { eeh_add_flag(EEH_ENABLED); - eeh_add_to_parent_pe(edev); - } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) && - (pdn_to_eeh_dev(pdn->parent))->pe) { - /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. - */ - edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr; - eeh_add_to_parent_pe(edev); + eeh_pe_tree_insert(edev, parent); } eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n", (enable ? "enabled" : "unsupported"), ret); @@ -435,8 +646,10 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) * It's notable that zero'ed return value means invalid PE config * address. */ -static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) +static int pseries_eeh_get_pe_addr(struct pci_dn *pdn) { + int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + unsigned long buid = pdn->phb->buid; int ret = 0; int rets[3]; @@ -447,18 +660,16 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) * meaningless. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 1); + config_addr, BUID_HI(buid), BUID_LO(buid), 1); if (ret || (rets[0] == 0)) return 0; /* Retrieve the associated PE config address */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 0); + config_addr, BUID_HI(buid), BUID_LO(buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pe->phb->global_number, pe->config_addr); + __func__, pdn->phb->global_number, config_addr); return 0; } @@ -467,11 +678,10 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 0); + config_addr, BUID_HI(buid), BUID_LO(buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pe->phb->global_number, pe->config_addr); + __func__, pdn->phb->global_number, config_addr); return 0; } @@ -569,35 +779,13 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) static int pseries_eeh_reset(struct eeh_pe *pe, int option) { int config_addr; - int ret; /* Figure out PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - /* Reset PE through RTAS call */ - ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), option); - - /* If fundamental-reset not supported, try hot-reset */ - if (option == EEH_RESET_FUNDAMENTAL && - ret == -8) { - option = EEH_RESET_HOT; - ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), option); - } - - /* We need reset hold or settlement delay */ - if (option == EEH_RESET_FUNDAMENTAL || - option == EEH_RESET_HOT) - msleep(EEH_PE_RST_HOLD_TIME); - else - msleep(EEH_PE_RST_SETTLE_TIME); - - return ret; + return pseries_eeh_phb_reset(pe->phb, config_addr, option); } /** @@ -641,108 +829,49 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE * @pe: EEH PE * - * The function will be called to reconfigure the bridges included - * in the specified PE so that the mulfunctional PE would be recovered - * again. */ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { int config_addr; - int ret; - /* Waiting 0.2s maximum before skipping configuration */ - int max_wait = 200; /* Figure out the PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - while (max_wait > 0) { - ret = rtas_call(ibm_configure_pe, 3, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid)); - - if (!ret) - return ret; - if (ret < 0) - break; - - /* - * If RTAS returns a delay value that's above 100ms, cut it - * down to 100ms in case firmware made a mistake. For more - * on how these delay values work see rtas_busy_delay_time - */ - if (ret > RTAS_EXTENDED_DELAY_MIN+2 && - ret <= RTAS_EXTENDED_DELAY_MAX) - ret = RTAS_EXTENDED_DELAY_MIN+2; - - max_wait -= rtas_busy_delay_time(ret); - - if (max_wait < 0) - break; - - rtas_busy_delay(ret); - } - - pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, ret); - /* PAPR defines -3 as "Parameter Error" for this function: */ - if (ret == -3) - return -EINVAL; - else - return -EIO; + return pseries_eeh_phb_configure_bridge(pe->phb, config_addr); } /** * pseries_eeh_read_config - Read PCI config space - * @pdn: PCI device node - * @where: PCI address + * @edev: EEH device handle + * @where: PCI config space offset * @size: size to read * @val: return value * * Read config space from the speicifed device */ -static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + return rtas_read_config(pdn, where, size, val); } /** * pseries_eeh_write_config - Write PCI config space - * @pdn: PCI device node - * @where: PCI address + * @edev: EEH device handle + * @where: PCI config space offset * @size: size to write * @val: value to be written * * Write config space to the specified device */ -static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val) +static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val) { - return rtas_write_config(pdn, where, size, val); -} + struct pci_dn *pdn = eeh_dev_to_pdn(edev); -static int pseries_eeh_restore_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - s64 ret = 0; - - if (!edev) - return -EEXIST; - - /* - * FIXME: The MPS, error routing rules, timeout setting are worthy - * to be exported by firmware in extendible way. - */ - if (edev->physfn) - ret = eeh_restore_vf_config(pdn); - - if (ret) { - pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", - __func__, edev->pe_config_addr, ret); - return -EIO; - } - - return ret; + return rtas_write_config(pdn, where, size, val); } #ifdef CONFIG_PCI_IOV @@ -772,8 +901,8 @@ int pseries_send_allow_unfreeze(struct pci_dn *pdn, static int pseries_call_allow_unfreeze(struct eeh_dev *edev) { + int cur_vfs = 0, rc = 0, vf_index, bus, devfn, vf_pe_num; struct pci_dn *pdn, *tmp, *parent, *physfn_pdn; - int cur_vfs = 0, rc = 0, vf_index, bus, devfn; u16 *vf_pe_array; vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); @@ -806,8 +935,10 @@ static int pseries_call_allow_unfreeze(struct eeh_dev *edev) } } else { pdn = pci_get_pdn(edev->pdev); - vf_pe_array[0] = cpu_to_be16(pdn->pe_number); physfn_pdn = pci_get_pdn(edev->physfn); + + vf_pe_num = physfn_pdn->pe_num_map[edev->vf_index]; + vf_pe_array[0] = cpu_to_be16(vf_pe_num); rc = pseries_send_allow_unfreeze(physfn_pdn, vf_pe_array, 1); pdn->last_allow_rc = rc; @@ -818,10 +949,8 @@ static int pseries_call_allow_unfreeze(struct eeh_dev *edev) return rc; } -static int pseries_notify_resume(struct pci_dn *pdn) +static int pseries_notify_resume(struct eeh_dev *edev) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - if (!edev) return -EEXIST; @@ -841,7 +970,6 @@ static struct eeh_ops pseries_eeh_ops = { .init = pseries_eeh_init, .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, - .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, .reset = pseries_eeh_reset, .get_log = pseries_eeh_get_log, @@ -850,7 +978,7 @@ static struct eeh_ops pseries_eeh_ops = { .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, - .restore_config = pseries_eeh_restore_config, + .restore_config = NULL, /* NB: configure_bridge() does this */ #ifdef CONFIG_PCI_IOV .notify_resume = pseries_notify_resume #endif diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 3e49cc23a97a..4c7b7f5a2ebc 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -65,6 +65,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"}, {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, {FW_FEATURE_PAPR_SCM, "hcall-scm"}, + {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 3e8cbfe7a80f..c6e0d8abf75e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -35,54 +35,10 @@ #include <asm/topology.h> #include "pseries.h" -#include "offline_states.h" /* This version can't take the spinlock, because it never returns */ static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; -static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = - CPU_STATE_OFFLINE; -static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; - -static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; - -static bool cede_offline_enabled __read_mostly = true; - -/* - * Enable/disable cede_offline when available. - */ -static int __init setup_cede_offline(char *str) -{ - return (kstrtobool(str, &cede_offline_enabled) == 0); -} - -__setup("cede_offline=", setup_cede_offline); - -enum cpu_state_vals get_cpu_current_state(int cpu) -{ - return per_cpu(current_state, cpu); -} - -void set_cpu_current_state(int cpu, enum cpu_state_vals state) -{ - per_cpu(current_state, cpu) = state; -} - -enum cpu_state_vals get_preferred_offline_state(int cpu) -{ - return per_cpu(preferred_offline_state, cpu); -} - -void set_preferred_offline_state(int cpu, enum cpu_state_vals state) -{ - per_cpu(preferred_offline_state, cpu) = state; -} - -void set_default_offline_state(int cpu) -{ - per_cpu(preferred_offline_state, cpu) = default_offline_state; -} - static void rtas_stop_self(void) { static struct rtas_args args; @@ -101,9 +57,7 @@ static void rtas_stop_self(void) static void pseries_mach_cpu_die(void) { - unsigned int cpu = smp_processor_id(); unsigned int hwcpu = hard_smp_processor_id(); - u8 cede_latency_hint = 0; local_irq_disable(); idle_task_exit(); @@ -112,49 +66,6 @@ static void pseries_mach_cpu_die(void) else xics_teardown_cpu(); - if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - set_cpu_current_state(cpu, CPU_STATE_INACTIVE); - if (ppc_md.suspend_disable_cpu) - ppc_md.suspend_disable_cpu(); - - cede_latency_hint = 2; - - get_lppaca()->idle = 1; - if (!lppaca_shared_proc(get_lppaca())) - get_lppaca()->donate_dedicated_cpu = 1; - - while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - while (!prep_irq_for_idle()) { - local_irq_enable(); - local_irq_disable(); - } - - extended_cede_processor(cede_latency_hint); - } - - local_irq_disable(); - - if (!lppaca_shared_proc(get_lppaca())) - get_lppaca()->donate_dedicated_cpu = 0; - get_lppaca()->idle = 0; - - if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { - unregister_slb_shadow(hwcpu); - - hard_irq_disable(); - /* - * Call to start_secondary_resume() will not return. - * Kernel stack will be reset and start_secondary() - * will be called to continue the online operation. - */ - start_secondary_resume(); - } - } - - /* Requested state is CPU_STATE_OFFLINE at this point */ - WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); - - set_cpu_current_state(cpu, CPU_STATE_OFFLINE); unregister_slb_shadow(hwcpu); rtas_stop_self(); @@ -200,24 +111,13 @@ static void pseries_cpu_die(unsigned int cpu) int cpu_status = 1; unsigned int pcpu = get_hard_smp_processor_id(cpu); - if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - cpu_status = 1; - for (tries = 0; tries < 5000; tries++) { - if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { - cpu_status = 0; - break; - } - msleep(1); - } - } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { + for (tries = 0; tries < 25; tries++) { + cpu_status = smp_query_cpu_stopped(pcpu); + if (cpu_status == QCSS_STOPPED || + cpu_status == QCSS_HARDWARE_ERROR) + break; + cpu_relax(); - for (tries = 0; tries < 25; tries++) { - cpu_status = smp_query_cpu_stopped(pcpu); - if (cpu_status == QCSS_STOPPED || - cpu_status == QCSS_HARDWARE_ERROR) - break; - cpu_relax(); - } } if (cpu_status != 0) { @@ -359,28 +259,14 @@ static int dlpar_offline_cpu(struct device_node *dn) if (get_hard_smp_processor_id(cpu) != thread) continue; - if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) + if (!cpu_online(cpu)) break; - if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { - set_preferred_offline_state(cpu, - CPU_STATE_OFFLINE); - cpu_maps_update_done(); - timed_topology_update(1); - rc = device_offline(get_cpu_device(cpu)); - if (rc) - goto out; - cpu_maps_update_begin(); - break; - } - - /* - * The cpu is in CPU_STATE_INACTIVE. - * Upgrade it's state to CPU_STATE_OFFLINE. - */ - set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); - __cpu_die(cpu); + cpu_maps_update_done(); + rc = device_offline(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); break; } if (cpu == num_possible_cpus()) { @@ -414,10 +300,7 @@ static int dlpar_online_cpu(struct device_node *dn) for_each_present_cpu(cpu) { if (get_hard_smp_processor_id(cpu) != thread) continue; - BUG_ON(get_cpu_current_state(cpu) - != CPU_STATE_OFFLINE); cpu_maps_update_done(); - timed_topology_update(1); find_and_online_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); if (rc) { @@ -854,7 +737,6 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) parent = of_find_node_by_path("/cpus"); if (!parent) { pr_warn("Could not find CPU root node in device tree\n"); - kfree(cpu_drcs); return -1; } @@ -896,25 +778,6 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) return rc; } -int dlpar_cpu_readd(int cpu) -{ - struct device_node *dn; - struct device *dev; - u32 drc_index; - int rc; - - dev = get_cpu_device(cpu); - dn = dev->of_node; - - rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); - - rc = dlpar_cpu_remove_by_index(drc_index); - if (!rc) - rc = dlpar_cpu_add(drc_index); - - return rc; -} - int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { u32 count, drc_index; @@ -1013,27 +876,8 @@ static struct notifier_block pseries_smp_nb = { .notifier_call = pseries_smp_notifier, }; -#define MAX_CEDE_LATENCY_LEVELS 4 -#define CEDE_LATENCY_PARAM_LENGTH 10 -#define CEDE_LATENCY_PARAM_MAX_LENGTH \ - (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) -#define CEDE_LATENCY_TOKEN 45 - -static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; - -static int parse_cede_parameters(void) -{ - memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); - return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - CEDE_LATENCY_TOKEN, - __pa(cede_parameters), - CEDE_LATENCY_PARAM_MAX_LENGTH); -} - static int __init pseries_cpu_hotplug_init(void) { - int cpu; int qcss_tok; #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE @@ -1056,16 +900,8 @@ static int __init pseries_cpu_hotplug_init(void) smp_ops->cpu_die = pseries_cpu_die; /* Processors can be added/removed only on LPAR */ - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_smp_nb); - cpu_maps_update_begin(); - if (cede_offline_enabled && parse_cede_parameters() == 0) { - default_offline_state = CPU_STATE_INACTIVE; - for_each_online_cpu(cpu) - set_default_offline_state(cpu); - } - cpu_maps_update_done(); - } return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 5ace2f9a277e..5d545b78111f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -22,12 +22,10 @@ #include <asm/drmem.h> #include "pseries.h" -static bool rtas_hp_event; - unsigned long pseries_memory_block_size(void) { struct device_node *np; - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; + u64 memblock_size = MIN_MEMORY_BLOCK_SIZE; struct resource r; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -487,40 +485,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) return rc; } -static int dlpar_memory_readd_by_index(u32 drc_index) -{ - struct drmem_lmb *lmb; - int lmb_found; - int rc; - - pr_info("Attempting to update LMB, drc index %x\n", drc_index); - - lmb_found = 0; - for_each_drmem_lmb(lmb) { - if (lmb->drc_index == drc_index) { - lmb_found = 1; - rc = dlpar_remove_lmb(lmb); - if (!rc) { - rc = dlpar_add_lmb(lmb); - if (rc) - dlpar_release_drc(lmb->drc_index); - } - break; - } - } - - if (!lmb_found) - rc = -EINVAL; - - if (rc) - pr_info("Failed to update memory at %llx\n", - lmb->base_addr); - else - pr_info("Memory at %llx was updated\n", lmb->base_addr); - - return rc; -} - static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; @@ -617,10 +581,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) { return -EOPNOTSUPP; } -static int dlpar_memory_readd_by_index(u32 drc_index) -{ - return -EOPNOTSUPP; -} static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { @@ -903,21 +863,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) } break; - case PSERIES_HP_ELOG_ACTION_READD: - drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_readd_by_index(drc_index); - break; default: pr_err("Invalid action (%d) specified\n", hp_elog->action); rc = -EINVAL; break; } - if (!rc) { - rtas_hp_event = true; + if (!rc) rc = drmem_update_dt(); - rtas_hp_event = false; - } unlock_device_hotplug(); return rc; @@ -953,60 +906,6 @@ static int pseries_add_mem_node(struct device_node *np) return (ret < 0) ? -EINVAL : 0; } -static int pseries_update_drconf_memory(struct of_reconfig_data *pr) -{ - struct of_drconf_cell_v1 *new_drmem, *old_drmem; - unsigned long memblock_size; - u32 entries; - __be32 *p; - int i, rc = -EINVAL; - - if (rtas_hp_event) - return 0; - - memblock_size = pseries_memory_block_size(); - if (!memblock_size) - return -EINVAL; - - if (!pr->old_prop) - return 0; - - p = (__be32 *) pr->old_prop->value; - if (!p) - return -EINVAL; - - /* The first int of the property is the number of lmb's described - * by the property. This is followed by an array of of_drconf_cell - * entries. Get the number of entries and skip to the array of - * of_drconf_cell's. - */ - entries = be32_to_cpu(*p++); - old_drmem = (struct of_drconf_cell_v1 *)p; - - p = (__be32 *)pr->prop->value; - p++; - new_drmem = (struct of_drconf_cell_v1 *)p; - - for (i = 0; i < entries; i++) { - if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && - (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) { - rc = pseries_remove_memblock( - be64_to_cpu(old_drmem[i].base_addr), - memblock_size); - break; - } else if ((!(be32_to_cpu(old_drmem[i].flags) & - DRCONF_MEM_ASSIGNED)) && - (be32_to_cpu(new_drmem[i].flags) & - DRCONF_MEM_ASSIGNED)) { - rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr), - memblock_size); - rc = (rc < 0) ? -EINVAL : 0; - break; - } - } - return rc; -} - static int pseries_memory_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1020,10 +919,6 @@ static int pseries_memory_notifier(struct notifier_block *nb, case OF_RECONFIG_DETACH_NODE: err = pseries_remove_mem_node(rd->dn); break; - case OF_RECONFIG_UPDATE_PROPERTY: - if (!strcmp(rd->prop->name, "ibm,dynamic-memory")) - err = pseries_update_drconf_memory(rd); - break; } return notifier_from_errno(err); } diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index 267139b13530..96e18d3b2fcf 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -45,7 +45,7 @@ static int hvcs_convert(long to_convert) case H_LONG_BUSY_ORDER_10_SEC: case H_LONG_BUSY_ORDER_100_SEC: return -EBUSY; - case H_FUNCTION: /* fall through */ + case H_FUNCTION: default: return -EPERM; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f71ff2c94efe..baf24eacd268 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1681,9 +1681,11 @@ static int pseries_lpar_register_process_table(unsigned long base, if (table_size) flags |= PROC_TABLE_NEW; - if (radix_enabled()) - flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; - else + if (radix_enabled()) { + flags |= PROC_TABLE_RADIX; + if (mmu_has_feature(MMU_FTR_GTSE)) + flags |= PROC_TABLE_GTSE; + } else flags |= PROC_TABLE_HPT_SLB; for (;;) { rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 10d982997736..d6f4162478a5 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -6,6 +6,9 @@ * Copyright (C) 2010 IBM Corporation */ + +#define pr_fmt(fmt) "mobility: " fmt + #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/kobject.h> @@ -64,6 +67,8 @@ static int delete_dt_node(__be32 phandle) if (!dn) return -ENOENT; + pr_debug("removing node %pOFfp\n", dn); + dlpar_detach_node(dn); of_node_put(dn); return 0; @@ -122,6 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, } if (!more) { + pr_debug("updating node %pOF property %s\n", dn, name); of_update_property(dn, new_prop); *prop = NULL; } @@ -240,33 +246,12 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) if (rc) dlpar_free_cc_nodes(dn); + pr_debug("added node %pOFfp\n", dn); + of_node_put(parent_dn); return rc; } -static void prrn_update_node(__be32 phandle) -{ - struct pseries_hp_errorlog hp_elog; - struct device_node *dn; - - /* - * If a node is found from a the given phandle, the phandle does not - * represent the drc index of an LMB and we can ignore. - */ - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (dn) { - of_node_put(dn); - return; - } - - hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM; - hp_elog.action = PSERIES_HP_ELOG_ACTION_READD; - hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; - hp_elog._drc_u.drc_index = phandle; - - handle_dlpar_errorlog(&hp_elog); -} - int pseries_devicetree_update(s32 scope) { char *rtas_buf; @@ -305,10 +290,6 @@ int pseries_devicetree_update(s32 scope) break; case UPDATE_DT_NODE: update_dt_node(phandle, scope); - - if (scope == PRRN_SCOPE) - prrn_update_node(phandle); - break; case ADD_DT_NODE: drc_index = *data++; @@ -388,8 +369,6 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - stop_topology_update(); - do { rc = rtas_ibm_suspend_me(streamid); if (rc == -EAGAIN) @@ -401,8 +380,6 @@ static ssize_t migration_store(struct class *class, post_mobility_fixup(); - start_topology_update(); - return count; } @@ -427,11 +404,11 @@ static int __init mobility_sysfs_init(void) rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); if (rc) - pr_err("mobility: unable to create migration sysfs file (%d)\n", rc); + pr_err("unable to create migration sysfs file (%d)\n", rc); rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr); if (rc) - pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc); + pr_err("unable to create api_version sysfs file (%d)\n", rc); return 0; } diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h deleted file mode 100644 index 51414aee2862..000000000000 --- a/arch/powerpc/platforms/pseries/offline_states.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _OFFLINE_STATES_H_ -#define _OFFLINE_STATES_H_ - -/* Cpu offline states go here */ -enum cpu_state_vals { - CPU_STATE_OFFLINE, - CPU_STATE_INACTIVE, - CPU_STATE_ONLINE, - CPU_MAX_OFFLINE_STATES -}; - -#ifdef CONFIG_HOTPLUG_CPU -extern enum cpu_state_vals get_cpu_current_state(int cpu); -extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); -extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); -extern void set_default_offline_state(int cpu); -#else -static inline enum cpu_state_vals get_cpu_current_state(int cpu) -{ - return CPU_STATE_ONLINE; -} - -static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state) -{ -} - -static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state) -{ -} - -static inline void set_default_offline_state(int cpu) -{ -} -#endif - -extern enum cpu_state_vals get_preferred_offline_state(int cpu); -#endif diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 9c569078a09f..f439f0dfea7d 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -13,9 +13,11 @@ #include <linux/platform_device.h> #include <linux/delay.h> #include <linux/seq_buf.h> +#include <linux/nd.h> #include <asm/plpar_wrappers.h> #include <asm/papr_pdsm.h> +#include <asm/mce.h> #define BIND_ANY_ADDR (~0ul) @@ -62,6 +64,26 @@ PAPR_PMEM_HEALTH_FATAL | \ PAPR_PMEM_HEALTH_UNHEALTHY) +#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) +#define PAPR_SCM_PERF_STATS_VERSION 0x1 + +/* Struct holding a single performance metric */ +struct papr_scm_perf_stat { + u8 stat_id[8]; + __be64 stat_val; +} __packed; + +/* Struct exchanged between kernel and PHYP for fetching drc perf stats */ +struct papr_scm_perf_stats { + u8 eye_catcher[8]; + /* Should be PAPR_SCM_PERF_STATS_VERSION */ + __be32 stats_version; + /* Number of stats following */ + __be32 num_statistics; + /* zero or more performance matrics */ + struct papr_scm_perf_stat scm_statistic[]; +} __packed; + /* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; @@ -80,6 +102,7 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + struct list_head region_list; /* Protect dimm health data from concurrent read/writes */ struct mutex health_mutex; @@ -89,8 +112,14 @@ struct papr_scm_priv { /* Health information for the dimm */ u64 health_bitmap; + + /* length of the stat buffer as expected by phyp */ + size_t stat_buffer_len; }; +static LIST_HEAD(papr_nd_regions); +static DEFINE_MUTEX(papr_ndr_lock); + static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; @@ -195,6 +224,79 @@ err_out: } /* + * Query the Dimm performance stats from PHYP and copy them (if returned) to + * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast + * (num_stats + header) bytes. + * - If buff_stats == NULL the return value is the size in byes of the buffer + * needed to hold all supported performance-statistics. + * - If buff_stats != NULL and num_stats == 0 then we copy all known + * performance-statistics to 'buff_stat' and expect to be large enough to + * hold them. + * - if buff_stats != NULL and num_stats > 0 then copy the requested + * performance-statistics to buff_stats. + */ +static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, + struct papr_scm_perf_stats *buff_stats, + unsigned int num_stats) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + size_t size; + s64 rc; + + /* Setup the out buffer */ + if (buff_stats) { + memcpy(buff_stats->eye_catcher, + PAPR_SCM_PERF_STATS_EYECATCHER, 8); + buff_stats->stats_version = + cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); + buff_stats->num_statistics = + cpu_to_be32(num_stats); + + /* + * Calculate the buffer size based on num-stats provided + * or use the prefetched max buffer length + */ + if (num_stats) + /* Calculate size from the num_stats */ + size = sizeof(struct papr_scm_perf_stats) + + num_stats * sizeof(struct papr_scm_perf_stat); + else + size = p->stat_buffer_len; + } else { + /* In case of no out buffer ignore the size */ + size = 0; + } + + /* Do the HCALL asking PHYP for info */ + rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, + buff_stats ? virt_to_phys(buff_stats) : 0, + size); + + /* Check if the error was due to an unknown stat-id */ + if (rc == H_PARTIAL) { + dev_err(&p->pdev->dev, + "Unknown performance stats, Err:0x%016lX\n", ret[0]); + return -ENOENT; + } else if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query performance stats, Err:%lld\n", rc); + return -EIO; + + } else if (!size) { + /* Handle case where stat buffer size was requested */ + dev_dbg(&p->pdev->dev, + "Performance stats size %ld\n", ret[0]); + return ret[0]; + } + + /* Successfully fetched the requested stats from phyp */ + dev_dbg(&p->pdev->dev, + "Performance stats returned %d stats\n", + be32_to_cpu(buff_stats->num_statistics)); + return 0; +} + +/* * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the * health information. */ @@ -416,6 +518,51 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, return 0; } +static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc, size; + u64 statval; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + + /* Silently fail if fetching performance metrics isn't supported */ + if (!p->stat_buffer_len) + return 0; + + /* Allocate request buffer enough to hold single performance stat */ + size = sizeof(struct papr_scm_perf_stats) + + sizeof(struct papr_scm_perf_stat); + + stats = kzalloc(size, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + stat = &stats->scm_statistic[0]; + memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); + stat->stat_val = 0; + + /* Fetch the fuel gauge and populate it in payload */ + rc = drc_pmem_query_stats(p, stats, 1); + if (rc < 0) { + dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); + goto free_stats; + } + + statval = be64_to_cpu(stat->stat_val); + dev_dbg(&p->pdev->dev, + "Fetched fuel-gauge %llu", statval); + payload->health.extension_flags |= + PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; + payload->health.dimm_fuel_gauge = statval; + + rc = sizeof(struct nd_papr_pdsm_health); + +free_stats: + kfree(stats); + return rc; +} + /* Fetch the DIMM health info and populate it in provided package. */ static int papr_pdsm_health(struct papr_scm_priv *p, union nd_pdsm_payload *payload) @@ -456,6 +603,10 @@ static int papr_pdsm_health(struct papr_scm_priv *p, /* struct populated hence can release the mutex now */ mutex_unlock(&p->health_mutex); + + /* Populate the fuel gauge meter in the payload */ + papr_pdsm_fuel_gauge(p, payload); + rc = sizeof(struct nd_papr_pdsm_health); out: @@ -631,6 +782,48 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t perf_stats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int index, rc; + struct seq_buf s; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + if (!p->stat_buffer_len) + return -ENOENT; + + /* Allocate the buffer for phyp where stats are written */ + stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + /* Ask phyp to return all dimm perf stats */ + rc = drc_pmem_query_stats(p, stats, 0); + if (rc) + goto free_stats; + /* + * Go through the returned output buffer and print stats and + * values. Since stat_id is essentially a char string of + * 8 bytes, simply use the string format specifier to print it. + */ + seq_buf_init(&s, buf, PAGE_SIZE); + for (index = 0, stat = stats->scm_statistic; + index < be32_to_cpu(stats->num_statistics); + ++index, ++stat) { + seq_buf_printf(&s, "%.8s = 0x%016llX\n", + stat->stat_id, + be64_to_cpu(stat->stat_val)); + } + +free_stats: + kfree(stats); + return rc ? rc : seq_buf_used(&s); +} +DEVICE_ATTR_RO(perf_stats); + static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -676,6 +869,7 @@ DEVICE_ATTR_RO(flags); /* papr_scm specific dimm attributes */ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, + &dev_attr_perf_stats.attr, NULL, }; @@ -696,6 +890,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_region_desc ndr_desc; unsigned long dimm_flags; int target_nid, online_nid; + ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -759,6 +954,20 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dev_info(dev, "Region registered with target node %d and online node %d", target_nid, online_nid); + mutex_lock(&papr_ndr_lock); + list_add_tail(&p->region_list, &papr_nd_regions); + mutex_unlock(&papr_ndr_lock); + + /* Try retriving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } else { + dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); + } + return 0; err: nvdimm_bus_unregister(p->bus); @@ -766,6 +975,68 @@ err: nvdimm_bus_unregister(p->bus); return -ENXIO; } +static void papr_scm_add_badblock(struct nd_region *region, + struct nvdimm_bus *bus, u64 phys_addr) +{ + u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { + pr_err("Bad block registration for 0x%llx failed\n", phys_addr); + return; + } + + pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); +} + +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct machine_check_event *evt = data; + struct papr_scm_priv *p; + u64 phys_addr; + bool found = false; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(&papr_nd_regions)) + return NOTIFY_DONE; + + /* + * The physical address obtained here is PAGE_SIZE aligned, so get the + * exact address from the effective address + */ + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + /* mce notifier is called from a process context, so mutex is safe */ + mutex_lock(&papr_ndr_lock); + list_for_each_entry(p, &papr_nd_regions, region_list) { + if (phys_addr >= p->res.start && phys_addr <= p->res.end) { + found = true; + break; + } + } + + if (found) + papr_scm_add_badblock(p->region, p->bus, phys_addr); + + mutex_unlock(&papr_ndr_lock); + + return found ? NOTIFY_OK : NOTIFY_DONE; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int papr_scm_probe(struct platform_device *pdev) { struct device_node *dn = pdev->dev.of_node; @@ -866,6 +1137,10 @@ static int papr_scm_remove(struct platform_device *pdev) { struct papr_scm_priv *p = platform_get_drvdata(pdev); + mutex_lock(&papr_ndr_lock); + list_del(&p->region_list); + mutex_unlock(&papr_ndr_lock); + nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); kfree(p->bus_desc.provider_name); @@ -876,6 +1151,7 @@ static int papr_scm_remove(struct platform_device *pdev) static const struct of_device_id papr_scm_match[] = { { .compatible = "ibm,pmemory" }, + { .compatible = "ibm,pmemory-v2" }, { }, }; @@ -888,7 +1164,25 @@ static struct platform_driver papr_scm_driver = { }, }; -module_platform_driver(papr_scm_driver); +static int __init papr_scm_init(void) +{ + int ret; + + ret = platform_driver_register(&papr_scm_driver); + if (!ret) + mce_register_notifier(&mce_ue_nb); + + return ret; +} +module_init(papr_scm_init); + +static void __exit papr_scm_exit(void) +{ + mce_unregister_notifier(&mce_ue_nb); + platform_driver_unregister(&papr_scm_driver); +} +module_exit(papr_scm_exit); + MODULE_DEVICE_TABLE(of, papr_scm_match); MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index b3a38f5a6b68..f9ae17e8a0f4 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -34,7 +34,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pci_devs_phb_init_dynamic(phb); /* Create EEH devices for the PHB */ - eeh_dev_phb_init_dynamic(phb); + eeh_phb_pe_create(phb); if (dn->child) pseries_eeh_init_edev_recursive(PCI_DN(dn)); diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index f860a897a9e0..e1dc5d3254df 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -24,7 +24,6 @@ #include <asm/topology.h> #include "pseries.h" -#include "offline_states.h" static struct device_node *pmem_node; @@ -147,6 +146,12 @@ const struct of_device_id drc_pmem_match[] = { static int pseries_pmem_init(void) { + /* + * Only supported on POWER8 and above. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return 0; + pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory"); if (!pmem_node) return 0; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 27094c872fd6..2f4ee0a90284 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -359,7 +359,7 @@ static void pseries_lpar_idle(void) * to ever be a problem in practice we can move this into a kernel thread to * finish off the process later in boot. */ -void pseries_enable_reloc_on_exc(void) +bool pseries_enable_reloc_on_exc(void) { long rc; unsigned int delay, total_delay = 0; @@ -370,11 +370,13 @@ void pseries_enable_reloc_on_exc(void) if (rc == H_P2) { pr_info("Relocation on exceptions not" " supported\n"); + return false; } else if (rc != H_SUCCESS) { pr_warn("Unable to enable relocation" " on exceptions: %ld\n", rc); + return false; } - break; + return true; } delay = get_longbusy_msecs(rc); @@ -383,7 +385,7 @@ void pseries_enable_reloc_on_exc(void) pr_warn("Warning: Giving up waiting to enable " "relocation on exceptions (%u msec)!\n", total_delay); - return; + return false; } mdelay(delay); @@ -746,6 +748,11 @@ static void __init pSeries_setup_arch(void) smp_init_pseries(); + if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE)) + if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) + panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n"); + + /* openpic global configuration register (64-bit format). */ /* openpic Interrupt Source Unit pointer (64-bit format). */ /* python0 facility area (mmio) (64-bit format) REAL address. */ @@ -772,8 +779,10 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); - if (lppaca_shared_proc(get_lppaca())) + if (lppaca_shared_proc(get_lppaca())) { static_branch_enable(&shared_processor); + pv_spinlocks_init(); + } ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; @@ -832,12 +841,15 @@ static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); } -static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx) +static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx) { /* PAPR says we can't set HYP */ dawrx &= ~DAWRX_HYP; - return plpar_set_watchpoint0(dawr, dawrx); + if (nr == 0) + return plpar_set_watchpoint0(dawr, dawrx); + else + return plpar_set_watchpoint1(dawr, dawrx); } #define CMO_CHARACTERISTICS_TOKEN 44 diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 6891710833be..92922491a81c 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -44,8 +44,6 @@ #include <asm/svm.h> #include "pseries.h" -#include "offline_states.h" - /* * The Primary thread of each non-boot processor was started from the OF client @@ -108,10 +106,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) /* Fixup atomic count: it exited inside IRQ handler. */ task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; -#ifdef CONFIG_HOTPLUG_CPU - if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) - goto out; -#endif + /* * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. @@ -126,9 +121,6 @@ static inline int smp_startup_cpu(unsigned int lcpu) return 0; } -#ifdef CONFIG_HOTPLUG_CPU -out: -#endif return 1; } @@ -143,10 +135,6 @@ static void smp_setup_cpu(int cpu) vpa_init(cpu); cpumask_clear_cpu(cpu, of_spin_mask); -#ifdef CONFIG_HOTPLUG_CPU - set_cpu_current_state(cpu, CPU_STATE_ONLINE); - set_default_offline_state(cpu); -#endif } static int smp_pSeries_kick_cpu(int nr) @@ -163,20 +151,6 @@ static int smp_pSeries_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca_ptrs[nr]->cpu_start = 1; -#ifdef CONFIG_HOTPLUG_CPU - set_preferred_offline_state(nr, CPU_STATE_ONLINE); - - if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { - long rc; - unsigned long hcpuid; - - hcpuid = get_hard_smp_processor_id(nr); - rc = plpar_hcall_norets(H_PROD, hcpuid); - if (rc != H_SUCCESS) - printk(KERN_ERR "Error: Prod to wake up processor %d " - "Ret= %ld\n", nr, rc); - } -#endif return 0; } @@ -188,13 +162,16 @@ static int pseries_smp_prepare_cpu(int cpu) return 0; } -static void smp_pseries_cause_ipi(int cpu) +/* Cause IPI as setup by the interrupt controller (xics or xive) */ +static void (*ic_cause_ipi)(int cpu) __ro_after_init; + +/* Use msgsndp doorbells target is a sibling, else use interrupt controller */ +static void dbell_or_ic_cause_ipi(int cpu) { - /* POWER9 should not use this handler */ if (doorbell_try_core_ipi(cpu)) return; - icp_ops->cause_ipi(cpu); + ic_cause_ipi(cpu); } static int pseries_cause_nmi_ipi(int cpu) @@ -218,26 +195,49 @@ static int pseries_cause_nmi_ipi(int cpu) return 0; } -static __init void pSeries_smp_probe_xics(void) -{ - xics_smp_probe(); - - if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest()) - smp_ops->cause_ipi = smp_pseries_cause_ipi; - else - smp_ops->cause_ipi = icp_ops->cause_ipi; -} - static __init void pSeries_smp_probe(void) { if (xive_enabled()) - /* - * Don't use P9 doorbells when XIVE is enabled. IPIs - * using MMIOs should be faster - */ xive_smp_probe(); else - pSeries_smp_probe_xics(); + xics_smp_probe(); + + /* No doorbell facility, must use the interrupt controller for IPIs */ + if (!cpu_has_feature(CPU_FTR_DBELL)) + return; + + /* Doorbells can only be used for IPIs between SMT siblings */ + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + + if (is_kvm_guest()) { + /* + * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp + * faults to the hypervisor which then reads the instruction + * from guest memory, which tends to be slower than using XIVE. + */ + if (xive_enabled()) + return; + + /* + * XICS hcalls aren't as fast, so we can use msgsndp (which + * also helps exercise KVM emulation), however KVM can't + * emulate secure guests because it can't read the instruction + * out of their memory. + */ + if (is_secure_guest()) + return; + } + + /* + * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are + * gang scheduled on the same physical core, so doorbells are always + * faster than the interrupt controller, and they can be used by + * secure guests. + */ + + ic_cause_ipi = smp_ops->cause_ipi; + smp_ops->cause_ipi = dbell_or_ic_cause_ipi; } static struct smp_ops_t pseries_smp_ops = { diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 0a24a5a185f0..81e0ac58d620 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -132,15 +132,11 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - stream_id = simple_strtoul(buf, NULL, 16); do { @@ -149,33 +145,14 @@ static ssize_t store_hibernate(struct device *dev, ssleep(1); } while (rc == -EAGAIN); - if (!rc) { - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, - cpu_online_mask); - rc = rtas_online_cpus_mask(offline_mask); - if (rc) { - pr_err("%s: Could not bring present CPUs online.\n", - __func__); - goto out; - } - - stop_topology_update(); + if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - start_topology_update(); - - /* Take down CPUs not online prior to suspend */ - if (!rtas_offline_cpus_mask(offline_mask)) - pr_warn("%s: Could not restore CPUs to offline " - "state.\n", __func__); - } stream_id = 0; if (!rc) rc = count; -out: - free_cpumask_var(offline_mask); + return rc; } diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 7c6d8b14f440..348f59581052 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -2,11 +2,11 @@ KASAN_SANITIZE := n -targets += trampoline.o purgatory.ro kexec-purgatory.c +targets += trampoline_$(BITS).o purgatory.ro kexec-purgatory.c LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -$(obj)/purgatory.ro: $(obj)/trampoline.o FORCE +$(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE $(call if_changed,ld) quiet_cmd_bin2c = BIN2C $@ diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline_64.S index a5a83c3f53e6..d956b8a35fd1 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline_64.S @@ -10,6 +10,7 @@ */ #include <asm/asm-compat.h> +#include <asm/crashdump-ppc64.h> .machine ppc64 .balign 256 @@ -43,14 +44,39 @@ master: mr %r17,%r3 /* save cpu id to r17 */ mr %r15,%r4 /* save physical address in reg15 */ + /* Work out where we're running */ + bcl 20, 31, 0f +0: mflr %r18 + + /* + * Copy BACKUP_SRC_SIZE bytes from BACKUP_SRC_START to + * backup_start 8 bytes at a time. + * + * Use r3 = dest, r4 = src, r5 = size, r6 = count + */ + ld %r3, (backup_start - 0b)(%r18) + cmpdi %cr0, %r3, 0 + beq .Lskip_copy /* skip if there is no backup region */ + lis %r5, BACKUP_SRC_SIZE@h + ori %r5, %r5, BACKUP_SRC_SIZE@l + cmpdi %cr0, %r5, 0 + beq .Lskip_copy /* skip if copy size is zero */ + lis %r4, BACKUP_SRC_START@h + ori %r4, %r4, BACKUP_SRC_START@l + li %r6, 0 +.Lcopy_loop: + ldx %r0, %r6, %r4 + stdx %r0, %r6, %r3 + addi %r6, %r6, 8 + cmpld %cr0, %r6, %r5 + blt .Lcopy_loop + +.Lskip_copy: or %r3,%r3,%r3 /* ok now to high priority, lets boot */ lis %r6,0x1 mtctr %r6 /* delay a bit for slaves to catch up */ bdnz . /* before we overwrite 0-100 again */ - bl 0f /* Work out where we're running */ -0: mflr %r18 - /* load device-tree address */ ld %r3, (dt_offset - 0b)(%r18) mr %r16,%r3 /* save dt address in reg16 */ @@ -61,6 +87,10 @@ master: li %r4,28 STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */ 1: + /* Load opal base and entry values in r8 & r9 respectively */ + ld %r8,(opal_base - 0b)(%r18) + ld %r9,(opal_entry - 0b)(%r18) + /* load the kernel address */ ld %r4,(kernel - 0b)(%r18) @@ -89,7 +119,6 @@ master: rfid /* update MSR and start kernel */ - .balign 8 .globl kernel kernel: @@ -102,6 +131,23 @@ dt_offset: .8byte 0x0 .size dt_offset, . - dt_offset + .balign 8 + .globl backup_start +backup_start: + .8byte 0x0 + .size backup_start, . - backup_start + + .balign 8 + .globl opal_base +opal_base: + .8byte 0x0 + .size opal_base, . - opal_base + + .balign 8 + .globl opal_entry +opal_entry: + .8byte 0x0 + .size opal_entry, . - opal_entry .data .balign 8 diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 71b881e554fc..cb58ec7ce77a 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/cpumask.h> #include <linux/mm.h> +#include <linux/kmemleak.h> #include <asm/machdep.h> #include <asm/prom.h> @@ -647,6 +648,7 @@ static bool xive_native_provision_pages(void) pr_err("Failed to allocate provisioning page\n"); return false; } + kmemleak_ignore(p); opal_xive_donate_page(chip, __pa(p)); } return true; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f0551a2be9df..1e3674d7ea7b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -768,7 +768,7 @@ static const u8 *get_vec5_feature(unsigned int index) return vec5 + index; } -static bool xive_spapr_disabled(void) +static bool __init xive_spapr_disabled(void) { const u8 *vec5_xive; diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 77114755dc6f..6e6a30aea3ed 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -31,7 +31,10 @@ grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]] grep -v '\<__start_initialization_multiplatform>' | grep -v -e 'b.\?.\?ctr' | grep -v -e 'b.\?.\?lr' | -sed 's/://' | +sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ + -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ + -e 's/[[:space:]]0x/ /' \ + -e 's/://' | awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' ) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 7efe4bc3ccf6..df7bca00f5ec 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -481,6 +481,13 @@ static inline int unrecoverable_excp(struct pt_regs *regs) #endif } +static void xmon_touch_watchdogs(void) +{ + touch_softlockup_watchdog_sync(); + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); +} + static int xmon_core(struct pt_regs *regs, int fromipi) { int cmd = 0; @@ -718,7 +725,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) else insert_cpu_bpts(); - touch_nmi_watchdog(); + xmon_touch_watchdogs(); local_irq_restore(flags); return cmd != 'X' && cmd != EOF; @@ -1593,6 +1600,7 @@ const char *getvecname(unsigned long vec) case 0x1300: ret = "(Instruction Breakpoint)"; break; case 0x1500: ret = "(Denormalisation)"; break; case 0x1700: ret = "(Altivec Assist)"; break; + case 0x3000: ret = "(System Call Vectored)"; break; default: ret = ""; } return ret; @@ -1861,7 +1869,7 @@ static void cacheflush(void) catch_memory_errors = 1; sync(); - if (cmd != 'i') { + if (cmd != 'i' || IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES) cflush((void *) adrs); } else { @@ -2022,6 +2030,18 @@ static void dump_300_sprs(void) #endif } +static void dump_310_sprs(void) +{ +#ifdef CONFIG_PPC64 + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return; + + printf("mmcr3 = %.16lx, sier2 = %.16lx, sier3 = %.16lx\n", + mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3)); + +#endif +} + static void dump_one_spr(int spr, bool show_unimplemented) { unsigned long val; @@ -2076,6 +2096,7 @@ static void super_regs(void) dump_206_sprs(); dump_207_sprs(); dump_300_sprs(); + dump_310_sprs(); return; } @@ -2934,11 +2955,10 @@ generic_inst_dump(unsigned long adr, long count, int praddr, int nr, dotted; unsigned long first_adr; struct ppc_inst inst, last_inst = ppc_inst(0); - unsigned char val[4]; dotted = 0; - for (first_adr = adr; count > 0; --count, adr += 4) { - nr = mread(adr, val, 4); + for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) { + nr = mread_instr(adr, &inst); if (nr == 0) { if (praddr) { const char *x = fault_chars[fault_type]; @@ -2946,7 +2966,6 @@ generic_inst_dump(unsigned long adr, long count, int praddr, } break; } - inst = ppc_inst(GETWORD(val)); if (adr > first_adr && ppc_inst_equal(inst, last_inst)) { if (!dotted) { printf(" ...\n"); @@ -2957,9 +2976,12 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %.8x", adr, ppc_inst_val(inst)); + printf(REG" %s", adr, ppc_inst_as_str(inst)); printf("\t"); - dump_func(ppc_inst_val(inst), adr); + if (!ppc_inst_prefixed(inst)) + dump_func(ppc_inst_val(inst), adr); + else + dump_func(ppc_inst_as_u64(inst), adr); printf("\n"); } return adr - first_adr; @@ -4256,7 +4278,7 @@ static int do_spu_cmd(void) subcmd = inchar(); if (isxdigit(subcmd) || subcmd == '\n') termch = subcmd; - /* fall through */ + fallthrough; case 'f': scanhex(&num); if (num >= XMON_NUM_SPUS || !spu_info[num].spu) { diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6c4bce7cad8a..7b5905529146 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,11 +13,14 @@ config 32BIT config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_WX select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_KCOV select ARCH_HAS_MMIOWB select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP @@ -47,6 +50,8 @@ config RISCV select GENERIC_TIME_VSYSCALL if MMU && 64BIT select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if MMU && 64BIT select HAVE_ARCH_KGDB select HAVE_ARCH_KGDB_QXFER_PKT @@ -54,14 +59,18 @@ config RISCV select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS + select HAVE_CONTEXT_TRACKING + select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU select HAVE_EBPF_JIT if MMU select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO if MMU && 64BIT select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN select MODULES_USE_ELF_RELA if MODULES @@ -179,6 +188,9 @@ config PGTABLE_LEVELS default 3 if 64BIT default 2 +config LOCKDEP_SUPPORT + def_bool y + source "arch/riscv/Kconfig.socs" menu "Platform type" diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 3530c59b3ea7..c59fca695f9d 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -14,6 +14,8 @@ # Based on the ia64 and arm64 boot/Makefile. # +KCOV_INSTRUMENT := n + OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image loader diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 4da4886246a4..d58c93efb603 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -17,6 +17,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_SOC_SIFIVE=y CONFIG_SOC_VIRT=y CONFIG_SMP=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_NET=y diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index b48138e329ea..cd1df62b13c7 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -33,6 +33,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0" CONFIG_CMDLINE_FORCE=y +CONFIG_JUMP_LABEL=y # CONFIG_BLOCK is not set CONFIG_BINFMT_FLAT=y # CONFIG_COREDUMP is not set diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index cf74e179bf90..f27596e9663e 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -30,6 +30,7 @@ CONFIG_MAXPHYSMEM_2GB=y CONFIG_SMP=y CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0" CONFIG_CMDLINE_FORCE=y +CONFIG_JUMP_LABEL=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_MSDOS_PARTITION is not set diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 05bbf5240569..3a55f0e00d6c 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -17,6 +17,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_SOC_VIRT=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_NET=y diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h new file mode 100644 index 000000000000..d6c277992f76 --- /dev/null +++ b/arch/riscv/include/asm/irq_work.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_IRQ_WORK_H +#define _ASM_RISCV_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return true; +} +extern void arch_irq_work_raise(void); +#endif /* _ASM_RISCV_IRQ_WORK_H */ diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h new file mode 100644 index 000000000000..38af2ec7b9bf --- /dev/null +++ b/arch/riscv/include/asm/jump_label.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Emil Renner Berthing + * + * Based on arch/arm64/include/asm/jump_label.h + */ +#ifndef __ASM_JUMP_LABEL_H +#define __ASM_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <asm/asm.h> + +#define JUMP_LABEL_NOP_SIZE 4 + +static __always_inline bool arch_static_branch(struct static_key *key, + bool branch) +{ + asm_volatile_goto( + " .option push \n\t" + " .option norelax \n\t" + " .option norvc \n\t" + "1: nop \n\t" + " .option pop \n\t" + " .pushsection __jump_table, \"aw\" \n\t" + " .align " RISCV_LGPTR " \n\t" + " .long 1b - ., %l[label] - . \n\t" + " " RISCV_PTR " %0 - . \n\t" + " .popsection \n\t" + : : "i"(&((char *)key)[branch]) : : label); + + return false; +label: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, + bool branch) +{ + asm_volatile_goto( + " .option push \n\t" + " .option norelax \n\t" + " .option norvc \n\t" + "1: jal zero, %l[label] \n\t" + " .option pop \n\t" + " .pushsection __jump_table, \"aw\" \n\t" + " .align " RISCV_LGPTR " \n\t" + " .long 1b - ., %l[label] - . \n\t" + " " RISCV_PTR " %0 - . \n\t" + " .popsection \n\t" + : : "i"(&((char *)key)[branch]) : : label); + + return false; +label: + return true; +} + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index 56053c9838b2..aff6c33ab0c0 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/arch/riscv/include/asm/mmio.h @@ -14,12 +14,6 @@ #include <linux/types.h> #include <asm/mmiowb.h> -#ifndef CONFIG_MMU -#define pgprot_noncached(x) (x) -#define pgprot_writecombine(x) (x) -#define pgprot_device(x) (x) -#endif /* CONFIG_MMU */ - /* Generic IO read/write. These perform native-endian accesses. */ #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 40bb1c15a731..6dfd2a1446d5 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -40,6 +40,9 @@ void arch_send_call_function_single_ipi(int cpu); int riscv_hartid_to_cpuid(int hartid); void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); +/* Secondary hart entry */ +asmlinkage void smp_callin(void); + /* * Obtains the hart ID of the currently executing task. This relies on * THREAD_INFO_IN_TASK, but we define that unconditionally. diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h new file mode 100644 index 000000000000..d95f7b2a7f37 --- /dev/null +++ b/arch/riscv/include/asm/stackprotector.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_RISCV_STACKPROTECTOR_H +#define _ASM_RISCV_STACKPROTECTOR_H + +#include <linux/random.h> +#include <linux/version.h> +#include <asm/timex.h> + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + unsigned long tsc; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + tsc = get_cycles(); + canary += tsc + (tsc << BITS_PER_LONG/2); + canary ^= LINUX_VERSION_CODE; + canary &= CANARY_MASK; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} +#endif /* _ASM_RISCV_STACKPROTECTOR_H */ diff --git a/arch/riscv/include/uapi/asm/hwcap.h b/arch/riscv/include/uapi/asm/hwcap.h index dee98ee28318..46dc3f5ee99f 100644 --- a/arch/riscv/include/uapi/asm/hwcap.h +++ b/arch/riscv/include/uapi/asm/hwcap.h @@ -11,7 +11,7 @@ /* * Linux saves the floating-point registers according to the ISA Linux is * executing on, as opposed to the ISA the user program is compiled for. This - * is necessary for a handful of esoteric use cases: for example, userpsace + * is necessary for a handful of esoteric use cases: for example, userspace * threading libraries must be able to examine the actual machine state in * order to fully reconstruct the state of a thread. */ diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 13ce76cc5aff..4b989ae15d59 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -12,7 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <https://www.gnu.org/licenses/>. */ #ifdef __LP64__ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index b355cf485671..a5287ab9f7f2 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -53,4 +53,6 @@ endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o + clean: diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 07cb9c10de4e..db203442c08f 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -27,9 +27,6 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]); OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]); OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); - OFFSET(TASK_THREAD_SP, task_struct, thread.sp); - OFFSET(TASK_STACK, task_struct, stack); - OFFSET(TASK_TI, task_struct, thread_info); OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index cae7e6d4c7ef..524d918f3601 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -97,19 +97,36 @@ _save_context: la gp, __global_pointer$ .option pop - la ra, ret_from_exception +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off +#endif + +#ifdef CONFIG_CONTEXT_TRACKING + /* If previous state is in user mode, call context_tracking_user_exit. */ + li a0, SR_PP + and a0, s1, a0 + bnez a0, skip_context_tracking + call context_tracking_user_exit +skip_context_tracking: +#endif + /* * MSB of cause differentiates between * interrupts and exceptions */ bge s4, zero, 1f + la ra, ret_from_exception + /* Handle interrupts */ move a0, sp /* pt_regs */ la a1, handle_arch_irq REG_L a1, (a1) jr a1 1: +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_on +#endif /* * Exceptions run with interrupts enabled or disabled depending on the * state of SR_PIE in m/sstatus. @@ -119,6 +136,7 @@ _save_context: csrs CSR_STATUS, SR_IE 1: + la ra, ret_from_exception /* Handle syscalls */ li t0, EXC_SYSCALL beq s4, t0, handle_syscall @@ -137,6 +155,17 @@ _save_context: tail do_trap_unknown handle_syscall: +#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) + /* Recover a0 - a7 for system calls */ + REG_L a0, PT_A0(sp) + REG_L a1, PT_A1(sp) + REG_L a2, PT_A2(sp) + REG_L a3, PT_A3(sp) + REG_L a4, PT_A4(sp) + REG_L a5, PT_A5(sp) + REG_L a6, PT_A6(sp) + REG_L a7, PT_A7(sp) +#endif /* save the initial A0 value (needed in signal handlers) */ REG_S a0, PT_ORIG_A0(sp) /* @@ -190,6 +219,9 @@ ret_from_syscall_rejected: ret_from_exception: REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off +#endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ li t0, SR_MPP @@ -205,6 +237,10 @@ resume_userspace: andi s1, s0, _TIF_WORK_MASK bnez s1, work_pending +#ifdef CONFIG_CONTEXT_TRACKING + call context_tracking_user_enter +#endif + /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) @@ -216,6 +252,16 @@ resume_userspace: csrw CSR_SCRATCH, tp restore_all: +#ifdef CONFIG_TRACE_IRQFLAGS + REG_L s1, PT_STATUS(sp) + andi t0, s1, SR_PIE + beqz t0, 1f + call trace_hardirqs_on + j 2f +1: + call trace_hardirqs_off +2: +#endif REG_L a0, PT_STATUS(sp) /* * The current load reservation is effectively part of the processor's @@ -389,12 +435,8 @@ ENTRY(__switch_to) lw a4, TASK_TI_CPU(a1) sw a3, TASK_TI_CPU(a1) sw a4, TASK_TI_CPU(a0) -#if TASK_TI != 0 -#error "TASK_TI != 0: tp will contain a 'struct thread_info', not a 'struct task_struct' so get_current() won't work." - addi tp, a1, TASK_TI -#else + /* The offset of thread_info in task_struct is zero. */ move tp, a1 -#endif ret ENDPROC(__switch_to) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 7ed1b22950fd..d0c5c316e9bb 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -77,10 +77,16 @@ relocate: csrw CSR_SATP, a0 .align 2 1: - /* Set trap vector to spin forever to help debug */ - la a0, .Lsecondary_park + /* Set trap vector to exception handler */ + la a0, handle_exception csrw CSR_TVEC, a0 + /* + * Set sup0 scratch register to 0, indicating to exception vector that + * we are presently executing in kernel. + */ + csrw CSR_SCRATCH, zero + /* Reload the global pointer */ .option push .option norelax diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c new file mode 100644 index 000000000000..20e09056d141 --- /dev/null +++ b/arch/riscv/kernel/jump_label.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Emil Renner Berthing + * + * Based on arch/arm64/kernel/jump_label.c + */ +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/memory.h> +#include <linux/mutex.h> +#include <asm/bug.h> +#include <asm/patch.h> + +#define RISCV_INSN_NOP 0x00000013U +#define RISCV_INSN_JAL 0x0000006fU + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + void *addr = (void *)jump_entry_code(entry); + u32 insn; + + if (type == JUMP_LABEL_JMP) { + long offset = jump_entry_target(entry) - jump_entry_code(entry); + + if (WARN_ON(offset & 1 || offset < -524288 || offset >= 524288)) + return; + + insn = RISCV_INSN_JAL | + (((u32)offset & GENMASK(19, 12)) << (12 - 12)) | + (((u32)offset & GENMASK(11, 11)) << (20 - 11)) | + (((u32)offset & GENMASK(10, 1)) << (21 - 1)) | + (((u32)offset & GENMASK(20, 20)) << (31 - 20)); + } else { + insn = RISCV_INSN_NOP; + } + + mutex_lock(&text_mutex); + patch_text_nosync(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + /* + * We use the same instructions in the arch_static_branch and + * arch_static_branch_jump inline functions, so there's no + * need to patch them up here. + * The core will call arch_jump_label_transform when those + * instructions need to be replaced. + */ +} diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7191342c54da..104fba889cf7 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -263,6 +263,13 @@ static int apply_r_riscv_add32_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_add64_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u64 *)location += (u64)v; + return 0; +} + static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -270,6 +277,13 @@ static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_sub64_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u64 *)location -= (u64)v; + return 0; +} + static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, Elf_Addr v) = { [R_RISCV_32] = apply_r_riscv_32_rela, @@ -290,7 +304,9 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_RELAX] = apply_r_riscv_relax_rela, [R_RISCV_ALIGN] = apply_r_riscv_align_rela, [R_RISCV_ADD32] = apply_r_riscv_add32_rela, + [R_RISCV_ADD64] = apply_r_riscv_add64_rela, [R_RISCV_SUB32] = apply_r_riscv_sub32_rela, + [R_RISCV_SUB64] = apply_r_riscv_sub64_rela, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 31f39442df72..2b97c493427c 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -24,6 +24,12 @@ register unsigned long gp_in_global __asm__("gp"); +#ifdef CONFIG_STACKPROTECTOR +#include <linux/stackprotector.h> +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + extern asmlinkage void ret_from_fork(void); extern asmlinkage void ret_from_kernel_thread(void); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 444dc7b0fd78..2d6395f5ad54 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -30,13 +30,10 @@ enum riscv_regset { static int riscv_gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - struct pt_regs *regs; - - regs = task_pt_regs(target); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs, 0, -1); + return membuf_write(&to, task_pt_regs(target), + sizeof(struct user_regs_struct)); } static int riscv_gpr_set(struct task_struct *target, @@ -55,21 +52,13 @@ static int riscv_gpr_set(struct task_struct *target, #ifdef CONFIG_FPU static int riscv_fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - int ret; struct __riscv_d_ext_state *fstate = &target->thread.fstate; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0, - offsetof(struct __riscv_d_ext_state, fcsr)); - if (!ret) { - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0, - offsetof(struct __riscv_d_ext_state, fcsr) + - sizeof(fstate->fcsr)); - } - - return ret; + membuf_write(&to, fstate, offsetof(struct __riscv_d_ext_state, fcsr)); + membuf_store(&to, fstate->fcsr); + return membuf_zero(&to, 4); // explicitly pad } static int riscv_fpr_set(struct task_struct *target, @@ -98,8 +87,8 @@ static const struct user_regset riscv_user_regset[] = { .n = ELF_NGREG, .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), - .get = &riscv_gpr_get, - .set = &riscv_gpr_set, + .regset_get = riscv_gpr_get, + .set = riscv_gpr_set, }, #ifdef CONFIG_FPU [REGSET_F] = { @@ -107,8 +96,8 @@ static const struct user_regset riscv_user_regset[] = { .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), - .get = &riscv_fpr_get, - .set = &riscv_fpr_set, + .regset_get = riscv_fpr_get, + .set = riscv_fpr_set, }, #endif }; diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b1d4f452f843..554b0fb47060 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -16,6 +16,7 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/delay.h> +#include <linux/irq_work.h> #include <asm/clint.h> #include <asm/sbi.h> @@ -26,6 +27,7 @@ enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_IRQ_WORK, IPI_MAX }; @@ -123,6 +125,13 @@ static inline void clear_ipi(void) clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + send_ipi_single(smp_processor_id(), IPI_IRQ_WORK); +} +#endif + void handle_IPI(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -158,6 +167,11 @@ void handle_IPI(struct pt_regs *regs) ipi_stop(); } + if (ops & (1 << IPI_IRQ_WORK)) { + stats[IPI_IRQ_WORK]++; + irq_work_run(); + } + BUG_ON((ops >> IPI_MAX) != 0); /* Order data access and bit testing. */ @@ -173,6 +187,7 @@ static const char * const ipi_names[] = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", }; void show_ipi_stats(struct seq_file *p, int prec) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 4e9922790f6e..356825a57551 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -106,7 +106,7 @@ void __init setup_smp(void) } } -int start_secondary_cpu(int cpu, struct task_struct *tidle) +static int start_secondary_cpu(int cpu, struct task_struct *tidle) { if (cpu_ops[cpu]->cpu_start) return cpu_ops[cpu]->cpu_start(cpu, tidle); @@ -121,7 +121,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) ret = start_secondary_cpu(cpu, tidle); if (!ret) { - lockdep_assert_held(&cpu_running); wait_for_completion_timeout(&cpu_running, msecs_to_jiffies(1000)); @@ -146,6 +145,7 @@ void __init smp_cpus_done(unsigned int max_cpus) asmlinkage __visible void smp_callin(void) { struct mm_struct *mm = &init_mm; + unsigned int curr_cpuid = smp_processor_id(); if (!IS_ENABLED(CONFIG_RISCV_SBI)) clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); @@ -154,10 +154,10 @@ asmlinkage __visible void smp_callin(void) mmgrab(mm); current->active_mm = mm; - trap_init(); - notify_cpu_starting(smp_processor_id()); - update_siblings_masks(smp_processor_id()); - set_cpu_online(smp_processor_id(), 1); + notify_cpu_starting(curr_cpuid); + update_siblings_masks(curr_cpuid); + set_cpu_online(curr_cpuid, 1); + /* * Remote TLB flushes are ignored while the CPU is offline, so emit * a local TLB flush right now just in case. diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 7d95cce5e47c..ad14f4466d92 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -174,13 +174,7 @@ int is_valid_bugaddr(unsigned long pc) } #endif /* CONFIG_GENERIC_BUG */ +/* stvec & scratch is already set from head.S */ void trap_init(void) { - /* - * Set sup0 scratch register to 0, indicating to exception vector - * that we are presently executing in the kernel - */ - csr_write(CSR_SCRATCH, 0); - /* Set the exception vector address */ - csr_write(CSR_TVEC, &handle_exception); } diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index e4c7c2c8a02f..478e7338ddc1 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -16,6 +16,8 @@ vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o +ccflags-y := -fno-stack-protector + ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif @@ -32,6 +34,7 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os # Disable gcov profiling for VDSO code GCOV_PROFILE := n +KCOV_INSTRUMENT := n # Force dependency $(obj)/vdso.o: $(obj)/vdso.so diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index e6f8016b366a..f3586e31ed1e 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -22,6 +22,7 @@ SECTIONS /* Beginning of code and text segment */ . = LOAD_OFFSET; _start = .; + _stext = .; HEAD_TEXT_SECTION . = ALIGN(PAGE_SIZE); @@ -54,7 +55,6 @@ SECTIONS . = ALIGN(SECTION_ALIGN); .text : { _text = .; - _stext = .; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 363ef01c30b1..c0185e556ca5 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -5,6 +5,8 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_init.o = -pg endif +KCOV_INSTRUMENT_init.o := n + obj-y += init.o obj-y += extable.o obj-$(CONFIG_MMU) += fault.o pageattr.o diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f6e6286b3d15..787c75f751a5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -541,12 +541,39 @@ void mark_rodata_ro(void) } #endif +static void __init resource_init(void) +{ + struct memblock_region *region; + + for_each_memblock(memory, region) { + struct resource *res; + + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); + if (!res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + + if (memblock_is_nomap(region)) { + res->name = "reserved"; + res->flags = IORESOURCE_MEM; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + + request_resource(&iomem_resource, res); + } +} + void __init paging_init(void) { setup_vm_final(); sparse_init(); setup_zero_page(); zone_sizes_init(); + resource_init(); } #ifdef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 289a9a5ea5b5..19fecb362d81 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -7,6 +7,7 @@ #include <linux/pgtable.h> #include <asm/tlbflush.h> #include <asm/bitops.h> +#include <asm/set_memory.h> struct pageattr_masks { pgprot_t set_mask; @@ -94,7 +95,7 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next, return 0; } -const static struct mm_walk_ops pageattr_ops = { +static const struct mm_walk_ops pageattr_ops = { .pgd_entry = pageattr_pgd_entry, .p4d_entry = pageattr_p4d_entry, .pud_entry = pageattr_pud_entry, diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 3c72a3b77253..11d2f7d05f91 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -944,28 +944,14 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) static int s390_regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { + unsigned pos; if (target == current) save_access_regs(target->thread.acrs); - if (kbuf) { - unsigned long *k = kbuf; - while (count > 0) { - *k++ = __peek_user(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - unsigned long __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long)) + membuf_store(&to, __peek_user(target, pos)); return 0; } @@ -1006,8 +992,8 @@ static int s390_regs_set(struct task_struct *target, } static int s390_fpregs_get(struct task_struct *target, - const struct user_regset *regset, unsigned int pos, - unsigned int count, void *kbuf, void __user *ubuf) + const struct user_regset *regset, + struct membuf to) { _s390_fp_regs fp_regs; @@ -1017,8 +1003,7 @@ static int s390_fpregs_get(struct task_struct *target, fp_regs.fpc = target->thread.fpu.fpc; fpregs_store(&fp_regs, &target->thread.fpu); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fp_regs, 0, -1); + return membuf_write(&to, &fp_regs, sizeof(fp_regs)); } static int s390_fpregs_set(struct task_struct *target, @@ -1065,20 +1050,9 @@ static int s390_fpregs_set(struct task_struct *target, static int s390_last_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (count > 0) { - if (kbuf) { - unsigned long *k = kbuf; - *k = target->thread.last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(target->thread.last_break, u)) - return -EFAULT; - } - } - return 0; + return membuf_store(&to, target->thread.last_break); } static int s390_last_break_set(struct task_struct *target, @@ -1091,16 +1065,13 @@ static int s390_last_break_set(struct task_struct *target, static int s390_tdb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); - unsigned char *data; if (!(regs->int_code & 0x200)) return -ENODATA; - data = target->thread.trap_tdb; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); + return membuf_write(&to, target->thread.trap_tdb, 256); } static int s390_tdb_set(struct task_struct *target, @@ -1113,8 +1084,7 @@ static int s390_tdb_set(struct task_struct *target, static int s390_vxrs_low_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { __u64 vxrs[__NUM_VXRS_LOW]; int i; @@ -1125,7 +1095,7 @@ static int s390_vxrs_low_get(struct task_struct *target, save_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++) vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + return membuf_write(&to, vxrs, sizeof(vxrs)); } static int s390_vxrs_low_set(struct task_struct *target, @@ -1154,18 +1124,14 @@ static int s390_vxrs_low_set(struct task_struct *target, static int s390_vxrs_high_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - __vector128 vxrs[__NUM_VXRS_HIGH]; - if (!MACHINE_HAS_VX) return -ENODEV; if (target == current) save_fpu_regs(); - memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs)); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW, + __NUM_VXRS_HIGH * sizeof(__vector128)); } static int s390_vxrs_high_set(struct task_struct *target, @@ -1187,12 +1153,9 @@ static int s390_vxrs_high_set(struct task_struct *target, static int s390_system_call_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned int *data = &target->thread.system_call; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(unsigned int)); + return membuf_store(&to, target->thread.system_call); } static int s390_system_call_set(struct task_struct *target, @@ -1207,8 +1170,7 @@ static int s390_system_call_set(struct task_struct *target, static int s390_gs_cb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct gs_cb *data = target->thread.gs_cb; @@ -1218,8 +1180,7 @@ static int s390_gs_cb_get(struct task_struct *target, return -ENODATA; if (target == current) save_gs_cb(data); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + return membuf_write(&to, data, sizeof(struct gs_cb)); } static int s390_gs_cb_set(struct task_struct *target, @@ -1263,8 +1224,7 @@ static int s390_gs_cb_set(struct task_struct *target, static int s390_gs_bc_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct gs_cb *data = target->thread.gs_bc_cb; @@ -1272,8 +1232,7 @@ static int s390_gs_bc_get(struct task_struct *target, return -ENODEV; if (!data) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + return membuf_write(&to, data, sizeof(struct gs_cb)); } static int s390_gs_bc_set(struct task_struct *target, @@ -1324,8 +1283,7 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb) static int s390_runtime_instr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct runtime_instr_cb *data = target->thread.ri_cb; @@ -1334,8 +1292,7 @@ static int s390_runtime_instr_get(struct task_struct *target, if (!data) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct runtime_instr_cb)); + return membuf_write(&to, data, sizeof(struct runtime_instr_cb)); } static int s390_runtime_instr_set(struct task_struct *target, @@ -1391,7 +1348,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = s390_regs_get, + .regset_get = s390_regs_get, .set = s390_regs_set, }, { @@ -1399,7 +1356,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = s390_fpregs_get, + .regset_get = s390_fpregs_get, .set = s390_fpregs_set, }, { @@ -1407,7 +1364,7 @@ static const struct user_regset s390_regsets[] = { .n = 1, .size = sizeof(unsigned int), .align = sizeof(unsigned int), - .get = s390_system_call_get, + .regset_get = s390_system_call_get, .set = s390_system_call_set, }, { @@ -1415,7 +1372,7 @@ static const struct user_regset s390_regsets[] = { .n = 1, .size = sizeof(long), .align = sizeof(long), - .get = s390_last_break_get, + .regset_get = s390_last_break_get, .set = s390_last_break_set, }, { @@ -1423,7 +1380,7 @@ static const struct user_regset s390_regsets[] = { .n = 1, .size = 256, .align = 1, - .get = s390_tdb_get, + .regset_get = s390_tdb_get, .set = s390_tdb_set, }, { @@ -1431,7 +1388,7 @@ static const struct user_regset s390_regsets[] = { .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_vxrs_low_get, + .regset_get = s390_vxrs_low_get, .set = s390_vxrs_low_set, }, { @@ -1439,7 +1396,7 @@ static const struct user_regset s390_regsets[] = { .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), - .get = s390_vxrs_high_get, + .regset_get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, { @@ -1447,7 +1404,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_cb_get, + .regset_get = s390_gs_cb_get, .set = s390_gs_cb_set, }, { @@ -1455,7 +1412,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_bc_get, + .regset_get = s390_gs_bc_get, .set = s390_gs_bc_set, }, { @@ -1463,7 +1420,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_runtime_instr_get, + .regset_get = s390_runtime_instr_get, .set = s390_runtime_instr_set, }, }; @@ -1478,28 +1435,15 @@ static const struct user_regset_view user_s390_view = { #ifdef CONFIG_COMPAT static int s390_compat_regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { + unsigned n; + if (target == current) save_access_regs(target->thread.acrs); - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = __peek_user_compat(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user_compat(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t)) + membuf_store(&to, __peek_user_compat(target, n)); return 0; } @@ -1541,29 +1485,14 @@ static int s390_compat_regs_set(struct task_struct *target, static int s390_compat_regs_high_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { compat_ulong_t *gprs_high; + int i; - gprs_high = (compat_ulong_t *) - &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = *gprs_high; - gprs_high += 2; - count -= sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(*gprs_high, u++)) - return -EFAULT; - gprs_high += 2; - count -= sizeof(*u); - } - } + gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs; + for (i = 0; i < NUM_GPRS; i++, gprs_high += 2) + membuf_store(&to, *gprs_high); return 0; } @@ -1602,23 +1531,11 @@ static int s390_compat_regs_high_set(struct task_struct *target, static int s390_compat_last_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - compat_ulong_t last_break; + compat_ulong_t last_break = target->thread.last_break; - if (count > 0) { - last_break = target->thread.last_break; - if (kbuf) { - unsigned long *k = kbuf; - *k = last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(last_break, u)) - return -EFAULT; - } - } - return 0; + return membuf_store(&to, (unsigned long)last_break); } static int s390_compat_last_break_set(struct task_struct *target, @@ -1635,7 +1552,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_compat_regs_get, + .regset_get = s390_compat_regs_get, .set = s390_compat_regs_set, }, { @@ -1643,7 +1560,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_fpregs_get, + .regset_get = s390_fpregs_get, .set = s390_fpregs_set, }, { @@ -1651,7 +1568,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = 1, .size = sizeof(compat_uint_t), .align = sizeof(compat_uint_t), - .get = s390_system_call_get, + .regset_get = s390_system_call_get, .set = s390_system_call_set, }, { @@ -1659,7 +1576,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = 1, .size = sizeof(long), .align = sizeof(long), - .get = s390_compat_last_break_get, + .regset_get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, { @@ -1667,7 +1584,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = 1, .size = 256, .align = 1, - .get = s390_tdb_get, + .regset_get = s390_tdb_get, .set = s390_tdb_set, }, { @@ -1675,7 +1592,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_vxrs_low_get, + .regset_get = s390_vxrs_low_get, .set = s390_vxrs_low_set, }, { @@ -1683,7 +1600,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), - .get = s390_vxrs_high_get, + .regset_get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, { @@ -1691,7 +1608,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_compat_regs_high_get, + .regset_get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, { @@ -1699,7 +1616,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_cb_get, + .regset_get = s390_gs_cb_get, .set = s390_gs_cb_set, }, { @@ -1707,7 +1624,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_bc_get, + .regset_get = s390_gs_bc_get, .set = s390_gs_bc_set, }, { @@ -1715,7 +1632,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_runtime_instr_get, + .regset_get = s390_runtime_instr_get, .set = s390_runtime_instr_set, }, }; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index b0fefd8f53a6..cde0a66116d2 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -103,9 +103,8 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) fpvalid = !!tsk_used_math(tsk); if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, 0, - sizeof(struct user_fpu_struct), - fpu, NULL); + fpvalid = !fpregs_get(tsk, NULL, + (struct membuf){fpu, sizeof(*fpu)}); #endif return fpvalid; diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 64bfb714943e..5c93bdb6c41a 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -134,26 +134,11 @@ void ptrace_disable(struct task_struct *child) static int genregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_regs *regs = task_pt_regs(target); - int ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - regs->regs, - 0, 16 * sizeof(unsigned long)); - if (!ret) - /* PC, PR, SR, GBR, MACH, MACL, TRA */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - ®s->pc, - offsetof(struct pt_regs, pc), - sizeof(struct pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct pt_regs), -1); - - return ret; + return membuf_write(&to, regs, sizeof(struct pt_regs)); } static int genregs_set(struct task_struct *target, @@ -182,8 +167,7 @@ static int genregs_set(struct task_struct *target, #ifdef CONFIG_SH_FPU int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { int ret; @@ -191,12 +175,8 @@ int fpregs_get(struct task_struct *target, if (ret) return ret; - if ((boot_cpu_data.flags & CPU_HAS_FPU)) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.xstate->hardfpu, 0, -1); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.xstate->softfpu, 0, -1); + return membuf_write(&to, target->thread.xstate, + sizeof(struct user_fpu_struct)); } static int fpregs_set(struct task_struct *target, @@ -230,20 +210,12 @@ static int fpregs_active(struct task_struct *target, #ifdef CONFIG_SH_DSP static int dspregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_dspregs *regs = (struct pt_dspregs *)&target->thread.dsp_status.dsp_regs; - int ret; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, regs, - 0, sizeof(struct pt_dspregs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct pt_dspregs), -1); - - return ret; + return membuf_write(&to, regs, sizeof(struct pt_dspregs)); } static int dspregs_set(struct task_struct *target, @@ -324,7 +296,7 @@ static const struct user_regset sh_regsets[] = { .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), - .get = genregs_get, + .regset_get = genregs_get, .set = genregs_set, }, @@ -334,7 +306,7 @@ static const struct user_regset sh_regsets[] = { .n = sizeof(struct user_fpu_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = fpregs_get, + .regset_get = fpregs_get, .set = fpregs_set, .active = fpregs_active, }, @@ -345,7 +317,7 @@ static const struct user_regset sh_regsets[] = { .n = sizeof(struct pt_dspregs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = dspregs_get, + .regset_get = dspregs_get, .set = dspregs_set, .active = dspregs_active, }, diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 67f5a3b44c2e..4144be650d41 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -290,8 +290,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_BLK_DEV_RAM rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); #endif if (!MOUNT_ROOT_RDONLY) diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 47eb315d411c..5318174a0268 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -83,41 +83,25 @@ static int regwindow32_set(struct task_struct *target, static int genregs32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_regs *regs = target->thread.kregs; u32 uregs[16]; - int ret; if (target == current) flush_user_windows(); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - regs->u_regs, - 0, 16 * sizeof(u32)); - if (ret || !count) - return ret; - - if (pos < 32 * sizeof(u32)) { - if (regwindow32_get(target, regs, uregs)) - return -EFAULT; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - uregs, - 16 * sizeof(u32), 32 * sizeof(u32)); - if (ret || !count) - return ret; - } - - uregs[0] = regs->psr; - uregs[1] = regs->pc; - uregs[2] = regs->npc; - uregs[3] = regs->y; - uregs[4] = 0; /* WIM */ - uregs[5] = 0; /* TBR */ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - uregs, - 32 * sizeof(u32), 38 * sizeof(u32)); + membuf_write(&to, regs->u_regs, 16 * sizeof(u32)); + if (!to.left) + return 0; + if (regwindow32_get(target, regs, uregs)) + return -EFAULT; + membuf_write(&to, uregs, 16 * sizeof(u32)); + membuf_store(&to, regs->psr); + membuf_store(&to, regs->pc); + membuf_store(&to, regs->npc); + membuf_store(&to, regs->y); + return membuf_zero(&to, 2 * sizeof(u32)); } static int genregs32_set(struct task_struct *target, @@ -139,19 +123,18 @@ static int genregs32_set(struct task_struct *target, if (ret || !count) return ret; - if (pos < 32 * sizeof(u32)) { - if (regwindow32_get(target, regs, uregs)) - return -EFAULT; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - uregs, - 16 * sizeof(u32), 32 * sizeof(u32)); - if (ret) - return ret; - if (regwindow32_set(target, regs, uregs)) - return -EFAULT; - if (!count) - return 0; - } + if (regwindow32_get(target, regs, uregs)) + return -EFAULT; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + uregs, + 16 * sizeof(u32), 32 * sizeof(u32)); + if (ret) + return ret; + if (regwindow32_set(target, regs, uregs)) + return -EFAULT; + if (!count) + return 0; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &psr, 32 * sizeof(u32), 33 * sizeof(u32)); @@ -182,46 +165,18 @@ static int genregs32_set(struct task_struct *target, static int fpregs32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - const unsigned long *fpregs = target->thread.float_regs; - int ret = 0; - #if 0 if (target == current) save_and_clear_fpu(); #endif - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - fpregs, - 0, 32 * sizeof(u32)); - - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 32 * sizeof(u32), - 33 * sizeof(u32)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fsr, - 33 * sizeof(u32), - 34 * sizeof(u32)); - - if (!ret) { - unsigned long val; - - val = (1 << 8) | (8 << 16); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &val, - 34 * sizeof(u32), - 35 * sizeof(u32)); - } - - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 35 * sizeof(u32), -1); - - return ret; + membuf_write(&to, target->thread.float_regs, 32 * sizeof(u32)); + membuf_zero(&to, sizeof(u32)); + membuf_write(&to, &target->thread.fsr, sizeof(u32)); + membuf_store(&to, (u32)((1 << 8) | (8 << 16))); + return membuf_zero(&to, 64 * sizeof(u32)); } static int fpregs32_set(struct task_struct *target, @@ -243,13 +198,11 @@ static int fpregs32_set(struct task_struct *target, user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 32 * sizeof(u32), 33 * sizeof(u32)); - if (!ret && count > 0) { + if (!ret) ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fsr, 33 * sizeof(u32), 34 * sizeof(u32)); - } - if (!ret) ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 34 * sizeof(u32), -1); @@ -268,7 +221,7 @@ static const struct user_regset sparc32_regsets[] = { .core_note_type = NT_PRSTATUS, .n = 38, .size = sizeof(u32), .align = sizeof(u32), - .get = genregs32_get, .set = genregs32_set + .regset_get = genregs32_get, .set = genregs32_set }, /* Format is: * F0 --> F31 @@ -284,10 +237,104 @@ static const struct user_regset sparc32_regsets[] = { .core_note_type = NT_PRFPREG, .n = 99, .size = sizeof(u32), .align = sizeof(u32), - .get = fpregs32_get, .set = fpregs32_set + .regset_get = fpregs32_get, .set = fpregs32_set + }, +}; + +static int getregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const struct pt_regs *regs = target->thread.kregs; + + if (target == current) + flush_user_windows(); + + membuf_store(&to, regs->psr); + membuf_store(&to, regs->pc); + membuf_store(&to, regs->npc); + membuf_store(&to, regs->y); + return membuf_write(&to, regs->u_regs + 1, 15 * sizeof(u32)); +} + +static int setregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = target->thread.kregs; + u32 v[4]; + int ret; + + if (target == current) + flush_user_windows(); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + v, + 0, 4 * sizeof(u32)); + if (ret) + return ret; + regs->psr = (regs->psr & ~(PSR_ICC | PSR_SYSCALL)) | + (v[0] & (PSR_ICC | PSR_SYSCALL)); + regs->pc = v[1]; + regs->npc = v[2]; + regs->y = v[3]; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->u_regs + 1, + 4 * sizeof(u32) , 19 * sizeof(u32)); +} + +static int getfpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ +#if 0 + if (target == current) + save_and_clear_fpu(); +#endif + membuf_write(&to, &target->thread.float_regs, 32 * sizeof(u32)); + membuf_write(&to, &target->thread.fsr, sizeof(u32)); + return membuf_zero(&to, 35 * sizeof(u32)); +} + +static int setfpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long *fpregs = target->thread.float_regs; + int ret; + +#if 0 + if (target == current) + save_and_clear_fpu(); +#endif + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, + 0, 32 * sizeof(u32)); + if (ret) + return ret; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fsr, + 32 * sizeof(u32), + 33 * sizeof(u32)); +} + +static const struct user_regset ptrace32_regsets[] = { + [REGSET_GENERAL] = { + .n = 19, .size = sizeof(u32), + .regset_get = getregs_get, .set = setregs_set, + }, + [REGSET_FP] = { + .n = 68, .size = sizeof(u32), + .regset_get = getfpregs_get, .set = setfpregs_set, }, }; +static const struct user_regset_view ptrace32_view = { + .regsets = ptrace32_regsets, .n = ARRAY_SIZE(ptrace32_regsets) +}; + static const struct user_regset_view user_sparc32_view = { .name = "sparc", .e_machine = EM_SPARC, .regsets = sparc32_regsets, .n = ARRAY_SIZE(sparc32_regsets) @@ -315,74 +362,44 @@ long arch_ptrace(struct task_struct *child, long request, { unsigned long addr2 = current->thread.kregs->u_regs[UREG_I4]; void __user *addr2p; - const struct user_regset_view *view; struct pt_regs __user *pregs; struct fps __user *fps; int ret; - view = task_user_regset_view(current); addr2p = (void __user *) addr2; pregs = (struct pt_regs __user *) addr; fps = (struct fps __user *) addr; switch(request) { case PTRACE_GETREGS: { - ret = copy_regset_to_user(child, view, REGSET_GENERAL, - 32 * sizeof(u32), - 4 * sizeof(u32), - &pregs->psr); - if (!ret) - copy_regset_to_user(child, view, REGSET_GENERAL, - 1 * sizeof(u32), - 15 * sizeof(u32), - &pregs->u_regs[0]); + ret = copy_regset_to_user(child, &ptrace32_view, + REGSET_GENERAL, 0, + 19 * sizeof(u32), + pregs); break; } case PTRACE_SETREGS: { - ret = copy_regset_from_user(child, view, REGSET_GENERAL, - 32 * sizeof(u32), - 4 * sizeof(u32), - &pregs->psr); - if (!ret) - copy_regset_from_user(child, view, REGSET_GENERAL, - 1 * sizeof(u32), - 15 * sizeof(u32), - &pregs->u_regs[0]); + ret = copy_regset_from_user(child, &ptrace32_view, + REGSET_GENERAL, 0, + 19 * sizeof(u32), + pregs); break; } case PTRACE_GETFPREGS: { - ret = copy_regset_to_user(child, view, REGSET_FP, - 0 * sizeof(u32), - 32 * sizeof(u32), - &fps->regs[0]); - if (!ret) - ret = copy_regset_to_user(child, view, REGSET_FP, - 33 * sizeof(u32), - 1 * sizeof(u32), - &fps->fsr); - - if (!ret) { - if (__put_user(0, &fps->fpqd) || - __put_user(0, &fps->flags) || - __put_user(0, &fps->extra) || - clear_user(fps->fpq, sizeof(fps->fpq))) - ret = -EFAULT; - } + ret = copy_regset_to_user(child, &ptrace32_view, + REGSET_FP, 0, + 68 * sizeof(u32), + fps); break; } case PTRACE_SETFPREGS: { - ret = copy_regset_from_user(child, view, REGSET_FP, - 0 * sizeof(u32), - 32 * sizeof(u32), - &fps->regs[0]); - if (!ret) - ret = copy_regset_from_user(child, view, REGSET_FP, - 33 * sizeof(u32), - 1 * sizeof(u32), - &fps->fsr); + ret = copy_regset_from_user(child, &ptrace32_view, + REGSET_FP, 0, + 33 * sizeof(u32), + fps); break; } diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 7122efb4b1cc..2b92155db8a5 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -246,52 +246,23 @@ enum sparc_regset { static int genregs64_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_regs *regs = task_pt_regs(target); - int ret; + struct reg_window window; if (target == current) flushw_user(); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - regs->u_regs, - 0, 16 * sizeof(u64)); - if (!ret && count && pos < (32 * sizeof(u64))) { - struct reg_window window; - - if (regwindow64_get(target, regs, &window)) - return -EFAULT; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &window, - 16 * sizeof(u64), - 32 * sizeof(u64)); - } - - if (!ret) { - /* TSTATE, TPC, TNPC */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - ®s->tstate, - 32 * sizeof(u64), - 35 * sizeof(u64)); - } - - if (!ret) { - unsigned long y = regs->y; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &y, - 35 * sizeof(u64), - 36 * sizeof(u64)); - } - - if (!ret) { - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 36 * sizeof(u64), -1); - - } - return ret; + membuf_write(&to, regs->u_regs, 16 * sizeof(u64)); + if (!to.left) + return 0; + if (regwindow64_get(target, regs, &window)) + return -EFAULT; + membuf_write(&to, &window, 16 * sizeof(u64)); + /* TSTATE, TPC, TNPC */ + membuf_write(&to, ®s->tstate, 3 * sizeof(u64)); + return membuf_store(&to, (u64)regs->y); } static int genregs64_set(struct task_struct *target, @@ -370,69 +341,32 @@ static int genregs64_set(struct task_struct *target, static int fpregs64_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - const unsigned long *fpregs = task_thread_info(target)->fpregs; - unsigned long fprs, fsr, gsr; - int ret; + struct thread_info *t = task_thread_info(target); + unsigned long fprs; if (target == current) save_and_clear_fpu(); - fprs = task_thread_info(target)->fpsaved[0]; + fprs = t->fpsaved[0]; if (fprs & FPRS_DL) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - fpregs, - 0, 16 * sizeof(u64)); + membuf_write(&to, t->fpregs, 16 * sizeof(u64)); else - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 0, - 16 * sizeof(u64)); - - if (!ret) { - if (fprs & FPRS_DU) - ret = user_regset_copyout(&pos, &count, - &kbuf, &ubuf, - fpregs + 16, - 16 * sizeof(u64), - 32 * sizeof(u64)); - else - ret = user_regset_copyout_zero(&pos, &count, - &kbuf, &ubuf, - 16 * sizeof(u64), - 32 * sizeof(u64)); - } + membuf_zero(&to, 16 * sizeof(u64)); + if (fprs & FPRS_DU) + membuf_write(&to, t->fpregs + 16, 16 * sizeof(u64)); + else + membuf_zero(&to, 16 * sizeof(u64)); if (fprs & FPRS_FEF) { - fsr = task_thread_info(target)->xfsr[0]; - gsr = task_thread_info(target)->gsr[0]; + membuf_store(&to, t->xfsr[0]); + membuf_store(&to, t->gsr[0]); } else { - fsr = gsr = 0; + membuf_zero(&to, 2 * sizeof(u64)); } - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fsr, - 32 * sizeof(u64), - 33 * sizeof(u64)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &gsr, - 33 * sizeof(u64), - 34 * sizeof(u64)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fprs, - 34 * sizeof(u64), - 35 * sizeof(u64)); - - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 35 * sizeof(u64), -1); - - return ret; + return membuf_store(&to, fprs); } static int fpregs64_set(struct task_struct *target, @@ -490,7 +424,7 @@ static const struct user_regset sparc64_regsets[] = { .core_note_type = NT_PRSTATUS, .n = 36, .size = sizeof(u64), .align = sizeof(u64), - .get = genregs64_get, .set = genregs64_set + .regset_get = genregs64_get, .set = genregs64_set }, /* Format is: * F0 --> F63 @@ -502,10 +436,96 @@ static const struct user_regset sparc64_regsets[] = { .core_note_type = NT_PRFPREG, .n = 35, .size = sizeof(u64), .align = sizeof(u64), - .get = fpregs64_get, .set = fpregs64_set + .regset_get = fpregs64_get, .set = fpregs64_set + }, +}; + +static int getregs64_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const struct pt_regs *regs = task_pt_regs(target); + + if (target == current) + flushw_user(); + + membuf_write(&to, regs->u_regs + 1, 15 * sizeof(u64)); + membuf_store(&to, (u64)0); + membuf_write(&to, ®s->tstate, 3 * sizeof(u64)); + return membuf_store(&to, (u64)regs->y); +} + +static int setregs64_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + unsigned long y = regs->y; + unsigned long tstate; + int ret; + + if (target == current) + flushw_user(); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->u_regs + 1, + 0 * sizeof(u64), + 15 * sizeof(u64)); + if (ret) + return ret; + ret =user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + 15 * sizeof(u64), 16 * sizeof(u64)); + if (ret) + return ret; + /* TSTATE */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &tstate, + 16 * sizeof(u64), + 17 * sizeof(u64)); + if (ret) + return ret; + /* Only the condition codes and the "in syscall" + * state can be modified in the %tstate register. + */ + tstate &= (TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL); + regs->tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL); + regs->tstate |= tstate; + + /* TPC, TNPC */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->tpc, + 17 * sizeof(u64), + 19 * sizeof(u64)); + if (ret) + return ret; + /* Y */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &y, + 19 * sizeof(u64), + 20 * sizeof(u64)); + if (!ret) + regs->y = y; + return ret; +} + +static const struct user_regset ptrace64_regsets[] = { + /* Format is: + * G1 --> G7 + * O0 --> O7 + * 0 + * TSTATE, TPC, TNPC, Y + */ + [REGSET_GENERAL] = { + .n = 20, .size = sizeof(u64), + .regset_get = getregs64_get, .set = setregs64_set, }, }; +static const struct user_regset_view ptrace64_view = { + .regsets = ptrace64_regsets, .n = ARRAY_SIZE(ptrace64_regsets) +}; + static const struct user_regset_view user_sparc64_view = { .name = "sparc64", .e_machine = EM_SPARCV9, .regsets = sparc64_regsets, .n = ARRAY_SIZE(sparc64_regsets) @@ -514,108 +534,28 @@ static const struct user_regset_view user_sparc64_view = { #ifdef CONFIG_COMPAT static int genregs32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct pt_regs *regs = task_pt_regs(target); - compat_ulong_t __user *reg_window; - compat_ulong_t *k = kbuf; - compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; + u32 uregs[16]; + int i; if (target == current) flushw_user(); - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) { - for (; count > 0 && pos < 16; count--) - *k++ = regs->u_regs[pos++]; - - reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - if (target == current) { - for (; count > 0 && pos < 32; count--) { - if (get_user(*k++, ®_window[pos++])) - return -EFAULT; - } - } else { - for (; count > 0 && pos < 32; count--) { - if (access_process_vm(target, - (unsigned long) - ®_window[pos], - k, sizeof(*k), - FOLL_FORCE) - != sizeof(*k)) - return -EFAULT; - k++; - pos++; - } - } - } else { - for (; count > 0 && pos < 16; count--) { - if (put_user((compat_ulong_t) regs->u_regs[pos++], u++)) - return -EFAULT; - } - - reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - if (target == current) { - for (; count > 0 && pos < 32; count--) { - if (get_user(reg, ®_window[pos++]) || - put_user(reg, u++)) - return -EFAULT; - } - } else { - for (; count > 0 && pos < 32; count--) { - if (access_process_vm(target, - (unsigned long) - ®_window[pos++], - ®, sizeof(reg), - FOLL_FORCE) - != sizeof(reg)) - return -EFAULT; - if (put_user(reg, u++)) - return -EFAULT; - } - } - } - while (count > 0) { - switch (pos) { - case 32: /* PSR */ - reg = tstate_to_psr(regs->tstate); - break; - case 33: /* PC */ - reg = regs->tpc; - break; - case 34: /* NPC */ - reg = regs->tnpc; - break; - case 35: /* Y */ - reg = regs->y; - break; - case 36: /* WIM */ - case 37: /* TBR */ - reg = 0; - break; - default: - goto finish; - } - - if (kbuf) - *k++ = reg; - else if (put_user(reg, u++)) - return -EFAULT; - pos++; - count--; - } -finish: - pos *= sizeof(reg); - count *= sizeof(reg); - - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 38 * sizeof(reg), -1); + for (i = 0; i < 16; i++) + membuf_store(&to, (u32)regs->u_regs[i]); + if (!to.left) + return 0; + if (get_from_target(target, regs->u_regs[UREG_I6], + uregs, sizeof(uregs))) + return -EFAULT; + membuf_write(&to, uregs, 16 * sizeof(u32)); + membuf_store(&to, (u32)tstate_to_psr(regs->tstate)); + membuf_store(&to, (u32)(regs->tpc)); + membuf_store(&to, (u32)(regs->tnpc)); + membuf_store(&to, (u32)(regs->y)); + return membuf_zero(&to, 2 * sizeof(u32)); } static int genregs32_set(struct task_struct *target, @@ -737,56 +677,24 @@ finish: static int fpregs32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - const unsigned long *fpregs = task_thread_info(target)->fpregs; - compat_ulong_t enabled; - unsigned long fprs; - compat_ulong_t fsr; - int ret = 0; + struct thread_info *t = task_thread_info(target); + bool enabled; if (target == current) save_and_clear_fpu(); - fprs = task_thread_info(target)->fpsaved[0]; - if (fprs & FPRS_FEF) { - fsr = task_thread_info(target)->xfsr[0]; - enabled = 1; - } else { - fsr = 0; - enabled = 0; - } - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - fpregs, - 0, 32 * sizeof(u32)); - - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 32 * sizeof(u32), - 33 * sizeof(u32)); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fsr, - 33 * sizeof(u32), - 34 * sizeof(u32)); - - if (!ret) { - compat_ulong_t val; - - val = (enabled << 8) | (8 << 16); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &val, - 34 * sizeof(u32), - 35 * sizeof(u32)); - } + enabled = t->fpsaved[0] & FPRS_FEF; - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 35 * sizeof(u32), -1); - - return ret; + membuf_write(&to, t->fpregs, 32 * sizeof(u32)); + membuf_zero(&to, sizeof(u32)); + if (enabled) + membuf_store(&to, (u32)t->xfsr[0]); + else + membuf_zero(&to, sizeof(u32)); + membuf_store(&to, (u32)((enabled << 8) | (8 << 16))); + return membuf_zero(&to, 64 * sizeof(u32)); } static int fpregs32_set(struct task_struct *target, @@ -847,7 +755,7 @@ static const struct user_regset sparc32_regsets[] = { .core_note_type = NT_PRSTATUS, .n = 38, .size = sizeof(u32), .align = sizeof(u32), - .get = genregs32_get, .set = genregs32_set + .regset_get = genregs32_get, .set = genregs32_set }, /* Format is: * F0 --> F31 @@ -863,10 +771,133 @@ static const struct user_regset sparc32_regsets[] = { .core_note_type = NT_PRFPREG, .n = 99, .size = sizeof(u32), .align = sizeof(u32), - .get = fpregs32_get, .set = fpregs32_set + .regset_get = fpregs32_get, .set = fpregs32_set }, }; +static int getregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const struct pt_regs *regs = task_pt_regs(target); + int i; + + if (target == current) + flushw_user(); + + membuf_store(&to, (u32)tstate_to_psr(regs->tstate)); + membuf_store(&to, (u32)(regs->tpc)); + membuf_store(&to, (u32)(regs->tnpc)); + membuf_store(&to, (u32)(regs->y)); + for (i = 1; i < 16; i++) + membuf_store(&to, (u32)regs->u_regs[i]); + return to.left; +} + +static int setregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + unsigned long tstate; + u32 uregs[19]; + int i, ret; + + if (target == current) + flushw_user(); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + uregs, + 0, 19 * sizeof(u32)); + if (ret) + return ret; + + tstate = regs->tstate; + tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL); + tstate |= psr_to_tstate_icc(uregs[0]); + if (uregs[0] & PSR_SYSCALL) + tstate |= TSTATE_SYSCALL; + regs->tstate = tstate; + regs->tpc = uregs[1]; + regs->tnpc = uregs[2]; + regs->y = uregs[3]; + + for (i = 1; i < 15; i++) + regs->u_regs[i] = uregs[3 + i]; + return 0; +} + +static int getfpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct thread_info *t = task_thread_info(target); + + if (target == current) + save_and_clear_fpu(); + + membuf_write(&to, t->fpregs, 32 * sizeof(u32)); + if (t->fpsaved[0] & FPRS_FEF) + membuf_store(&to, (u32)t->xfsr[0]); + else + membuf_zero(&to, sizeof(u32)); + return membuf_zero(&to, 35 * sizeof(u32)); +} + +static int setfpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long *fpregs = task_thread_info(target)->fpregs; + unsigned long fprs; + int ret; + + if (target == current) + save_and_clear_fpu(); + + fprs = task_thread_info(target)->fpsaved[0]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, + 0, 32 * sizeof(u32)); + if (!ret) { + compat_ulong_t fsr; + unsigned long val; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fsr, + 32 * sizeof(u32), + 33 * sizeof(u32)); + if (!ret) { + val = task_thread_info(target)->xfsr[0]; + val &= 0xffffffff00000000UL; + val |= fsr; + task_thread_info(target)->xfsr[0] = val; + } + } + + fprs |= (FPRS_FEF | FPRS_DL); + task_thread_info(target)->fpsaved[0] = fprs; + return ret; +} + +static const struct user_regset ptrace32_regsets[] = { + [REGSET_GENERAL] = { + .n = 19, .size = sizeof(u32), + .regset_get = getregs_get, .set = setregs_set, + }, + [REGSET_FP] = { + .n = 68, .size = sizeof(u32), + .regset_get = getfpregs_get, .set = setfpregs_set, + }, +}; + +static const struct user_regset_view ptrace32_view = { + .regsets = ptrace32_regsets, .n = ARRAY_SIZE(ptrace32_regsets) +}; + static const struct user_regset_view user_sparc32_view = { .name = "sparc", .e_machine = EM_SPARC, .regsets = sparc32_regsets, .n = ARRAY_SIZE(sparc32_regsets) @@ -898,7 +929,6 @@ struct compat_fps { long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { - const struct user_regset_view *view = task_user_regset_view(current); compat_ulong_t caddr2 = task_pt_regs(current)->u_regs[UREG_I4]; struct pt_regs32 __user *pregs; struct compat_fps __user *fps; @@ -916,58 +946,31 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; case PTRACE_GETREGS: - ret = copy_regset_to_user(child, view, REGSET_GENERAL, - 32 * sizeof(u32), - 4 * sizeof(u32), - &pregs->psr); - if (!ret) - ret = copy_regset_to_user(child, view, REGSET_GENERAL, - 1 * sizeof(u32), - 15 * sizeof(u32), - &pregs->u_regs[0]); + ret = copy_regset_to_user(child, &ptrace32_view, + REGSET_GENERAL, 0, + 19 * sizeof(u32), + pregs); break; case PTRACE_SETREGS: - ret = copy_regset_from_user(child, view, REGSET_GENERAL, - 32 * sizeof(u32), - 4 * sizeof(u32), - &pregs->psr); - if (!ret) - ret = copy_regset_from_user(child, view, REGSET_GENERAL, - 1 * sizeof(u32), - 15 * sizeof(u32), - &pregs->u_regs[0]); + ret = copy_regset_from_user(child, &ptrace32_view, + REGSET_GENERAL, 0, + 19 * sizeof(u32), + pregs); break; case PTRACE_GETFPREGS: - ret = copy_regset_to_user(child, view, REGSET_FP, - 0 * sizeof(u32), - 32 * sizeof(u32), - &fps->regs[0]); - if (!ret) - ret = copy_regset_to_user(child, view, REGSET_FP, - 33 * sizeof(u32), - 1 * sizeof(u32), - &fps->fsr); - if (!ret) { - if (__put_user(0, &fps->flags) || - __put_user(0, &fps->extra) || - __put_user(0, &fps->fpqd) || - clear_user(&fps->fpq[0], 32 * sizeof(unsigned int))) - ret = -EFAULT; - } + ret = copy_regset_to_user(child, &ptrace32_view, + REGSET_FP, 0, + 68 * sizeof(u32), + fps); break; case PTRACE_SETFPREGS: - ret = copy_regset_from_user(child, view, REGSET_FP, - 0 * sizeof(u32), - 32 * sizeof(u32), - &fps->regs[0]); - if (!ret) - ret = copy_regset_from_user(child, view, REGSET_FP, - 33 * sizeof(u32), - 1 * sizeof(u32), - &fps->fsr); + ret = copy_regset_from_user(child, &ptrace32_view, + REGSET_FP, 0, + 33 * sizeof(u32), + fps); break; case PTRACE_READTEXT: @@ -1026,31 +1029,17 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS64: - ret = copy_regset_to_user(child, view, REGSET_GENERAL, - 1 * sizeof(u64), - 15 * sizeof(u64), - &pregs->u_regs[0]); - if (!ret) { - /* XXX doesn't handle 'y' register correctly XXX */ - ret = copy_regset_to_user(child, view, REGSET_GENERAL, - 32 * sizeof(u64), - 4 * sizeof(u64), - &pregs->tstate); - } + ret = copy_regset_to_user(child, &ptrace64_view, + REGSET_GENERAL, 0, + 19 * sizeof(u64), + pregs); break; case PTRACE_SETREGS64: - ret = copy_regset_from_user(child, view, REGSET_GENERAL, - 1 * sizeof(u64), - 15 * sizeof(u64), - &pregs->u_regs[0]); - if (!ret) { - /* XXX doesn't handle 'y' register correctly XXX */ - ret = copy_regset_from_user(child, view, REGSET_GENERAL, - 32 * sizeof(u64), - 4 * sizeof(u64), - &pregs->tstate); - } + ret = copy_regset_from_user(child, &ptrace64_view, + REGSET_GENERAL, 0, + 19 * sizeof(u64), + pregs); break; case PTRACE_GETFPREGS64: diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 6d07b85b9e24..eea43a1aef1b 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -353,8 +353,6 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = old_decode_dev(root_dev); #ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif prom_setsync(prom_sync_me); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index f765fda871eb..d87244197d5c 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -659,8 +659,6 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = old_decode_dev(root_dev); #ifdef CONFIG_BLK_DEV_RAM rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif task_thread_info(&init_task)->kregs = &fake_swapper_regs; diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 6b10cdaa7c96..0a460f2a3f90 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -34,7 +34,6 @@ extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern void fpu__clear_user_states(struct fpu *fpu); extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); -extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); /* * Boot time FPU initialization functions: diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h index d5bdffb9d27f..4f928d6a367b 100644 --- a/arch/x86/include/asm/fpu/regset.h +++ b/arch/x86/include/asm/fpu/regset.h @@ -8,8 +8,8 @@ #include <linux/regset.h> extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; -extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, - xstateregs_get; +extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, xstateregs_set; diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 1559554af931..14ab815132d4 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -104,8 +104,8 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); int xfeature_size(int xfeature_nr); -int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); -int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); +struct membuf; +void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); void copy_supervisor_to_kernel(struct xregs_state *xsave); diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index ba4c1b15908b..454b20815f35 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -82,7 +82,7 @@ struct xen_dm_op_buf; * - clobber the rest * * The result certainly isn't pretty, and it really shows up cpp's - * weakness as as macro language. Sorry. (But let's just give thanks + * weakness as a macro language. Sorry. (But let's just give thanks * there aren't more than 5 arguments...) */ diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index bd1d0649f8ce..c413756ba89f 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -27,8 +27,7 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r } int xfpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct fpu *fpu = &target->thread.fpu; @@ -38,8 +37,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, fpu__prepare_read(fpu); fpstate_sanitize_xstate(fpu); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpu->state.fxsave, 0, -1); + return membuf_write(&to, &fpu->state.fxsave, sizeof(struct fxregs_state)); } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -74,12 +72,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, } int xstateregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct fpu *fpu = &target->thread.fpu; struct xregs_state *xsave; - int ret; if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; @@ -89,10 +85,8 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, fpu__prepare_read(fpu); if (using_compacted_format()) { - if (kbuf) - ret = copy_xstate_to_kernel(kbuf, xsave, pos, count); - else - ret = copy_xstate_to_user(ubuf, xsave, pos, count); + copy_xstate_to_kernel(to, xsave); + return 0; } else { fpstate_sanitize_xstate(fpu); /* @@ -105,9 +99,8 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, /* * Copy the xstate memory layout. */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + return membuf_write(&to, xsave, fpu_user_xstate_size); } - return ret; } int xstateregs_set(struct task_struct *target, const struct user_regset *regset, @@ -293,8 +286,7 @@ void convert_to_fxsr(struct fxregs_state *fxsave, } int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; @@ -302,23 +294,22 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, fpu__prepare_read(fpu); if (!boot_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + return fpregs_soft_get(target, regset, to); - if (!boot_cpu_has(X86_FEATURE_FXSR)) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpu->state.fsave, 0, - -1); + if (!boot_cpu_has(X86_FEATURE_FXSR)) { + return membuf_write(&to, &fpu->state.fsave, + sizeof(struct fregs_state)); + } fpstate_sanitize_xstate(fpu); - if (kbuf && pos == 0 && count == sizeof(env)) { - convert_from_fxsr(kbuf, target); + if (to.left == sizeof(env)) { + convert_from_fxsr(to.p, target); return 0; } convert_from_fxsr(&env, target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); + return membuf_write(&to, &env, sizeof(env)); } int fpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -356,20 +347,4 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, return ret; } -/* - * FPU state for core dumps. - * This is only used for a.out dumps now. - * It is declared generically using elf_fpregset_t (which is - * struct user_i387_struct) but is in fact only used for 32-bit - * dumps, so on 64-bit it is really struct user_i387_ia32_struct. - */ -int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) -{ - struct task_struct *tsk = current; - - return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), - ufpu, NULL); -} -EXPORT_SYMBOL(dump_fpu); - #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 9393a445d73c..a4ec65317a7f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -170,14 +170,15 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); + if (!static_cpu_has(X86_FEATURE_FPU)) { + struct user_i387_ia32_struct fp; + fpregs_soft_get(current, NULL, (struct membuf){.p = &fp, + .left = sizeof(fp)}); + return copy_to_user(buf, &fp, sizeof(fp)) ? -EFAULT : 0; + } + if (!access_ok(buf, size)) return -EACCES; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), NULL, - (struct _fpstate_32 __user *) buf) ? -1 : 1; - retry: /* * Load the FPU registers if they are not valid for the current task. diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index be2a68a09d19..7a2bf884fede 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1014,32 +1014,20 @@ static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) return true; } -static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count) +static void fill_gap(struct membuf *to, unsigned *last, unsigned offset) { - if (*pos < to) { - unsigned size = to - *pos; - - if (size > *count) - size = *count; - memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size); - *kbuf += size; - *pos += size; - *count -= size; - } + if (*last >= offset) + return; + membuf_write(to, (void *)&init_fpstate.xsave + *last, offset - *last); + *last = offset; } -static void copy_part(unsigned offset, unsigned size, void *from, - void **kbuf, unsigned *pos, unsigned *count) +static void copy_part(struct membuf *to, unsigned *last, unsigned offset, + unsigned size, void *from) { - fill_gap(offset, kbuf, pos, count); - if (size > *count) - size = *count; - if (size) { - memcpy(*kbuf, from, size); - *kbuf += size; - *pos += size; - *count -= size; - } + fill_gap(to, last, offset); + membuf_write(to, from, size); + *last = offset + size; } /* @@ -1049,20 +1037,15 @@ static void copy_part(unsigned offset, unsigned size, void *from, * It supports partial copy but pos always starts from zero. This is called * from xstateregs_get() and there we check the CPU has XSAVES. */ -int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) +void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave) { struct xstate_header header; const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr); - unsigned count = size_total; + unsigned size = to.left; + unsigned last = 0; int i; /* - * Currently copy_regset_to_user() starts from pos 0: - */ - if (unlikely(offset_start != 0)) - return -EFAULT; - - /* * The destination is a ptrace buffer; we put in only user xstates: */ memset(&header, 0, sizeof(header)); @@ -1070,27 +1053,26 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of header.xfeatures &= xfeatures_mask_user(); if (header.xfeatures & XFEATURE_MASK_FP) - copy_part(0, off_mxcsr, - &xsave->i387, &kbuf, &offset_start, &count); + copy_part(&to, &last, 0, off_mxcsr, &xsave->i387); if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)) - copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE, - &xsave->i387.mxcsr, &kbuf, &offset_start, &count); + copy_part(&to, &last, off_mxcsr, + MXCSR_AND_FLAGS_SIZE, &xsave->i387.mxcsr); if (header.xfeatures & XFEATURE_MASK_FP) - copy_part(offsetof(struct fxregs_state, st_space), 128, - &xsave->i387.st_space, &kbuf, &offset_start, &count); + copy_part(&to, &last, offsetof(struct fxregs_state, st_space), + 128, &xsave->i387.st_space); if (header.xfeatures & XFEATURE_MASK_SSE) - copy_part(xstate_offsets[XFEATURE_SSE], 256, - &xsave->i387.xmm_space, &kbuf, &offset_start, &count); + copy_part(&to, &last, xstate_offsets[XFEATURE_SSE], + 256, &xsave->i387.xmm_space); /* * Fill xsave->i387.sw_reserved value for ptrace frame: */ - copy_part(offsetof(struct fxregs_state, sw_reserved), 48, - xstate_fx_sw_bytes, &kbuf, &offset_start, &count); + copy_part(&to, &last, offsetof(struct fxregs_state, sw_reserved), + 48, xstate_fx_sw_bytes); /* * Copy xregs_state->header: */ - copy_part(offsetof(struct xregs_state, header), sizeof(header), - &header, &kbuf, &offset_start, &count); + copy_part(&to, &last, offsetof(struct xregs_state, header), + sizeof(header), &header); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { /* @@ -1099,104 +1081,12 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of if ((header.xfeatures >> i) & 1) { void *src = __raw_xsave_addr(xsave, i); - copy_part(xstate_offsets[i], xstate_sizes[i], - src, &kbuf, &offset_start, &count); + copy_part(&to, &last, xstate_offsets[i], + xstate_sizes[i], src); } } - fill_gap(size_total, &kbuf, &offset_start, &count); - - return 0; -} - -static inline int -__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total) -{ - if (!size) - return 0; - - if (offset < size_total) { - unsigned int copy = min(size, size_total - offset); - - if (__copy_to_user(ubuf + offset, data, copy)) - return -EFAULT; - } - return 0; -} - -/* - * Convert from kernel XSAVES compacted format to standard format and copy - * to a user-space buffer. It supports partial copy but pos always starts from - * zero. This is called from xstateregs_get() and there we check the CPU - * has XSAVES. - */ -int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) -{ - unsigned int offset, size; - int ret, i; - struct xstate_header header; - - /* - * Currently copy_regset_to_user() starts from pos 0: - */ - if (unlikely(offset_start != 0)) - return -EFAULT; - - /* - * The destination is a ptrace buffer; we put in only user xstates: - */ - memset(&header, 0, sizeof(header)); - header.xfeatures = xsave->header.xfeatures; - header.xfeatures &= xfeatures_mask_user(); - - /* - * Copy xregs_state->header: - */ - offset = offsetof(struct xregs_state, header); - size = sizeof(header); - - ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total); - if (ret) - return ret; - - for (i = 0; i < XFEATURE_MAX; i++) { - /* - * Copy only in-use xstates: - */ - if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, i); - - offset = xstate_offsets[i]; - size = xstate_sizes[i]; - - /* The next component has to fit fully into the output buffer: */ - if (offset + size > size_total) - break; - - ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total); - if (ret) - return ret; - } - - } - - if (xfeatures_mxcsr_quirk(header.xfeatures)) { - offset = offsetof(struct fxregs_state, mxcsr); - size = MXCSR_AND_FLAGS_SIZE; - __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total); - } - - /* - * Fill xsave->i387.sw_reserved value for ptrace frame: - */ - offset = offsetof(struct fxregs_state, sw_reserved); - size = sizeof(xstate_fx_sw_bytes); - - ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total); - if (ret) - return ret; - - return 0; + fill_gap(&to, &last, size); } /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3f006489087f..5679aa3fdcb8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -412,26 +412,12 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) static int genregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (kbuf) { - unsigned long *k = kbuf; - while (count >= sizeof(*k)) { - *k++ = getreg(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - unsigned long __user *u = ubuf; - while (count >= sizeof(*u)) { - if (__put_user(getreg(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + int reg; + for (reg = 0; to.left; reg++) + membuf_store(&to, getreg(target, reg * sizeof(unsigned long))); return 0; } @@ -695,16 +681,14 @@ static int ioperm_active(struct task_struct *target, static int ioperm_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct io_bitmap *iobm = target->thread.io_bitmap; if (!iobm) return -ENXIO; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - iobm->bitmap, 0, IO_BITMAP_BYTES); + return membuf_write(&to, iobm->bitmap, IO_BITMAP_BYTES); } /* @@ -1007,28 +991,15 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val) static int genregs32_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count >= sizeof(*k)) { - getreg32(target, pos, k++); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count >= sizeof(*u)) { - compat_ulong_t word; - getreg32(target, pos, &word); - if (__put_user(word, u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + int reg; + for (reg = 0; to.left; reg++) { + u32 val; + getreg32(target, reg * 4, &val); + membuf_store(&to, val); + } return 0; } @@ -1238,25 +1209,25 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = genregs_get, .set = genregs_set + .regset_get = genregs_get, .set = genregs_set }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, .size = sizeof(u64), .align = sizeof(u64), - .active = xstateregs_active, .get = xstateregs_get, + .active = xstateregs_active, .regset_get = xstateregs_get, .set = xstateregs_set }, [REGSET_IOPERM64] = { .core_note_type = NT_386_IOPERM, .n = IO_BITMAP_LONGS, .size = sizeof(long), .align = sizeof(long), - .active = ioperm_active, .get = ioperm_get + .active = ioperm_active, .regset_get = ioperm_get }, }; @@ -1279,24 +1250,24 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct32) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = genregs32_get, .set = genregs32_set + .regset_get = genregs32_get, .set = genregs32_set }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set + .active = regset_fpregs_active, .regset_get = fpregs_get, .set = fpregs_set }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, .size = sizeof(u64), .align = sizeof(u64), - .active = xstateregs_active, .get = xstateregs_get, + .active = xstateregs_active, .regset_get = xstateregs_get, .set = xstateregs_set }, [REGSET_TLS] = { @@ -1305,13 +1276,13 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { .size = sizeof(struct user_desc), .align = sizeof(struct user_desc), .active = regset_tls_active, - .get = regset_tls_get, .set = regset_tls_set + .regset_get = regset_tls_get, .set = regset_tls_set }, [REGSET_IOPERM32] = { .core_note_type = NT_386_IOPERM, .n = IO_BITMAP_BYTES / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = ioperm_active, .get = ioperm_get + .active = ioperm_active, .regset_get = ioperm_get }, }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3767e74c758..b9a68d8e06d8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -870,8 +870,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_BLK_DEV_RAM rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 71d3fef1edc9..64a496a0687f 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -256,36 +256,16 @@ int regset_tls_active(struct task_struct *target, } int regset_tls_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { const struct desc_struct *tls; + struct user_desc v; + int pos; - if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || - (pos % sizeof(struct user_desc)) != 0 || - (count % sizeof(struct user_desc)) != 0) - return -EINVAL; - - pos /= sizeof(struct user_desc); - count /= sizeof(struct user_desc); - - tls = &target->thread.tls_array[pos]; - - if (kbuf) { - struct user_desc *info = kbuf; - while (count-- > 0) - fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++, - tls++); - } else { - struct user_desc __user *u_info = ubuf; - while (count-- > 0) { - struct user_desc info; - fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++); - if (__copy_to_user(u_info++, &info, sizeof(info))) - return -EFAULT; - } + for (pos = 0, tls = target->thread.tls_array; to.left; pos++, tls++) { + fill_user_desc(&v, GDT_ENTRY_TLS_MIN + pos, tls); + membuf_write(&to, &v, sizeof(v)); } - return 0; } diff --git a/arch/x86/kernel/tls.h b/arch/x86/kernel/tls.h index 3a76e1d3535e..fc39447a0c1a 100644 --- a/arch/x86/kernel/tls.h +++ b/arch/x86/kernel/tls.h @@ -12,7 +12,7 @@ #include <linux/regset.h> extern user_regset_active_fn regset_tls_active; -extern user_regset_get_fn regset_tls_get; +extern user_regset_get2_fn regset_tls_get; extern user_regset_set_fn regset_tls_set; #endif /* _ARCH_X86_KERNEL_TLS_H */ diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index a873da6b46d6..8679a9d6c47f 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -689,12 +689,10 @@ int fpregs_soft_set(struct task_struct *target, int fpregs_soft_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct swregs_state *s387 = &target->thread.fpu.state.soft; const void *space = s387->st_space; - int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; RE_ENTRANT_CHECK_OFF; @@ -709,18 +707,11 @@ int fpregs_soft_get(struct task_struct *target, S387->fos |= 0xffff0000; #endif /* PECULIAR_486 */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, - offsetof(struct swregs_state, st_space)); - - /* Copy all registers in stack order. */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - space + offset, 0, other); - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - space, 0, offset); + membuf_write(&to, s387, offsetof(struct swregs_state, st_space)); + membuf_write(&to, space + offset, other); + membuf_write(&to, space, offset); RE_ENTRANT_CHECK_ON; - return ret; + return 0; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1c1209da55fe..3b246ae40c8f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1252,28 +1252,23 @@ static void __init preallocate_vmalloc_pages(void) p4d_t *p4d; pud_t *pud; - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - /* Can only happen with 5-level paging */ - p4d = p4d_alloc(&init_mm, pgd, addr); - if (!p4d) { - lvl = "p4d"; - goto failed; - } - } + lvl = "p4d"; + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + goto failed; + /* + * With 5-level paging the P4D level is not folded. So the PGDs + * are now populated and there is no need to walk down to the + * PUD level. + */ if (pgtable_l5_enabled()) continue; - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - /* Ends up here only with 4-level paging */ - pud = pud_alloc(&init_mm, p4d, addr); - if (!pud) { - lvl = "pud"; - goto failed; - } - } + lvl = "pud"; + pud = pud_alloc(&init_mm, p4d, addr); + if (!pud) + goto failed; } return; diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index ce4a32bd2294..bb3f4797d212 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -39,8 +39,7 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); struct user_pt_regs newregs = { @@ -63,8 +62,7 @@ static int gpr_get(struct task_struct *target, regs->areg, (WSBITS - regs->windowbase) * 16); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &newregs, 0, -1); + return membuf_write(&to, &newregs, sizeof(newregs)); } static int gpr_set(struct task_struct *target, @@ -121,8 +119,7 @@ static int gpr_set(struct task_struct *target, static int tie_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { int ret; struct pt_regs *regs = task_pt_regs(target); @@ -147,8 +144,7 @@ static int tie_get(struct task_struct *target, newregs->cp6 = ti->xtregs_cp.cp6; newregs->cp7 = ti->xtregs_cp.cp7; #endif - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - newregs, 0, -1); + ret = membuf_write(&to, newregs, sizeof(*newregs)); kfree(newregs); return ret; } @@ -203,7 +199,7 @@ static const struct user_regset xtensa_regsets[] = { .n = sizeof(struct user_pt_regs) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = gpr_get, + .regset_get = gpr_get, .set = gpr_set, }, [REGSET_TIE] = { @@ -211,7 +207,7 @@ static const struct user_regset xtensa_regsets[] = { .n = sizeof(elf_xtregs_t) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .get = tie_get, + .regset_get = tie_get, .set = tie_set, }, }; |