diff options
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/include/asm/asm-const.h | 13 | ||||
-rw-r--r-- | arch/powerpc/include/asm/cputable.h | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/uaccess.h | 4 | ||||
-rw-r--r-- | arch/powerpc/kernel/cputable.c | 13 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh.c | 5 | ||||
-rw-r--r-- | arch/powerpc/kernel/mce.c | 7 | ||||
-rw-r--r-- | arch/powerpc/kernel/smp.c | 70 | ||||
-rw-r--r-- | arch/powerpc/kernel/traps.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-dump.c | 52 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-elog.c | 11 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 118 |
11 files changed, 179 insertions, 118 deletions
diff --git a/arch/powerpc/include/asm/asm-const.h b/arch/powerpc/include/asm/asm-const.h index 082c1538c562..0ce2368bd20f 100644 --- a/arch/powerpc/include/asm/asm-const.h +++ b/arch/powerpc/include/asm/asm-const.h @@ -11,4 +11,17 @@ # define __ASM_CONST(x) x##UL # define ASM_CONST(x) __ASM_CONST(x) #endif + +/* + * Inline assembly memory constraint + * + * GCC 4.9 doesn't properly handle pre update memory constraint "m<>" + * + */ +#if defined(GCC_VERSION) && GCC_VERSION < 50000 +#define UPD_CONSTR "" +#else +#define UPD_CONSTR "<>" +#endif + #endif /* _ASM_POWERPC_ASM_CONST_H */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 93bc70d4c9a1..3d2f94afc13a 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -477,7 +477,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ - CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \ + CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \ CPU_FTR_DAWR | CPU_FTR_DAWR1) #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 916daa4b4d0d..ef5bbb705c08 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -182,7 +182,7 @@ do { \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ - : "r" (x), "m<>" (*addr) \ + : "r" (x), "m"UPD_CONSTR (*addr) \ : \ : label) @@ -253,7 +253,7 @@ extern long __get_user_bad(void); ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ - : "m<>" (*addr), "i" (-EFAULT), "0" (err)) + : "m"UPD_CONSTR (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __get_user_asm2(x, addr, err) \ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 492c0b36aff6..29de58d4dfb7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -121,9 +121,16 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE2_DARN | \ PPC_FEATURE2_SCV) #define COMMON_USER_POWER10 COMMON_USER_POWER9 -#define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \ - PPC_FEATURE2_ARCH_3_1 | \ - PPC_FEATURE2_MMA) +#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \ + PPC_FEATURE2_MMA | \ + PPC_FEATURE2_ARCH_3_00 | \ + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV | \ + PPC_FEATURE2_ARCH_2_07 | \ + PPC_FEATURE2_DSCR | \ + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 0e160dffcb86..813713c9120c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -466,11 +466,6 @@ int eeh_dev_check_failure(struct eeh_dev *edev) return 0; } - if (!pe->addr) { - eeh_stats.no_cfg_addr++; - return 0; - } - /* * On PowerNV platform, we might already have fenced PHB * there and we need take care of that firstly. diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index ada59f6c4298..63702c0badb9 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -591,12 +591,11 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); long notrace machine_check_early(struct pt_regs *regs) { long handled = 0; - bool nested = in_nmi(); u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); this_cpu_set_ftrace_enabled(0); - - if (!nested) + /* Do not use nmi_enter/exit for pseries hpte guest */ + if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -607,7 +606,7 @@ long notrace machine_check_early(struct pt_regs *regs) if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); - if (!nested) + if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) nmi_exit(); this_cpu_set_ftrace_enabled(ftrace_enabled); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0dc1b8591cc8..3c6b9822f978 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1240,43 +1240,33 @@ static struct device_node *cpu_to_l2cache(int cpu) return cache; } -static bool update_mask_by_l2(int cpu) +static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) { struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; struct device_node *l2_cache, *np; - cpumask_var_t mask; int i; - l2_cache = cpu_to_l2cache(cpu); - if (!l2_cache) { - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; - - /* - * If no l2cache for this CPU, assume all siblings to share - * cache with this CPU. - */ - if (has_big_cores) - sibling_mask = cpu_smallcore_mask; + if (has_big_cores) + submask_fn = cpu_smallcore_mask; - for_each_cpu(i, sibling_mask(cpu)) + l2_cache = cpu_to_l2cache(cpu); + if (!l2_cache || !*mask) { + /* Assume only core siblings share cache with this CPU */ + for_each_cpu(i, submask_fn(cpu)) set_cpus_related(cpu, i, cpu_l2_cache_mask); return false; } - alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); - cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); - - if (has_big_cores) - submask_fn = cpu_smallcore_mask; + cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu)); /* Update l2-cache mask with all the CPUs that are part of submask */ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask); /* Skip all CPUs already part of current CPU l2-cache mask */ - cpumask_andnot(mask, mask, cpu_l2_cache_mask(cpu)); + cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu)); - for_each_cpu(i, mask) { + for_each_cpu(i, *mask) { /* * when updating the marks the current CPU has not been marked * online, but we need to update the cache masks @@ -1286,15 +1276,14 @@ static bool update_mask_by_l2(int cpu) /* Skip all CPUs already part of current CPU l2-cache */ if (np == l2_cache) { or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask); - cpumask_andnot(mask, mask, submask_fn(i)); + cpumask_andnot(*mask, *mask, submask_fn(i)); } else { - cpumask_andnot(mask, mask, cpu_l2_cache_mask(i)); + cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i)); } of_node_put(np); } of_node_put(l2_cache); - free_cpumask_var(mask); return true; } @@ -1337,40 +1326,46 @@ static inline void add_cpu_to_smallcore_masks(int cpu) } } -static void update_coregroup_mask(int cpu) +static void update_coregroup_mask(int cpu, cpumask_var_t *mask) { struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; - cpumask_var_t mask; int coregroup_id = cpu_to_coregroup_id(cpu); int i; - alloc_cpumask_var_node(&mask, GFP_KERNEL, cpu_to_node(cpu)); - cpumask_and(mask, cpu_online_mask, cpu_cpu_mask(cpu)); - if (shared_caches) submask_fn = cpu_l2_cache_mask; + if (!*mask) { + /* Assume only siblings are part of this CPU's coregroup */ + for_each_cpu(i, submask_fn(cpu)) + set_cpus_related(cpu, i, cpu_coregroup_mask); + + return; + } + + cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu)); + /* Update coregroup mask with all the CPUs that are part of submask */ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask); /* Skip all CPUs already part of coregroup mask */ - cpumask_andnot(mask, mask, cpu_coregroup_mask(cpu)); + cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu)); - for_each_cpu(i, mask) { + for_each_cpu(i, *mask) { /* Skip all CPUs not part of this coregroup */ if (coregroup_id == cpu_to_coregroup_id(i)) { or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask); - cpumask_andnot(mask, mask, submask_fn(i)); + cpumask_andnot(*mask, *mask, submask_fn(i)); } else { - cpumask_andnot(mask, mask, cpu_coregroup_mask(i)); + cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i)); } } - free_cpumask_var(mask); } static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); + cpumask_var_t mask; int i; /* @@ -1384,10 +1379,15 @@ static void add_cpu_to_masks(int cpu) set_cpus_related(i, cpu, cpu_sibling_mask); add_cpu_to_smallcore_masks(cpu); - update_mask_by_l2(cpu); + + /* In CPU-hotplug path, hence use GFP_ATOMIC */ + alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu)); + update_mask_by_l2(cpu, &mask); if (has_coregroup_support()) - update_coregroup_mask(cpu); + update_coregroup_mask(cpu, &mask); + + free_cpumask_var(mask); } /* Activate a secondary processor. */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c5f39f13e96e..5006dcbe1d9f 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -885,7 +885,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) { unsigned int ra, rb, t, i, sel, instr, rc; const void __user *addr; - u8 vbuf[16], *vdst; + u8 vbuf[16] __aligned(16), *vdst; unsigned long ea, msr, msr_mask; bool swap; diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 543c816fa99e..00c5a59d82d9 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -88,9 +88,14 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj, const char *buf, size_t count) { - dump_send_ack(dump_obj->id); - sysfs_remove_file_self(&dump_obj->kobj, &attr->attr); - kobject_put(&dump_obj->kobj); + /* + * Try to self remove this attribute. If we are successful, + * delete the kobject itself. + */ + if (sysfs_remove_file_self(&dump_obj->kobj, &attr->attr)) { + dump_send_ack(dump_obj->id); + kobject_put(&dump_obj->kobj); + } return count; } @@ -318,15 +323,14 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct dump_obj *create_dump_obj(uint32_t id, size_t size, - uint32_t type) +static void create_dump_obj(uint32_t id, size_t size, uint32_t type) { struct dump_obj *dump; int rc; dump = kzalloc(sizeof(*dump), GFP_KERNEL); if (!dump) - return NULL; + return; dump->kobj.kset = dump_kset; @@ -346,21 +350,39 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size, rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id); if (rc) { kobject_put(&dump->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this dump is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the dump before kobject_uevent() is called. If that + * happens then there is a potential race between + * dump_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&dump->kobj); rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&dump->kobj, KOBJ_ADD); + + pr_info("%s: New platform dump. ID = 0x%x Size %u\n", + __func__, dump->id, dump->size); + } else { + /* Drop reference count taken for bin file */ kobject_put(&dump->kobj); - return NULL; } - pr_info("%s: New platform dump. ID = 0x%x Size %u\n", - __func__, dump->id, dump->size); - - kobject_uevent(&dump->kobj, KOBJ_ADD); - - return dump; + /* Drop our reference */ + kobject_put(&dump->kobj); + return; } static irqreturn_t process_dump(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 5e33b1fc67c2..37b380eef41a 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -72,9 +72,14 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj, const char *buf, size_t count) { - opal_send_ack_elog(elog_obj->id); - sysfs_remove_file_self(&elog_obj->kobj, &attr->attr); - kobject_put(&elog_obj->kobj); + /* + * Try to self remove this attribute. If we are successful, + * delete the kobject itself. + */ + if (sysfs_remove_file_self(&elog_obj->kobj, &attr->attr)) { + opal_send_ack_elog(elog_obj->id); + kobject_put(&elog_obj->kobj); + } return count; } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 13c86a292c6d..b2b245b25edb 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -521,18 +521,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } +static int mce_handle_err_realmode(int disposition, u8 error_type) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif + return disposition; +} -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +static int mce_handle_err_virtmode(struct pt_regs *regs, + struct rtas_error_log *errp, + struct pseries_mc_errorlog *mce_log, + int disposition) { struct mce_error_info mce_err = { 0 }; - unsigned long eaddr = 0, paddr = 0; - struct pseries_errorlog *pseries_log; - struct pseries_mc_errorlog *mce_log; - int disposition = rtas_error_disposition(errp); int initiator = rtas_error_initiator(errp); int severity = rtas_error_severity(errp); + unsigned long eaddr = 0, paddr = 0; u8 error_type, err_sub_type; + if (!mce_log) + goto out; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + if (initiator == RTAS_INITIATOR_UNKNOWN) mce_err.initiator = MCE_INITIATOR_UNKNOWN; else if (initiator == RTAS_INITIATOR_CPU) @@ -571,18 +608,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; mce_err.error_class = MCE_ECLASS_UNKNOWN; - if (!rtas_error_extended(errp)) - goto out; - - pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); - if (pseries_log == NULL) - goto out; - - mce_log = (struct pseries_mc_errorlog *)pseries_log->data; - error_type = mce_log->error_type; - err_sub_type = rtas_mc_error_sub_type(mce_log); - - switch (mce_log->error_type) { + switch (error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; mce_common_process_ue(regs, &mce_err); @@ -682,37 +708,31 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; break; } +out: + save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, + &mce_err, regs->nip, eaddr, paddr); + return disposition; +} -#ifdef CONFIG_PPC_BOOK3S_64 - if (disposition == RTAS_DISP_NOT_RECOVERED) { - switch (error_type) { - case MC_ERROR_TYPE_SLB: - case MC_ERROR_TYPE_ERAT: - /* - * Store the old slb content in paca before flushing. - * Print this when we go to virtual mode. - * There are chances that we may hit MCE again if there - * is a parity error on the SLB entry we trying to read - * for saving. Hence limit the slb saving to single - * level of recursion. - */ - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - flush_and_reload_slb(); - disposition = RTAS_DISP_FULLY_RECOVERED; - break; - default: - break; - } - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { - /* Platform corrected itself but could be degraded */ - printk(KERN_ERR "MCE: limited recovery, system may " - "be degraded\n"); - disposition = RTAS_DISP_FULLY_RECOVERED; - } -#endif +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log = NULL; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + + disposition = mce_handle_err_realmode(disposition, error_type); -out: /* * Enable translation as we will be accessing per-cpu variables * in save_mce_event() which may fall outside RMO region, also @@ -723,10 +743,10 @@ out: * Note: All the realmode handling like flushing SLB entries for * SLB multihit is done by now. */ +out: mtmsr(mfmsr() | MSR_IR | MSR_DR); - save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, - &mce_err, regs->nip, eaddr, paddr); - + disposition = mce_handle_err_virtmode(regs, errp, mce_log, + disposition); return disposition; } |