diff options
Diffstat (limited to 'arch/powerpc/kernel')
59 files changed, 1370 insertions, 893 deletions
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 3e6c0744c174..11550a3d1ac2 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -339,7 +339,7 @@ int fix_alignment(struct pt_regs *regs) if (r < 0) return -EINVAL; - type = op.type & INSTR_TYPE_MASK; + type = GETTYPE(op.type); if (!OP_IS_LOAD_STORE(type)) { if (op.type != CACHEOP + DCBZ) return -EINVAL; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8817c5a6bcc2..9fc9e0977009 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -180,6 +180,7 @@ int main(void) OFFSET(PACAKMSR, paca_struct, kernel_msr); OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); + OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); #ifdef CONFIG_PPC_BOOK3S OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 6537cba1a758..b2072d5bbf2b 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -157,20 +157,20 @@ void btext_map(void) /* By default, we are no longer mapped */ boot_text_mapped = 0; - if (dispDeviceBase == 0) + if (!dispDeviceBase) return; base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL; offset = ((unsigned long) dispDeviceBase) - base; size = dispDeviceRowBytes * dispDeviceRect[3] + offset + dispDeviceRect[0]; vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0)))); - if (vbase == 0) + if (!vbase) return; logicalDisplayBase = vbase + offset; boot_text_mapped = 1; } -int btext_initialize(struct device_node *np) +static int btext_initialize(struct device_node *np) { unsigned int width, height, depth, pitch; unsigned long address = 0; @@ -270,7 +270,7 @@ static unsigned char * calc_base(int x, int y) unsigned char *base; base = logicalDisplayBase; - if (base == 0) + if (!base) base = dispDeviceBase; base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3); base += (y + dispDeviceRect[1]) * dispDeviceRowBytes; @@ -281,7 +281,7 @@ static unsigned char * calc_base(int x, int y) void btext_update_display(unsigned long phys, int width, int height, int depth, int pitch) { - if (dispDeviceBase == 0) + if (!dispDeviceBase) return; /* check it's the same frame buffer (within 256MB) */ diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 138157deeadf..155170d70324 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -235,8 +235,6 @@ static inline dma_addr_t dma_nommu_map_page(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - BUG_ON(dir == DMA_NONE); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) __dma_sync_page(page, offset, size, dir); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index c904477abaf3..4be1c0de9406 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -717,6 +717,7 @@ static __init void cpufeatures_cpu_quirks(void) if ((version & 0xffff0000) == 0x004e0000) { cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR; } /* diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 90bb39b1a23c..5746809cfaad 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -263,9 +263,8 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) return n; } -static void *eeh_dump_pe_log(void *data, void *flag) +static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = data; struct eeh_dev *edev, *tmp; size_t *plen = flag; @@ -542,8 +541,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev) /* Frozen parent PE ? */ ret = eeh_ops->get_state(parent_pe, NULL); - if (ret > 0 && !eeh_state_active(ret)) + if (ret > 0 && !eeh_state_active(ret)) { pe = parent_pe; + pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n", + pe->phb->global_number, pe->addr, + pe->phb->global_number, parent_pe->addr); + } /* Next parent level */ parent_pe = parent_pe->parent; @@ -686,9 +689,9 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) return rc; } -static void *eeh_disable_and_save_dev_state(void *data, void *userdata) +static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev, + void *userdata) { - struct eeh_dev *edev = data; struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -714,9 +717,8 @@ static void *eeh_disable_and_save_dev_state(void *data, void *userdata) return NULL; } -static void *eeh_restore_dev_state(void *data, void *userdata) +static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = data; struct pci_dn *pdn = eeh_dev_to_pdn(edev); struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -856,11 +858,10 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat * the indicated device and its children so that the bunch of the * devices could be reset properly. */ -static void *eeh_set_dev_freset(void *data, void *flag) +static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) { struct pci_dev *dev; unsigned int *freset = (unsigned int *)flag; - struct eeh_dev *edev = (struct eeh_dev *)data; dev = eeh_dev_to_pci_dev(edev); if (dev) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b8a329f04814..67619b4b3f96 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -39,18 +39,82 @@ struct eeh_rmv_data { int removed; }; -/** - * eeh_pcid_name - Retrieve name of PCI device driver - * @pdev: PCI device - * - * This routine is used to retrieve the name of PCI device driver - * if that's valid. - */ -static inline const char *eeh_pcid_name(struct pci_dev *pdev) +static int eeh_result_priority(enum pci_ers_result result) +{ + switch (result) { + case PCI_ERS_RESULT_NONE: + return 1; + case PCI_ERS_RESULT_NO_AER_DRIVER: + return 2; + case PCI_ERS_RESULT_RECOVERED: + return 3; + case PCI_ERS_RESULT_CAN_RECOVER: + return 4; + case PCI_ERS_RESULT_DISCONNECT: + return 5; + case PCI_ERS_RESULT_NEED_RESET: + return 6; + default: + WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result); + return 0; + } +}; + +const char *pci_ers_result_name(enum pci_ers_result result) +{ + switch (result) { + case PCI_ERS_RESULT_NONE: + return "none"; + case PCI_ERS_RESULT_CAN_RECOVER: + return "can recover"; + case PCI_ERS_RESULT_NEED_RESET: + return "need reset"; + case PCI_ERS_RESULT_DISCONNECT: + return "disconnect"; + case PCI_ERS_RESULT_RECOVERED: + return "recovered"; + case PCI_ERS_RESULT_NO_AER_DRIVER: + return "no AER driver"; + default: + WARN_ONCE(1, "Unknown result type: %d\n", (int)result); + return "unknown"; + } +}; + +static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr, + edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf); + + va_end(args); +} + +static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old, + enum pci_ers_result new) +{ + if (eeh_result_priority(new) > eeh_result_priority(old)) + return new; + return old; +} + +static bool eeh_dev_removed(struct eeh_dev *edev) { - if (pdev && pdev->dev.driver) - return pdev->dev.driver->name; - return ""; + return !edev || (edev->mode & EEH_DEV_REMOVED); +} + +static bool eeh_edev_actionable(struct eeh_dev *edev) +{ + return (edev->pdev && !eeh_dev_removed(edev) && + !eeh_pe_passed(edev->pe)); } /** @@ -98,22 +162,20 @@ static inline void eeh_pcid_put(struct pci_dev *pdev) * do real work because EEH should freeze DMA transfers for those PCI * devices encountering EEH errors, which includes MSI or MSI-X. */ -static void eeh_disable_irq(struct pci_dev *dev) +static void eeh_disable_irq(struct eeh_dev *edev) { - struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - /* Don't disable MSI and MSI-X interrupts. They are * effectively disabled by the DMA Stopped state * when an EEH error occurs. */ - if (dev->msi_enabled || dev->msix_enabled) + if (edev->pdev->msi_enabled || edev->pdev->msix_enabled) return; - if (!irq_has_action(dev->irq)) + if (!irq_has_action(edev->pdev->irq)) return; edev->mode |= EEH_DEV_IRQ_DISABLED; - disable_irq_nosync(dev->irq); + disable_irq_nosync(edev->pdev->irq); } /** @@ -123,10 +185,8 @@ static void eeh_disable_irq(struct pci_dev *dev) * This routine must be called to enable interrupt while failed * device could be resumed. */ -static void eeh_enable_irq(struct pci_dev *dev) +static void eeh_enable_irq(struct eeh_dev *edev) { - struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { edev->mode &= ~EEH_DEV_IRQ_DISABLED; /* @@ -149,23 +209,13 @@ static void eeh_enable_irq(struct pci_dev *dev) * * tglx */ - if (irqd_irq_disabled(irq_get_irq_data(dev->irq))) - enable_irq(dev->irq); + if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq))) + enable_irq(edev->pdev->irq); } } -static bool eeh_dev_removed(struct eeh_dev *edev) -{ - /* EEH device removed ? */ - if (!edev || (edev->mode & EEH_DEV_REMOVED)) - return true; - - return false; -} - -static void *eeh_dev_save_state(void *data, void *userdata) +static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = data; struct pci_dev *pdev; if (!edev) @@ -189,144 +239,155 @@ static void *eeh_dev_save_state(void *data, void *userdata) return NULL; } -/** - * eeh_report_error - Report pci error to each device driver - * @data: eeh device - * @userdata: return value - * - * Report an EEH error to each device driver, collect up and - * merge the device driver responses. Cumulative response - * passed back in "userdata". - */ -static void *eeh_report_error(void *data, void *userdata) +static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s) { - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; + struct eeh_pe *pe; + struct eeh_dev *edev, *tmp; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) - return NULL; + eeh_for_each_pe(root, pe) + eeh_pe_for_each_dev(pe, edev, tmp) + if (eeh_edev_actionable(edev)) + edev->pdev->error_state = s; +} - device_lock(&dev->dev); - dev->error_state = pci_channel_io_frozen; +static void eeh_set_irq_state(struct eeh_pe *root, bool enable) +{ + struct eeh_pe *pe; + struct eeh_dev *edev, *tmp; - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; + eeh_for_each_pe(root, pe) { + eeh_pe_for_each_dev(pe, edev, tmp) { + if (!eeh_edev_actionable(edev)) + continue; - eeh_disable_irq(dev); + if (!eeh_pcid_get(edev->pdev)) + continue; - if (!driver->err_handler || - !driver->err_handler->error_detected) - goto out; + if (enable) + eeh_enable_irq(edev); + else + eeh_disable_irq(edev); - rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); + eeh_pcid_put(edev->pdev); + } + } +} - /* A driver that needs a reset trumps all others */ - if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - if (*res == PCI_ERS_RESULT_NONE) *res = rc; +typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *, + struct pci_driver *); +static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, + enum pci_ers_result *result) +{ + struct pci_driver *driver; + enum pci_ers_result new_result; + + device_lock(&edev->pdev->dev); + if (eeh_edev_actionable(edev)) { + driver = eeh_pcid_get(edev->pdev); + + if (!driver) + eeh_edev_info(edev, "no driver"); + else if (!driver->err_handler) + eeh_edev_info(edev, "driver not EEH aware"); + else if (edev->mode & EEH_DEV_NO_HANDLER) + eeh_edev_info(edev, "driver bound too late"); + else { + new_result = fn(edev, driver); + eeh_edev_info(edev, "%s driver reports: '%s'", + driver->name, + pci_ers_result_name(new_result)); + if (result) + *result = pci_ers_merge_result(*result, + new_result); + } + if (driver) + eeh_pcid_put(edev->pdev); + } else { + eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev, + !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe)); + } + device_unlock(&edev->pdev->dev); +} - edev->in_error = true; - pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); +static void eeh_pe_report(const char *name, struct eeh_pe *root, + eeh_report_fn fn, enum pci_ers_result *result) +{ + struct eeh_pe *pe; + struct eeh_dev *edev, *tmp; -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; + pr_info("EEH: Beginning: '%s'\n", name); + eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp) + eeh_pe_report_edev(edev, fn, result); + if (result) + pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n", + name, pci_ers_result_name(*result)); + else + pr_info("EEH: Finished:'%s'", name); } /** - * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled - * @data: eeh device - * @userdata: return value + * eeh_report_error - Report pci error to each device driver + * @edev: eeh device + * @driver: device's PCI driver * - * Tells each device driver that IO ports, MMIO and config space I/O - * are now enabled. Collects up and merges the device driver responses. - * Cumulative response passed back in "userdata". + * Report an EEH error to each device driver. */ -static void *eeh_report_mmio_enabled(void *data, void *userdata) +static enum pci_ers_result eeh_report_error(struct eeh_dev *edev, + struct pci_driver *driver) { - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; - - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) - return NULL; - - device_lock(&dev->dev); - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; + enum pci_ers_result rc; + struct pci_dev *dev = edev->pdev; - if (!driver->err_handler || - !driver->err_handler->mmio_enabled || - (edev->mode & EEH_DEV_NO_HANDLER)) - goto out; + if (!driver->err_handler->error_detected) + return PCI_ERS_RESULT_NONE; - rc = driver->err_handler->mmio_enabled(dev); + eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)", + driver->name); + rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); - /* A driver that needs a reset trumps all others */ - if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - if (*res == PCI_ERS_RESULT_NONE) *res = rc; + edev->in_error = true; + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); + return rc; +} -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; +/** + * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled + * @edev: eeh device + * @driver: device's PCI driver + * + * Tells each device driver that IO ports, MMIO and config space I/O + * are now enabled. + */ +static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev, + struct pci_driver *driver) +{ + if (!driver->err_handler->mmio_enabled) + return PCI_ERS_RESULT_NONE; + eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name); + return driver->err_handler->mmio_enabled(edev->pdev); } /** * eeh_report_reset - Tell device that slot has been reset - * @data: eeh device - * @userdata: return value + * @edev: eeh device + * @driver: device's PCI driver * * This routine must be called while EEH tries to reset particular * PCI device so that the associated PCI device driver could take * some actions, usually to save data the driver needs so that the * driver can work again while the device is recovered. */ -static void *eeh_report_reset(void *data, void *userdata) +static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev, + struct pci_driver *driver) { - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; - - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) - return NULL; - - device_lock(&dev->dev); - dev->error_state = pci_channel_io_normal; - - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; - - eeh_enable_irq(dev); - - if (!driver->err_handler || - !driver->err_handler->slot_reset || - (edev->mode & EEH_DEV_NO_HANDLER) || - (!edev->in_error)) - goto out; - - rc = driver->err_handler->slot_reset(dev); - if ((*res == PCI_ERS_RESULT_NONE) || - (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; - if (*res == PCI_ERS_RESULT_DISCONNECT && - rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; + if (!driver->err_handler->slot_reset || !edev->in_error) + return PCI_ERS_RESULT_NONE; + eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name); + return driver->err_handler->slot_reset(edev->pdev); } -static void *eeh_dev_restore_state(void *data, void *userdata) +static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = data; struct pci_dev *pdev; if (!edev) @@ -355,91 +416,53 @@ static void *eeh_dev_restore_state(void *data, void *userdata) /** * eeh_report_resume - Tell device to resume normal operations - * @data: eeh device - * @userdata: return value + * @edev: eeh device + * @driver: device's PCI driver * * This routine must be called to notify the device driver that it * could resume so that the device driver can do some initialization * to make the recovered device work again. */ -static void *eeh_report_resume(void *data, void *userdata) +static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev, + struct pci_driver *driver) { - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - bool was_in_error; - struct pci_driver *driver; + if (!driver->err_handler->resume || !edev->in_error) + return PCI_ERS_RESULT_NONE; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) - return NULL; - - device_lock(&dev->dev); - dev->error_state = pci_channel_io_normal; - - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; - - was_in_error = edev->in_error; - edev->in_error = false; - eeh_enable_irq(dev); - - if (!driver->err_handler || - !driver->err_handler->resume || - (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { - edev->mode &= ~EEH_DEV_NO_HANDLER; - goto out; - } - - driver->err_handler->resume(dev); + eeh_edev_info(edev, "Invoking %s->resume()", driver->name); + driver->err_handler->resume(edev->pdev); - pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); -out: - eeh_pcid_put(dev); + pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); #endif -out_no_dev: - device_unlock(&dev->dev); - return NULL; + return PCI_ERS_RESULT_NONE; } /** * eeh_report_failure - Tell device driver that device is dead. - * @data: eeh device - * @userdata: return value + * @edev: eeh device + * @driver: device's PCI driver * * This informs the device driver that the device is permanently * dead, and that no further recovery attempts will be made on it. */ -static void *eeh_report_failure(void *data, void *userdata) +static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev, + struct pci_driver *driver) { - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - struct pci_driver *driver; + enum pci_ers_result rc; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) - return NULL; - - device_lock(&dev->dev); - dev->error_state = pci_channel_io_perm_failure; - - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; + if (!driver->err_handler->error_detected) + return PCI_ERS_RESULT_NONE; - eeh_disable_irq(dev); + eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)", + driver->name); + rc = driver->err_handler->error_detected(edev->pdev, + pci_channel_io_perm_failure); - if (!driver->err_handler || - !driver->err_handler->error_detected) - goto out; - - driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); - - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; + pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT); + return rc; } static void *eeh_add_virt_device(void *data, void *userdata) @@ -458,9 +481,11 @@ static void *eeh_add_virt_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { - eeh_pcid_put(dev); - if (driver->err_handler) + if (driver->err_handler) { + eeh_pcid_put(dev); return NULL; + } + eeh_pcid_put(dev); } #ifdef CONFIG_PCI_IOV @@ -469,10 +494,9 @@ static void *eeh_add_virt_device(void *data, void *userdata) return NULL; } -static void *eeh_rmv_device(void *data, void *userdata) +static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) { struct pci_driver *driver; - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; int *removed = rmv_data ? &rmv_data->removed : NULL; @@ -497,17 +521,19 @@ static void *eeh_rmv_device(void *data, void *userdata) if (eeh_dev_removed(edev)) return NULL; - driver = eeh_pcid_get(dev); - if (driver) { - eeh_pcid_put(dev); - if (removed && - eeh_pe_passed(edev->pe)) - return NULL; - if (removed && - driver->err_handler && - driver->err_handler->error_detected && - driver->err_handler->slot_reset) + if (removed) { + if (eeh_pe_passed(edev->pe)) return NULL; + driver = eeh_pcid_get(dev); + if (driver) { + if (driver->err_handler && + driver->err_handler->error_detected && + driver->err_handler->slot_reset) { + eeh_pcid_put(dev); + return NULL; + } + eeh_pcid_put(dev); + } } /* Remove it from PCI subsystem */ @@ -542,9 +568,8 @@ static void *eeh_rmv_device(void *data, void *userdata) return NULL; } -static void *eeh_pe_detach_dev(void *data, void *userdata) +static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata) { - struct eeh_pe *pe = (struct eeh_pe *)data; struct eeh_dev *edev, *tmp; eeh_pe_for_each_dev(pe, edev, tmp) { @@ -565,9 +590,8 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) * PE reset (for 3 times), we try to clear the frozen state * for 3 times as well. */ -static void *__eeh_clear_pe_frozen_state(void *data, void *flag) +static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; bool clear_sw_state = *(bool *)flag; int i, rc = 1; @@ -762,6 +786,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *bus; struct eeh_dev *edev, *tmp; + struct eeh_pe *tmp_pe; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; @@ -778,14 +803,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { - pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" - "last hour and has been permanently disabled.\n", + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", pe->phb->global_number, pe->addr, pe->freeze_count); goto hard_fail; } - pr_warn("EEH: This PCI device has failed %d times in the last hour\n", - pe->freeze_count); + pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", + pe->freeze_count, eeh_max_freezes); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -798,7 +822,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe) * hotplug for this case. */ pr_info("EEH: Notify device drivers to shutdown\n"); - eeh_pe_dev_traverse(pe, eeh_report_error, &result); + eeh_set_channel_state(pe, pci_channel_io_frozen); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error, + &result); if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE && result != PCI_ERS_RESULT_NEED_RESET) @@ -845,7 +872,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); - eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); + eeh_pe_report("mmio_enabled", pe, + eeh_report_mmio_enabled, &result); } } @@ -888,7 +916,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device drivers " "the completion of reset\n"); result = PCI_ERS_RESULT_NONE; - eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); + eeh_pe_report("slot_reset", pe, eeh_report_reset, &result); } /* All devices should claim they have recovered by now. */ @@ -909,8 +939,17 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); - eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); + eeh_pe_report("resume", pe, eeh_report_resume, NULL); + eeh_for_each_pe(pe, tmp_pe) { + eeh_pe_for_each_dev(tmp_pe, edev, tmp) { + edev->mode &= ~EEH_DEV_NO_HANDLER; + edev->in_error = false; + } + } + pr_info("EEH: Recovery successful.\n"); goto final; hard_fail: @@ -926,7 +965,10 @@ hard_fail: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ - eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(permanent failure)", pe, + eeh_report_failure, NULL); /* Mark the PE to be removed permanently */ eeh_pe_state_mark(pe, EEH_PE_REMOVED); @@ -1035,7 +1077,9 @@ void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_traverse(pe, + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_pe_report( + "error_detected(permanent failure)", pe, eeh_report_failure, NULL); bus = eeh_pe_bus_get(phb_pe); if (!bus) { diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index ee5a67d57aab..1b238ecc553e 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -142,8 +142,7 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) * The function is used to retrieve the next PE in the * hierarchy PE tree. */ -static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, - struct eeh_pe *root) +struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root) { struct list_head *next = pe->child_list.next; @@ -173,12 +172,12 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, * to be traversed. */ void *eeh_pe_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) + eeh_pe_traverse_func fn, void *flag) { struct eeh_pe *pe; void *ret; - for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + eeh_for_each_pe(root, pe) { ret = fn(pe, flag); if (ret) return ret; } @@ -196,7 +195,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, * PE and its child PEs. */ void *eeh_pe_dev_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) + eeh_edev_traverse_func fn, void *flag) { struct eeh_pe *pe; struct eeh_dev *edev, *tmp; @@ -209,7 +208,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, } /* Traverse root PE */ - for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + eeh_for_each_pe(root, pe) { eeh_pe_for_each_dev(pe, edev, tmp) { ret = fn(edev, flag); if (ret) @@ -235,9 +234,8 @@ struct eeh_pe_get_flag { int config_addr; }; -static void *__eeh_pe_get(void *data, void *flag) +static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag; /* Unexpected PHB PE */ @@ -551,9 +549,8 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe) * PE. Also, the associated PCI devices will be put into IO frozen * state as well. */ -static void *__eeh_pe_state_mark(void *data, void *flag) +static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); struct eeh_dev *edev, *tmp; struct pci_dev *pdev; @@ -595,9 +592,8 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) } EXPORT_SYMBOL_GPL(eeh_pe_state_mark); -static void *__eeh_pe_dev_mode_mark(void *data, void *flag) +static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag) { - struct eeh_dev *edev = data; int mode = *((int *)flag); edev->mode |= mode; @@ -625,9 +621,8 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode) * given PE. Besides, we also clear the check count of the PE * as well. */ -static void *__eeh_pe_state_clear(void *data, void *flag) +static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); struct eeh_dev *edev, *tmp; struct pci_dev *pdev; @@ -858,9 +853,8 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) * the expansion ROM base address, the latency timer, and etc. * from the saved values in the device node. */ -static void *eeh_restore_one_device_bars(void *data, void *flag) +static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag) { - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dn *pdn = eeh_dev_to_pdn(edev); /* Do special restore for bridges */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 51695608c68b..b10e01021214 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -36,6 +36,7 @@ #include <asm/context_tracking.h> #include <asm/tm.h> #include <asm/ppc-opcode.h> +#include <asm/barrier.h> #include <asm/export.h> #ifdef CONFIG_PPC_BOOK3S #include <asm/exception-64s.h> @@ -178,6 +179,15 @@ system_call: /* label this so stack traces look sane */ clrldi r8,r8,32 15: slwi r0,r0,4 + + barrier_nospec_asm + /* + * Prevent the load of the handler below (based on the user-passed + * system call number) being speculatively executed until the test + * against NR_syscalls and branch to .Lsyscall_enosys above has + * committed. + */ + ldx r12,r11,r0 /* Fetch system call handler [ptr] */ mtctr r12 bctrl /* Call handler */ @@ -596,6 +606,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) * actually hit this code path. */ + isync slbie r6 slbie r6 /* Workaround POWER5 < DD2.1 issue */ slbmte r7,r0 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f283958129f2..285c6465324a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1501,6 +1501,7 @@ masked_##_H##interrupt: \ xori r10,r10,MSR_EE; /* clear MSR_EE */ \ mtspr SPRN_##_H##SRR1,r10; \ 2: mtcrf 0x80,r9; \ + std r1,PACAR1(r13); \ ld r9,PACA_EXGEN+EX_R9(r13); \ ld r10,PACA_EXGEN+EX_R10(r13); \ ld r11,PACA_EXGEN+EX_R11(r13); \ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3c2c2688918f..07e8396d472b 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -335,6 +335,26 @@ static unsigned long get_fadump_area_size(void) return size; } +static void __init fadump_reserve_crash_area(unsigned long base, + unsigned long size) +{ + struct memblock_region *reg; + unsigned long mstart, mend, msize; + + for_each_memblock(memory, reg) { + mstart = max_t(unsigned long, base, reg->base); + mend = reg->base + reg->size; + mend = min(base + size, mend); + + if (mstart < mend) { + msize = mend - mstart; + memblock_reserve(mstart, msize); + pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n", + (msize >> 20), mstart); + } + } +} + int __init fadump_reserve_mem(void) { unsigned long base, size, memory_boundary; @@ -380,7 +400,16 @@ int __init fadump_reserve_mem(void) memory_boundary = memblock_end_of_DRAM(); if (fw_dump.dump_active) { - printk(KERN_INFO "Firmware-assisted dump is active.\n"); + pr_info("Firmware-assisted dump is active.\n"); + +#ifdef CONFIG_HUGETLB_PAGE + /* + * FADump capture kernel doesn't care much about hugepages. + * In fact, handling hugepages in capture kernel is asking for + * trouble. So, disable HugeTLB support when fadump is active. + */ + hugetlb_disabled = true; +#endif /* * If last boot has crashed then reserve all the memory * above boot_memory_size so that we don't touch it until @@ -389,11 +418,7 @@ int __init fadump_reserve_mem(void) */ base = fw_dump.boot_memory_size; size = memory_boundary - base; - memblock_reserve(base, size); - printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " - "for saving crash dump\n", - (unsigned long)(size >> 20), - (unsigned long)(base >> 20)); + fadump_reserve_crash_area(base, size); fw_dump.fadumphdr_addr = be64_to_cpu(fdm_active->rmr_region.destination_address) + @@ -1155,6 +1180,9 @@ void fadump_cleanup(void) init_fadump_mem_struct(&fdm, be64_to_cpu(fdm_active->cpu_state_data.destination_address)); fadump_invalidate_dump(&fdm); + } else if (fw_dump.dump_registered) { + /* Un-register Firmware-assisted dump if it was registered. */ + fadump_unregister_dump(&fdm); } } diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d8670a37d70c..6cab07e76732 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -913,7 +913,7 @@ start_here: tovirt(r6,r6) lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 4c1012b80d3b..80547dad37da 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -178,8 +178,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) if (cpu_has_feature(CPU_FTR_DAWR)) { length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ - if ((bp->attr.bp_addr >> 10) != - ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10)) + if ((bp->attr.bp_addr >> 9) != + ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 9)) return -EINVAL; } if (info->len > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 061aa0f47bb1..0682fef1f385 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -89,7 +89,7 @@ atomic_t ppc_n_lost_interrupts; #ifdef CONFIG_TAU_INT extern int tau_initialized; -extern int tau_interrupts(int); +u32 tau_interrupts(unsigned long cpu); #endif #endif /* CONFIG_PPC32 */ @@ -508,6 +508,11 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); seq_printf(p, " Local timer interrupts for timer event device\n"); + seq_printf(p, "%*s: ", prec, "BCT"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_printf(p, " Broadcast timer interrupts for timer event device\n"); + seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); @@ -567,6 +572,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event; + sum += per_cpu(irq_stat, cpu).broadcast_irqs_event; sum += per_cpu(irq_stat, cpu).pmu_irqs; sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 9ad37f827a97..683b5b3805bd 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,6 +25,7 @@ #include <linux/kvm_para.h> #include <linux/slab.h> #include <linux/of.h> +#include <linux/pagemap.h> #include <asm/reg.h> #include <asm/sections.h> @@ -672,14 +673,13 @@ static void kvm_use_magic_page(void) { u32 *p; u32 *start, *end; - u32 tmp; u32 features; /* Tell the host to map the magic page to -4096 on all CPUs */ on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ - if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { kvm_patching_worked = false; return; } diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 2694d078741d..936c7e2d421e 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -98,12 +98,14 @@ void machine_kexec(struct kimage *image) int save_ftrace_enabled; save_ftrace_enabled = __ftrace_enabled_save(); + this_cpu_disable_ftrace(); if (ppc_md.machine_kexec) ppc_md.machine_kexec(image); else default_machine_kexec(image); + this_cpu_enable_ftrace(); __ftrace_enabled_restore(save_ftrace_enabled); /* Fall back to normal restart if we're still alive. */ diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 1044bf15d5ed..a0f6f45005bd 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -231,16 +231,16 @@ static void kexec_prepare_cpus(void) /* we are sure every CPU has IRQs off at this point */ kexec_all_irq_disabled = 1; - /* after we tell the others to go down */ - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 0); - /* * Before removing MMU mappings make sure all CPUs have entered real * mode: */ kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); + /* after we tell the others to go down */ + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); + put_cpu(); } diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 384357cb8bc0..0b196cdcd15d 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -25,23 +25,12 @@ /* * Returns (address we are running at) - (address we were linked at) * for use before the text and data are mapped to KERNELBASE. - */ - -_GLOBAL(reloc_offset) - mflr r0 - bl 1f -1: mflr r3 - PPC_LL r4,(2f-1b)(r3) - subf r3,r4,r3 - mtlr r0 - blr - .align 3 -2: PPC_LONG 1b - -/* * add_reloc_offset(x) returns x + reloc_offset(). */ + +_GLOBAL(reloc_offset) + li r3, 0 _GLOBAL(add_reloc_offset) mflr r0 bl 1f @@ -60,6 +49,10 @@ _GLOBAL(setjmp) PPC_STL r0,0(r3) PPC_STL r1,SZL(r3) PPC_STL r2,2*SZL(r3) +#ifdef CONFIG_PPC32 + mfcr r12 + stmw r12, 3*SZL(r3) +#else mfcr r0 PPC_STL r0,3*SZL(r3) PPC_STL r13,4*SZL(r3) @@ -81,14 +74,16 @@ _GLOBAL(setjmp) PPC_STL r29,20*SZL(r3) PPC_STL r30,21*SZL(r3) PPC_STL r31,22*SZL(r3) +#endif li r3,0 blr _GLOBAL(longjmp) - PPC_LCMPI r4,0 - bne 1f - li r4,1 -1: PPC_LL r13,4*SZL(r3) +#ifdef CONFIG_PPC32 + lmw r12, 3*SZL(r3) + mtcrf 0x38, r12 +#else + PPC_LL r13,4*SZL(r3) PPC_LL r14,5*SZL(r3) PPC_LL r15,6*SZL(r3) PPC_LL r16,7*SZL(r3) @@ -109,11 +104,14 @@ _GLOBAL(longjmp) PPC_LL r31,22*SZL(r3) PPC_LL r0,3*SZL(r3) mtcrf 0x38,r0 +#endif PPC_LL r0,0(r3) PPC_LL r1,SZL(r3) PPC_LL r2,2*SZL(r3) mtlr r0 - mr r3,r4 + mr. r3, r4 + bnelr + li r3, 1 blr _GLOBAL(current_stack_pointer) diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 3f7ba0f5bf29..1b3c6835e730 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -72,6 +72,12 @@ int module_finalize(const Elf_Ehdr *hdr, do_feature_fixups(powerpc_firmware_features, (void *)sect->sh_addr, (void *)sect->sh_addr + sect->sh_size); + + sect = find_section(hdr, sechdrs, "__spec_barrier_fixup"); + if (sect != NULL) + do_barrier_nospec_fixups_range(barrier_nospec_enabled, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); #endif sect = find_section(hdr, sechdrs, "__lwsync_fixup"); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 5a7a78f12562..88d83771f462 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -109,12 +109,12 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, for (i = 1; i < hdr->e_shnum; i++) { /* If it's called *.init*, and we're not init, we're not interested */ - if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL) != is_init) continue; /* We don't want to look at debug sections. */ - if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0) + if (strstr(secstrings + sechdrs[i].sh_name, ".debug")) continue; if (sechdrs[i].sh_type == SHT_RELA) { diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index a2636c250b7b..1b7419579820 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -280,6 +280,10 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, #ifdef CONFIG_DYNAMIC_FTRACE /* make the trampoline to the ftrace_caller */ relocs++; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + /* an additional one for ftrace_regs_caller */ + relocs++; +#endif #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); @@ -463,8 +467,11 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, } #ifdef CC_USING_MPROFILE_KERNEL -static bool is_early_mcount_callsite(u32 *instruction) +static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) { + if (strcmp("_mcount", name)) + return false; + /* * Check if this is one of the -mprofile-kernel sequences. */ @@ -496,8 +503,7 @@ static void squash_toc_save_inst(const char *name, unsigned long addr) #else static void squash_toc_save_inst(const char *name, unsigned long addr) { } -/* without -mprofile-kernel, mcount calls are never early */ -static bool is_early_mcount_callsite(u32 *instruction) +static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) { return false; } @@ -505,11 +511,11 @@ static bool is_early_mcount_callsite(u32 *instruction) /* We expect a noop next: if it is, replace it with instruction to restore r2. */ -static int restore_r2(u32 *instruction, struct module *me) +static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; - if (is_early_mcount_callsite(prev_insn)) + if (is_mprofile_mcount_callsite(name, prev_insn)) return 1; /* @@ -650,7 +656,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, value = stub_for_addr(sechdrs, value, me); if (!value) return -ENOENT; - if (!restore_r2((u32 *)location + 1, me)) + if (!restore_r2(strtab + sym->st_name, + (u32 *)location + 1, me)) return -ENOEXEC; squash_toc_save_inst(strtab + sym->st_name, value); @@ -762,7 +769,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * via the paca (in r13). The target (ftrace_caller()) is responsible for * saving and restoring the toc before returning. */ -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, + struct module *me, unsigned long addr) { struct ppc64_stub_entry *entry; unsigned int i, num_stubs; @@ -789,9 +797,10 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module memcpy(entry->jump, stub_insns, sizeof(stub_insns)); /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = (unsigned long)ftrace_caller - kernel_toc_addr(); + reladdr = addr - kernel_toc_addr(); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of ftrace_caller out of range of kernel_toc.\n", me->name); + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); return 0; } @@ -799,22 +808,29 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module entry->jump[2] |= PPC_LO(reladdr); /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ - entry->funcdata = func_desc((unsigned long)ftrace_caller); + entry->funcdata = func_desc(addr); entry->magic = STUB_MAGIC; return (unsigned long)entry; } #else -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, + struct module *me, unsigned long addr) { - return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me); + return stub_for_addr(sechdrs, addr, me); } #endif int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { - mod->arch.toc = my_r2(sechdrs, mod); - mod->arch.tramp = create_ftrace_stub(sechdrs, mod); + mod->arch.tramp = create_ftrace_stub(sechdrs, mod, + (unsigned long)ftrace_caller); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + mod->arch.tramp_regs = create_ftrace_stub(sechdrs, mod, + (unsigned long)ftrace_regs_caller); + if (!mod->arch.tramp_regs) + return -ENOENT; +#endif if (!mod->arch.tramp) return -ENOENT; diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index ba681dac7b46..22e9d281324d 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -1030,7 +1030,7 @@ loff_t __init nvram_create_partition(const char *name, int sig, return -ENOSPC; /* Create our OS partition */ - new_part = kmalloc(sizeof(*new_part), GFP_KERNEL); + new_part = kzalloc(sizeof(*new_part), GFP_KERNEL); if (!new_part) { pr_err("%s: kmalloc failed\n", __func__); return -ENOMEM; @@ -1039,7 +1039,7 @@ loff_t __init nvram_create_partition(const char *name, int sig, new_part->index = free_part->index; new_part->header.signature = sig; new_part->header.length = size; - strncpy(new_part->header.name, name, 12); + memcpy(new_part->header.name, name, strnlen(name, sizeof(new_part->header.name))); new_part->header.checksum = nvram_checksum(&new_part->header); rc = nvram_write_header(new_part); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 85ad2f78b889..4f861055a852 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -11,11 +11,13 @@ #include <linux/sched.h> #include <linux/errno.h> #include <linux/bootmem.h> +#include <linux/syscalls.h> #include <linux/irq.h> #include <linux/list.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/syscalls.h> #include <asm/processor.h> #include <asm/io.h> @@ -283,7 +285,11 @@ pci_bus_to_hose(int bus) * Note that the returned IO or memory base is a physical address */ -long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" +SYSCALL_DEFINE3(pciconfig_iobase, long, which, + unsigned long, bus, unsigned long, devfn) { struct pci_controller* hose; long result = -EOPNOTSUPP; @@ -307,5 +313,4 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) return result; } - - +#pragma GCC diagnostic pop diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 15ce0306b092..812171c09f42 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -203,8 +203,11 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose) #define IOBASE_ISA_IO 3 #define IOBASE_ISA_MEM 4 -long sys_pciconfig_iobase(long which, unsigned long in_bus, - unsigned long in_devfn) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" +SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus, + unsigned long, in_devfn) { struct pci_controller* hose; struct pci_bus *tmp_bus, *bus = NULL; @@ -256,6 +259,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, return -EOPNOTSUPP; } +#pragma GCC diagnostic pop #ifdef CONFIG_NUMA int pcibus_to_node(struct pci_bus *bus) diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 1b1787d52896..8afbe213d729 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -25,6 +25,9 @@ */ _GLOBAL(ppc_save_regs) PPC_STL r0,0*SZL(r3) +#ifdef CONFIG_PPC32 + stmw r2, 2*SZL(r3) +#else PPC_STL r2,2*SZL(r3) PPC_STL r3,3*SZL(r3) PPC_STL r4,4*SZL(r3) @@ -55,6 +58,7 @@ _GLOBAL(ppc_save_regs) PPC_STL r29,29*SZL(r3) PPC_STL r30,30*SZL(r3) PPC_STL r31,31*SZL(r3) +#endif /* go up one stack frame for SP */ PPC_LL r4,0(r1) PPC_STL r4,1*SZL(r3) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 26ea9793d290..9ef4aea9fffe 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -154,6 +154,7 @@ unsigned long msr_check_and_set(unsigned long bits) return newmsr; } +EXPORT_SYMBOL_GPL(msr_check_and_set); void __msr_check_and_clear(unsigned long bits) { @@ -846,10 +847,6 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); -#ifdef CONFIG_PPC64 -DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); -#endif - static inline bool hw_brk_match(struct arch_hw_breakpoint *a, struct arch_hw_breakpoint *b) { @@ -1155,7 +1152,7 @@ static inline void restore_sprs(struct thread_struct *old_thread, mtspr(SPRN_TAR, new_thread->tar); } - if (cpu_has_feature(CPU_FTR_ARCH_300) && + if (cpu_has_feature(CPU_FTR_P9_TIDR) && old_thread->tidr != new_thread->tidr) mtspr(SPRN_TIDR, new_thread->tidr); #endif @@ -1182,20 +1179,6 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); -#ifdef CONFIG_PPC64 - /* - * Collect processor utilization data per process - */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); - long unsigned start_tb, current_tb; - start_tb = old_thread->start_tb; - cu->current_tb = current_tb = mfspr(SPRN_PURR); - old_thread->accum_tb += (current_tb - start_tb); - new_thread->start_tb = current_tb; - } -#endif /* CONFIG_PPC64 */ - #ifdef CONFIG_PPC_BOOK3S_64 batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { @@ -1438,7 +1421,7 @@ void show_regs(struct pt_regs * regs) pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - pr_cont("SOFTE: %ld ", regs->softe); + pr_cont("IRQMASK: %lx ", regs->softe); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) @@ -1497,104 +1480,42 @@ int set_thread_uses_vas(void) } #ifdef CONFIG_PPC64 -static DEFINE_SPINLOCK(vas_thread_id_lock); -static DEFINE_IDA(vas_thread_ida); - -/* - * We need to assign a unique thread id to each thread in a process. +/** + * Assign a TIDR (thread ID) for task @t and set it in the thread + * structure. For now, we only support setting TIDR for 'current' task. * - * This thread id, referred to as TIDR, and separate from the Linux's tgid, - * is intended to be used to direct an ASB_Notify from the hardware to the - * thread, when a suitable event occurs in the system. + * Since the TID value is a truncated form of it PID, it is possible + * (but unlikely) for 2 threads to have the same TID. In the unlikely event + * that 2 threads share the same TID and are waiting, one of the following + * cases will happen: * - * One such event is a "paste" instruction in the context of Fast Thread - * Wakeup (aka Core-to-core wake up in the Virtual Accelerator Switchboard - * (VAS) in POWER9. + * 1. The correct thread is running, the wrong thread is not + * In this situation, the correct thread is woken and proceeds to pass it's + * condition check. * - * To get a unique TIDR per process we could simply reuse task_pid_nr() but - * the problem is that task_pid_nr() is not yet available copy_thread() is - * called. Fixing that would require changing more intrusive arch-neutral - * code in code path in copy_process()?. + * 2. Neither threads are running + * In this situation, neither thread will be woken. When scheduled, the waiting + * threads will execute either a wait, which will return immediately, followed + * by a condition check, which will pass for the correct thread and fail + * for the wrong thread, or they will execute the condition check immediately. * - * Further, to assign unique TIDRs within each process, we need an atomic - * field (or an IDR) in task_struct, which again intrudes into the arch- - * neutral code. So try to assign globally unique TIDRs for now. + * 3. The wrong thread is running, the correct thread is not + * The wrong thread will be woken, but will fail it's condition check and + * re-execute wait. The correct thread, when scheduled, will execute either + * it's condition check (which will pass), or wait, which returns immediately + * when called the first time after the thread is scheduled, followed by it's + * condition check (which will pass). * - * NOTE: TIDR 0 indicates that the thread does not need a TIDR value. - * For now, only threads that expect to be notified by the VAS - * hardware need a TIDR value and we assign values > 0 for those. - */ -#define MAX_THREAD_CONTEXT ((1 << 16) - 1) -static int assign_thread_tidr(void) -{ - int index; - int err; - unsigned long flags; - -again: - if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL)) - return -ENOMEM; - - spin_lock_irqsave(&vas_thread_id_lock, flags); - err = ida_get_new_above(&vas_thread_ida, 1, &index); - spin_unlock_irqrestore(&vas_thread_id_lock, flags); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > MAX_THREAD_CONTEXT) { - spin_lock_irqsave(&vas_thread_id_lock, flags); - ida_remove(&vas_thread_ida, index); - spin_unlock_irqrestore(&vas_thread_id_lock, flags); - return -ENOMEM; - } - - return index; -} - -static void free_thread_tidr(int id) -{ - unsigned long flags; - - spin_lock_irqsave(&vas_thread_id_lock, flags); - ida_remove(&vas_thread_ida, id); - spin_unlock_irqrestore(&vas_thread_id_lock, flags); -} - -/* - * Clear any TIDR value assigned to this thread. - */ -void clear_thread_tidr(struct task_struct *t) -{ - if (!t->thread.tidr) - return; - - if (!cpu_has_feature(CPU_FTR_ARCH_300)) { - WARN_ON_ONCE(1); - return; - } - - mtspr(SPRN_TIDR, 0); - free_thread_tidr(t->thread.tidr); - t->thread.tidr = 0; -} - -void arch_release_task_struct(struct task_struct *t) -{ - clear_thread_tidr(t); -} - -/* - * Assign a unique TIDR (thread id) for task @t and set it in the thread - * structure. For now, we only support setting TIDR for 'current' task. + * 4. Both threads are running + * Both threads will be woken. The wrong thread will fail it's condition check + * and execute another wait, while the correct thread will pass it's condition + * check. + * + * @t: the task to set the thread ID for */ int set_thread_tidr(struct task_struct *t) { - int rc; - - if (!cpu_has_feature(CPU_FTR_ARCH_300)) + if (!cpu_has_feature(CPU_FTR_P9_TIDR)) return -EINVAL; if (t != current) @@ -1603,11 +1524,7 @@ int set_thread_tidr(struct task_struct *t) if (t->thread.tidr) return 0; - rc = assign_thread_tidr(); - if (rc < 0) - return rc; - - t->thread.tidr = rc; + t->thread.tidr = (u16)task_pid_nr(t); mtspr(SPRN_TIDR, t->thread.tidr); return 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9dbed488aba1..05e7fb47a7a4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -332,25 +332,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * NOTE: This must match the parsing done in smp_setup_cpu_maps. */ for (i = 0; i < nthreads; i++) { - /* - * version 2 of the kexec param format adds the phys cpuid of - * booted proc. - */ - if (fdt_version(initial_boot_params) >= 2) { - if (be32_to_cpu(intserv[i]) == - fdt_boot_cpuid_phys(initial_boot_params)) { - found = boot_cpu_count; - found_thread = i; - } - } else { - /* - * Check if it's the boot-cpu, set it's hw index now, - * unfortunately this format did not support booting - * off secondary threads. - */ - if (of_get_flat_dt_prop(node, - "linux,boot-cpu", NULL) != NULL) - found = boot_cpu_count; + if (be32_to_cpu(intserv[i]) == + fdt_boot_cpuid_phys(initial_boot_params)) { + found = boot_cpu_count; + found_thread = i; } #ifdef CONFIG_SMP /* logical cpu id is always 0 on UP kernels */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f9d6befb55a6..5425dd3d6a9f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -103,7 +103,7 @@ int of_workarounds; #ifdef DEBUG_PROM #define prom_debug(x...) prom_printf(x) #else -#define prom_debug(x...) +#define prom_debug(x...) do { } while (0) #endif @@ -301,6 +301,10 @@ static void __init prom_print(const char *msg) } +/* + * Both prom_print_hex & prom_print_dec takes an unsigned long as input so that + * we do not need __udivdi3 or __umoddi3 on 32bits. + */ static void __init prom_print_hex(unsigned long val) { int i, nibbles = sizeof(val)*2; @@ -334,12 +338,14 @@ static void __init prom_print_dec(unsigned long val) call_prom("write", 3, 1, prom.stdout, buf+i, size); } +__printf(1, 2) static void __init prom_printf(const char *format, ...) { const char *p, *q, *s; va_list args; unsigned long v; long vs; + int n = 0; va_start(args, format); for (p = format; *p != 0; p = q) { @@ -358,6 +364,10 @@ static void __init prom_printf(const char *format, ...) ++q; if (*q == 0) break; + while (*q == 'l') { + ++q; + ++n; + } switch (*q) { case 's': ++q; @@ -366,39 +376,55 @@ static void __init prom_printf(const char *format, ...) break; case 'x': ++q; - v = va_arg(args, unsigned long); + switch (n) { + case 0: + v = va_arg(args, unsigned int); + break; + case 1: + v = va_arg(args, unsigned long); + break; + case 2: + default: + v = va_arg(args, unsigned long long); + break; + } prom_print_hex(v); break; - case 'd': + case 'u': ++q; - vs = va_arg(args, int); - if (vs < 0) { - prom_print("-"); - vs = -vs; + switch (n) { + case 0: + v = va_arg(args, unsigned int); + break; + case 1: + v = va_arg(args, unsigned long); + break; + case 2: + default: + v = va_arg(args, unsigned long long); + break; } - prom_print_dec(vs); + prom_print_dec(v); break; - case 'l': + case 'd': ++q; - if (*q == 0) + switch (n) { + case 0: + vs = va_arg(args, int); break; - else if (*q == 'x') { - ++q; - v = va_arg(args, unsigned long); - prom_print_hex(v); - } else if (*q == 'u') { /* '%lu' */ - ++q; - v = va_arg(args, unsigned long); - prom_print_dec(v); - } else if (*q == 'd') { /* %ld */ - ++q; + case 1: vs = va_arg(args, long); - if (vs < 0) { - prom_print("-"); - vs = -vs; - } - prom_print_dec(vs); + break; + case 2: + default: + vs = va_arg(args, long long); + break; } + if (vs < 0) { + prom_print("-"); + vs = -vs; + } + prom_print_dec(vs); break; } } @@ -1160,7 +1186,7 @@ static void __init prom_send_capabilities(void) */ cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + prom_printf("Max number of cores passed to firmware: %u (NR_CPUS = %d)\n", cores, NR_CPUS); ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores); @@ -1242,7 +1268,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) if (align) base = _ALIGN_UP(base, align); - prom_debug("alloc_up(%x, %x)\n", size, align); + prom_debug("%s(%lx, %lx)\n", __func__, size, align); if (ram_top == 0) prom_panic("alloc_up() called with mem not initialized\n"); @@ -1253,7 +1279,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) for(; (base + size) <= alloc_top; base = _ALIGN_UP(base + 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); + prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) break; @@ -1265,12 +1291,12 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) return 0; alloc_bottom = addr + size; - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", alloc_bottom); - prom_debug(" alloc_top : %x\n", alloc_top); - prom_debug(" alloc_top_hi : %x\n", alloc_top_high); - prom_debug(" rmo_top : %x\n", rmo_top); - prom_debug(" ram_top : %x\n", ram_top); + prom_debug(" -> %lx\n", addr); + prom_debug(" alloc_bottom : %lx\n", alloc_bottom); + prom_debug(" alloc_top : %lx\n", alloc_top); + prom_debug(" alloc_top_hi : %lx\n", alloc_top_high); + prom_debug(" rmo_top : %lx\n", rmo_top); + prom_debug(" ram_top : %lx\n", ram_top); return addr; } @@ -1285,7 +1311,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, { unsigned long base, addr = 0; - prom_debug("alloc_down(%x, %x, %s)\n", size, align, + prom_debug("%s(%lx, %lx, %s)\n", __func__, size, align, highmem ? "(high)" : "(low)"); if (ram_top == 0) prom_panic("alloc_down() called with mem not initialized\n"); @@ -1313,7 +1339,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, base = _ALIGN_DOWN(alloc_top - size, align); for (; base > alloc_bottom; base = _ALIGN_DOWN(base - 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); + prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) break; @@ -1324,12 +1350,12 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, alloc_top = addr; bail: - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", alloc_bottom); - prom_debug(" alloc_top : %x\n", alloc_top); - prom_debug(" alloc_top_hi : %x\n", alloc_top_high); - prom_debug(" rmo_top : %x\n", rmo_top); - prom_debug(" ram_top : %x\n", ram_top); + prom_debug(" -> %lx\n", addr); + prom_debug(" alloc_bottom : %lx\n", alloc_bottom); + prom_debug(" alloc_top : %lx\n", alloc_top); + prom_debug(" alloc_top_hi : %lx\n", alloc_top_high); + prom_debug(" rmo_top : %lx\n", rmo_top); + prom_debug(" ram_top : %lx\n", ram_top); return addr; } @@ -1455,7 +1481,7 @@ static void __init prom_init_mem(void) if (size == 0) continue; - prom_debug(" %x %x\n", base, size); + prom_debug(" %lx %lx\n", base, size); if (base == 0 && (of_platform & PLATFORM_LPAR)) rmo_top = size; if ((base + size) > ram_top) @@ -1475,12 +1501,12 @@ static void __init prom_init_mem(void) if (prom_memory_limit) { if (prom_memory_limit <= alloc_bottom) { - prom_printf("Ignoring mem=%x <= alloc_bottom.\n", - prom_memory_limit); + prom_printf("Ignoring mem=%lx <= alloc_bottom.\n", + prom_memory_limit); prom_memory_limit = 0; } else if (prom_memory_limit >= ram_top) { - prom_printf("Ignoring mem=%x >= ram_top.\n", - prom_memory_limit); + prom_printf("Ignoring mem=%lx >= ram_top.\n", + prom_memory_limit); prom_memory_limit = 0; } else { ram_top = prom_memory_limit; @@ -1512,12 +1538,13 @@ static void __init prom_init_mem(void) alloc_bottom = PAGE_ALIGN(prom_initrd_end); prom_printf("memory layout at init:\n"); - prom_printf(" memory_limit : %x (16 MB aligned)\n", prom_memory_limit); - prom_printf(" alloc_bottom : %x\n", alloc_bottom); - prom_printf(" alloc_top : %x\n", alloc_top); - prom_printf(" alloc_top_hi : %x\n", alloc_top_high); - prom_printf(" rmo_top : %x\n", rmo_top); - prom_printf(" ram_top : %x\n", ram_top); + prom_printf(" memory_limit : %lx (16 MB aligned)\n", + prom_memory_limit); + prom_printf(" alloc_bottom : %lx\n", alloc_bottom); + prom_printf(" alloc_top : %lx\n", alloc_top); + prom_printf(" alloc_top_hi : %lx\n", alloc_top_high); + prom_printf(" rmo_top : %lx\n", rmo_top); + prom_printf(" ram_top : %lx\n", ram_top); } static void __init prom_close_stdin(void) @@ -1578,7 +1605,7 @@ static void __init prom_instantiate_opal(void) return; } - prom_printf("instantiating opal at 0x%x...", base); + prom_printf("instantiating opal at 0x%llx...", base); if (call_prom_ret("call-method", 4, 3, rets, ADDR("load-opal-runtime"), @@ -1594,10 +1621,10 @@ static void __init prom_instantiate_opal(void) reserve_mem(base, size); - prom_debug("opal base = 0x%x\n", base); - prom_debug("opal align = 0x%x\n", align); - prom_debug("opal entry = 0x%x\n", entry); - prom_debug("opal size = 0x%x\n", (long)size); + prom_debug("opal base = 0x%llx\n", base); + prom_debug("opal align = 0x%llx\n", align); + prom_debug("opal entry = 0x%llx\n", entry); + prom_debug("opal size = 0x%llx\n", size); prom_setprop(opal_node, "/ibm,opal", "opal-base-address", &base, sizeof(base)); @@ -1674,7 +1701,7 @@ static void __init prom_instantiate_rtas(void) prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); - prom_debug("rtas size = 0x%x\n", (long)size); + prom_debug("rtas size = 0x%x\n", size); prom_debug("prom_instantiate_rtas: end...\n"); } @@ -1732,7 +1759,7 @@ static void __init prom_instantiate_sml(void) if (base == 0) prom_panic("Could not allocate memory for sml\n"); - prom_printf("instantiating sml at 0x%x...", base); + prom_printf("instantiating sml at 0x%llx...", base); memset((void *)base, 0, size); @@ -1751,8 +1778,8 @@ static void __init prom_instantiate_sml(void) prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size", &size, sizeof(size)); - prom_debug("sml base = 0x%x\n", base); - prom_debug("sml size = 0x%x\n", (long)size); + prom_debug("sml base = 0x%llx\n", base); + prom_debug("sml size = 0x%x\n", size); prom_debug("prom_instantiate_sml: end...\n"); } @@ -1845,7 +1872,7 @@ static void __init prom_initialize_tce_table(void) prom_debug("TCE table: %s\n", path); prom_debug("\tnode = 0x%x\n", node); - prom_debug("\tbase = 0x%x\n", base); + prom_debug("\tbase = 0x%llx\n", base); prom_debug("\tsize = 0x%x\n", minsize); /* Initialize the table to have a one-to-one mapping @@ -1932,12 +1959,12 @@ static void __init prom_hold_cpus(void) } prom_debug("prom_hold_cpus: start...\n"); - prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); - prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); - prom_debug(" 1) acknowledge = 0x%x\n", + prom_debug(" 1) spinloop = 0x%lx\n", (unsigned long)spinloop); + prom_debug(" 1) *spinloop = 0x%lx\n", *spinloop); + prom_debug(" 1) acknowledge = 0x%lx\n", (unsigned long)acknowledge); - prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge); - prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold); + prom_debug(" 1) *acknowledge = 0x%lx\n", *acknowledge); + prom_debug(" 1) secondary_hold = 0x%lx\n", secondary_hold); /* Set the common spinloop variable, so all of the secondary cpus * will block when they are awakened from their OF spinloop. @@ -1965,7 +1992,7 @@ static void __init prom_hold_cpus(void) prom_getprop(node, "reg", ®, sizeof(reg)); cpu_no = be32_to_cpu(reg); - prom_debug("cpu hw idx = %lu\n", cpu_no); + prom_debug("cpu hw idx = %u\n", cpu_no); /* Init the acknowledge var which will be reset by * the secondary cpu when it awakens from its OF @@ -1975,7 +2002,7 @@ static void __init prom_hold_cpus(void) if (cpu_no != prom.cpu) { /* Primary Thread of non-boot cpu or any thread */ - prom_printf("starting cpu hw idx %lu... ", cpu_no); + prom_printf("starting cpu hw idx %u... ", cpu_no); call_prom("start-cpu", 3, 0, node, secondary_hold, cpu_no); @@ -1986,11 +2013,11 @@ static void __init prom_hold_cpus(void) if (*acknowledge == cpu_no) prom_printf("done\n"); else - prom_printf("failed: %x\n", *acknowledge); + prom_printf("failed: %lx\n", *acknowledge); } #ifdef CONFIG_SMP else - prom_printf("boot cpu hw idx %lu\n", cpu_no); + prom_printf("boot cpu hw idx %u\n", cpu_no); #endif /* CONFIG_SMP */ } @@ -2268,7 +2295,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, while ((*mem_start + needed) > *mem_end) { unsigned long room, chunk; - prom_debug("Chunk exhausted, claiming more at %x...\n", + prom_debug("Chunk exhausted, claiming more at %lx...\n", alloc_bottom); room = alloc_top - alloc_bottom; if (room > DEVTREE_CHUNK_SIZE) @@ -2494,7 +2521,7 @@ static void __init flatten_device_tree(void) room = alloc_top - alloc_bottom - 0x4000; if (room > DEVTREE_CHUNK_SIZE) room = DEVTREE_CHUNK_SIZE; - prom_debug("starting device tree allocs at %x\n", alloc_bottom); + prom_debug("starting device tree allocs at %lx\n", alloc_bottom); /* Now try to claim that */ mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); @@ -2557,7 +2584,7 @@ static void __init flatten_device_tree(void) int i; prom_printf("reserved memory map:\n"); for (i = 0; i < mem_reserve_cnt; i++) - prom_printf(" %x - %x\n", + prom_printf(" %llx - %llx\n", be64_to_cpu(mem_reserve_map[i].base), be64_to_cpu(mem_reserve_map[i].size)); } @@ -2567,9 +2594,9 @@ static void __init flatten_device_tree(void) */ mem_reserve_cnt = MEM_RESERVE_MAP_SIZE; - prom_printf("Device tree strings 0x%x -> 0x%x\n", + prom_printf("Device tree strings 0x%lx -> 0x%lx\n", dt_string_start, dt_string_end); - prom_printf("Device tree struct 0x%x -> 0x%x\n", + prom_printf("Device tree struct 0x%lx -> 0x%lx\n", dt_struct_start, dt_struct_end); } @@ -3001,7 +3028,7 @@ static void __init prom_find_boot_cpu(void) prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); - prom_debug("Booting CPU hw index = %lu\n", prom.cpu); + prom_debug("Booting CPU hw index = %d\n", prom.cpu); } static void __init prom_check_initrd(unsigned long r3, unsigned long r4) @@ -3023,8 +3050,8 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) reserve_mem(prom_initrd_start, prom_initrd_end - prom_initrd_start); - prom_debug("initrd_start=0x%x\n", prom_initrd_start); - prom_debug("initrd_end=0x%x\n", prom_initrd_end); + prom_debug("initrd_start=0x%lx\n", prom_initrd_start); + prom_debug("initrd_end=0x%lx\n", prom_initrd_end); } #endif /* CONFIG_BLK_DEV_INITRD */ } @@ -3277,7 +3304,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); - prom_debug("->dt_header_start=0x%x\n", hdr); + prom_debug("->dt_header_start=0x%lx\n", hdr); } #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d23cf632edf0..9667666eb18e 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2443,6 +2443,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); attr.bp_addr = hw_brk.address; + attr.bp_len = 8; arch_bp_generic_fields(hw_brk.type, &attr.bp_type); @@ -3081,27 +3082,19 @@ long arch_ptrace(struct task_struct *child, long request, #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - if (!access_ok(VERIFY_WRITE, datavp, - sizeof(struct ppc_debug_info))) + if (copy_to_user(datavp, &dbginfo, + sizeof(struct ppc_debug_info))) return -EFAULT; - ret = __copy_to_user(datavp, &dbginfo, - sizeof(struct ppc_debug_info)) ? - -EFAULT : 0; - break; + return 0; } case PPC_PTRACE_SETHWDEBUG: { struct ppc_hw_breakpoint bp_info; - if (!access_ok(VERIFY_READ, datavp, - sizeof(struct ppc_hw_breakpoint))) + if (copy_from_user(&bp_info, datavp, + sizeof(struct ppc_hw_breakpoint))) return -EFAULT; - ret = __copy_from_user(&bp_info, datavp, - sizeof(struct ppc_hw_breakpoint)) ? - -EFAULT : 0; - if (!ret) - ret = ppc_set_hwdebug(child, &bp_info); - break; + return ppc_set_hwdebug(child, &bp_info); } case PPC_PTRACE_DELHWDEBUG: { diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index d49063d0baa4..487dcd8da4de 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -256,7 +256,7 @@ static int __init proc_rtas_init(void) __initcall(proc_rtas_init); -static int parse_number(const char __user *p, size_t count, unsigned long *val) +static int parse_number(const char __user *p, size_t count, u64 *val) { char buf[40]; char *end; @@ -269,7 +269,7 @@ static int parse_number(const char __user *p, size_t count, unsigned long *val) buf[count] = 0; - *val = simple_strtoul(buf, &end, 10); + *val = simple_strtoull(buf, &end, 10); if (*end && *end != '\n') return -EINVAL; @@ -283,17 +283,17 @@ static ssize_t ppc_rtas_poweron_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct rtc_time tm; - unsigned long nowtime; + time64_t nowtime; int error = parse_number(buf, count, &nowtime); if (error) return error; power_on_time = nowtime; /* save the time */ - to_tm(nowtime, &tm); + rtc_time64_to_tm(nowtime, &tm); error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, - tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); if (error) printk(KERN_WARNING "error: setting poweron time returned: %s\n", @@ -349,14 +349,14 @@ static ssize_t ppc_rtas_clock_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct rtc_time tm; - unsigned long nowtime; + time64_t nowtime; int error = parse_number(buf, count, &nowtime); if (error) return error; - to_tm(nowtime, &tm); + rtc_time64_to_tm(nowtime, &tm); error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, - tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 0); if (error) printk(KERN_WARNING "error: setting the clock returned: %s\n", @@ -377,8 +377,8 @@ static int ppc_rtas_clock_show(struct seq_file *m, void *v) unsigned int year, mon, day, hour, min, sec; year = ret[0]; mon = ret[1]; day = ret[2]; hour = ret[3]; min = ret[4]; sec = ret[5]; - seq_printf(m, "%lu\n", - mktime(year, mon, day, hour, min, sec)); + seq_printf(m, "%lld\n", + mktime64(year, mon, day, hour, min, sec)); } return 0; } @@ -504,7 +504,7 @@ static void ppc_rtas_process_sensor(struct seq_file *m, "EPOW power off" }; const char * battery_cyclestate[] = { "None", "In progress", "Requested" }; - const char * battery_charging[] = { "Charging", "Discharching", + const char * battery_charging[] = { "Charging", "Discharging", "No current flow" }; const char * ibm_drconnector[] = { "Empty", "Present", "Unusable", "Exchange" }; @@ -707,7 +707,7 @@ static void get_location_code(struct seq_file *m, struct individual_sensor *s, static ssize_t ppc_rtas_tone_freq_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned long freq; + u64 freq; int error = parse_number(buf, count, &freq); if (error) return error; @@ -732,7 +732,7 @@ static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v) static ssize_t ppc_rtas_tone_volume_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned long volume; + u64 volume; int error = parse_number(buf, count, &volume); if (error) return error; diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index 49600985c7ef..a28239b8b0c0 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -13,7 +13,7 @@ #define MAX_RTC_WAIT 5000 /* 5 sec */ #define RTAS_CLOCK_BUSY (-2) -unsigned long __init rtas_get_boot_time(void) +time64_t __init rtas_get_boot_time(void) { int ret[8]; int error; @@ -38,7 +38,7 @@ unsigned long __init rtas_get_boot_time(void) return 0; } - return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); + return mktime64(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); } /* NOTE: get_rtc_time will get an error if executed in interrupt context diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 3f1c4fcbe0aa..7fb9f83dcde8 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -26,6 +26,7 @@ #include <linux/memblock.h> #include <linux/slab.h> #include <linux/reboot.h> +#include <linux/syscalls.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -1050,7 +1051,10 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, } /* We assume to be passed big endian arguments */ -asmlinkage int ppc_rtas(struct rtas_args __user *uargs) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" +SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { struct rtas_args args; unsigned long flags; @@ -1136,6 +1140,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) return 0; } +#pragma GCC diagnostic pop /* * Call early during boot, before mem init, to retrieve the RTAS diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index b98a722da915..a8b277362931 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -10,10 +10,78 @@ #include <asm/debugfs.h> #include <asm/security_features.h> +#include <asm/setup.h> unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +bool barrier_nospec_enabled; + +static void enable_barrier_nospec(bool enable) +{ + barrier_nospec_enabled = enable; + do_barrier_nospec_fixups(enable); +} + +void setup_barrier_nospec(void) +{ + bool enable; + + /* + * It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well. + * But there's a good reason not to. The two flags we check below are + * both are enabled by default in the kernel, so if the hcall is not + * functional they will be enabled. + * On a system where the host firmware has been updated (so the ori + * functions as a barrier), but on which the hypervisor (KVM/Qemu) has + * not been updated, we would like to enable the barrier. Dropping the + * check for SEC_FTR_SPEC_BAR_ORI31 achieves that. The only downside is + * we potentially enable the barrier on systems where the host firmware + * is not updated, but that's harmless as it's a no-op. + */ + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); + + enable_barrier_nospec(enable); +} + +#ifdef CONFIG_DEBUG_FS +static int barrier_nospec_set(void *data, u64 val) +{ + switch (val) { + case 0: + case 1: + break; + default: + return -EINVAL; + } + + if (!!val == !!barrier_nospec_enabled) + return 0; + + enable_barrier_nospec(!!val); + + return 0; +} + +static int barrier_nospec_get(void *data, u64 *val) +{ + *val = barrier_nospec_enabled ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec, + barrier_nospec_get, barrier_nospec_set, "%llu\n"); + +static __init int barrier_nospec_debugfs_init(void) +{ + debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL, + &fops_barrier_nospec); + return 0; +} +device_initcall(barrier_nospec_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { bool thread_priv; @@ -52,6 +120,9 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, c if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) return sprintf(buf, "Not affected\n"); + if (barrier_nospec_enabled) + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return sprintf(buf, "Vulnerable\n"); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0af5c11b9e78..62b1a40d8957 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -192,12 +192,6 @@ void machine_halt(void) machine_hang(); } - -#ifdef CONFIG_TAU -extern u32 cpu_temp(unsigned long cpu); -extern u32 cpu_temp_both(unsigned long cpu); -#endif /* CONFIG_TAU */ - #ifdef CONFIG_SMP DEFINE_PER_CPU(unsigned int, cpu_pvr); #endif diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index d144df54ad40..c6a592b67386 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -62,4 +62,10 @@ void kvm_cma_reserve(void); static inline void kvm_cma_reserve(void) { }; #endif +#ifdef CONFIG_TAU +u32 cpu_temp(unsigned long cpu); +u32 cpu_temp_both(unsigned long cpu); +u32 tau_interrupts(unsigned long cpu); +#endif /* CONFIG_TAU */ + #endif /* __ARCH_POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b78f142a4148..7a7ce8ad455e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -346,6 +346,13 @@ void __init early_setup(unsigned long dt_ptr) */ cpu_ready_for_interrupts(); + /* + * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it + * will only actually get enabled on the boot cpu much later once + * ftrace itself has been initialized. + */ + this_cpu_enable_ftrace(); + DBG(" <- early_setup()\n"); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 61db86ecd318..fb932f1202c7 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -15,6 +15,7 @@ #include <linux/key.h> #include <linux/context_tracking.h> #include <linux/livepatch.h> +#include <linux/syscalls.h> #include <asm/hw_breakpoint.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -150,6 +151,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { user_exit(); + /* Check valid addr_limit, TIF check is done there */ + addr_limit_user_check(); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index a6467f843acf..800433685888 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -49,10 +49,8 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #else /* CONFIG_PPC64 */ -extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs); -extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs); +extern long sys_rt_sigreturn(void); +extern long sys_sigreturn(void); static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct task_struct *tsk) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 492f03451877..5eedbb282d42 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -25,9 +25,10 @@ #include <linux/errno.h> #include <linux/elf.h> #include <linux/ptrace.h> +#include <linux/pagemap.h> #include <linux/ratelimit.h> -#ifdef CONFIG_PPC64 #include <linux/syscalls.h> +#ifdef CONFIG_PPC64 #include <linux/compat.h> #else #include <linux/wait.h> @@ -57,10 +58,6 @@ #ifdef CONFIG_PPC64 -#define sys_rt_sigreturn compat_sys_rt_sigreturn -#define sys_swapcontext compat_sys_swapcontext -#define sys_sigreturn compat_sys_sigreturn - #define old_sigaction old_sigaction32 #define sigcontext sigcontext32 #define mcontext mcontext32 @@ -1041,11 +1038,18 @@ static int do_setcontext_tm(struct ucontext __user *ucp, } #endif -long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" +#ifdef CONFIG_PPC64 +COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, + struct ucontext __user *, new_ctx, int, ctx_size) +#else +SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, + struct ucontext __user *, new_ctx, long, ctx_size) +#endif { - unsigned char tmp __maybe_unused; + struct pt_regs *regs = current_pt_regs(); int ctx_has_vsx_region = 0; #ifdef CONFIG_PPC64 @@ -1109,9 +1113,8 @@ long sys_swapcontext(struct ucontext __user *old_ctx, } if (new_ctx == NULL) return 0; - if (!access_ok(VERIFY_READ, new_ctx, ctx_size) - || __get_user(tmp, (u8 __user *) new_ctx) - || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1)) + if (!access_ok(VERIFY_READ, new_ctx, ctx_size) || + fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) return -EFAULT; /* @@ -1131,11 +1134,16 @@ long sys_swapcontext(struct ucontext __user *old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; } +#pragma GCC diagnostic pop -long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs) +#ifdef CONFIG_PPC64 +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) +#else +SYSCALL_DEFINE0(rt_sigreturn) +#endif { struct rt_sigframe __user *rt_sf; + struct pt_regs *regs = current_pt_regs(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM struct ucontext __user *uc_transact; unsigned long msr_hi; @@ -1223,15 +1231,16 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, return 0; } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" #ifdef CONFIG_PPC32 -int sys_debug_setcontext(struct ucontext __user *ctx, - int ndbg, struct sig_dbg_op __user *dbg, - int r6, int r7, int r8, - struct pt_regs *regs) +SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, + int, ndbg, struct sig_dbg_op __user *, dbg) { + struct pt_regs *regs = current_pt_regs(); struct sig_dbg_op op; int i; - unsigned char tmp __maybe_unused; unsigned long new_msr = regs->msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.debug.dbcr0; @@ -1287,9 +1296,8 @@ int sys_debug_setcontext(struct ucontext __user *ctx, current->thread.debug.dbcr0 = new_dbcr0; #endif - if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) - || __get_user(tmp, (u8 __user *) ctx) - || __get_user(tmp, (u8 __user *) (ctx + 1) - 1)) + if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) || + fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx))) return -EFAULT; /* @@ -1329,6 +1337,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, return 0; } #endif +#pragma GCC diagnostic pop /* * OK, we're invoking a handler @@ -1419,9 +1428,13 @@ badframe: /* * Do a signal return; undo the signal stack. */ -long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs) +#ifdef CONFIG_PPC64 +COMPAT_SYSCALL_DEFINE0(sigreturn) +#else +SYSCALL_DEFINE0(sigreturn) +#endif { + struct pt_regs *regs = current_pt_regs(); struct sigframe __user *sf; struct sigcontext __user *sc; struct sigcontext sigctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 720117690822..d42b60020389 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -24,6 +24,7 @@ #include <linux/elf.h> #include <linux/ptrace.h> #include <linux/ratelimit.h> +#include <linux/syscalls.h> #include <asm/sigcontext.h> #include <asm/ucontext.h> @@ -624,17 +625,17 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) /* * Handle {get,set,swap}_context operations */ -int sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - long ctx_size, long r6, long r7, long r8, struct pt_regs *regs) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" +SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, + struct ucontext __user *, new_ctx, long, ctx_size) { unsigned char tmp; sigset_t set; unsigned long new_msr = 0; int ctx_has_vsx_region = 0; - BUG_ON(regs != current->thread.regs); - if (new_ctx && get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR])) return -EFAULT; @@ -692,24 +693,22 @@ int sys_swapcontext(struct ucontext __user *old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; } +#pragma GCC diagnostic pop /* * Do a signal return; undo the signal stack. */ -int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7, unsigned long r8, - struct pt_regs *regs) +SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; sigset_t set; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM unsigned long msr; #endif - BUG_ON(current->thread.regs != regs); - /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9ca7148b5881..5eadfffabe35 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,6 +59,7 @@ #include <asm/kexec.h> #include <asm/asm-prototypes.h> #include <asm/cpu_has_feature.h> +#include <asm/ftrace.h> #ifdef DEBUG #include <asm/udbg.h> @@ -155,11 +156,13 @@ static irqreturn_t reschedule_action(int irq, void *data) return IRQ_HANDLED; } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) { - tick_broadcast_ipi_handler(); + timer_broadcast_interrupt(); return IRQ_HANDLED; } +#endif #ifdef CONFIG_NMI_IPI static irqreturn_t nmi_ipi_action(int irq, void *data) @@ -172,7 +175,9 @@ static irqreturn_t nmi_ipi_action(int irq, void *data) static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, +#endif #ifdef CONFIG_NMI_IPI [PPC_MSG_NMI_IPI] = nmi_ipi_action, #endif @@ -186,8 +191,12 @@ static irq_handler_t smp_ipi_action[] = { const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", +#endif +#ifdef CONFIG_NMI_IPI [PPC_MSG_NMI_IPI] = "nmi ipi", +#endif }; /* optional function to request ipi, for controllers with >= 4 ipis */ @@ -277,8 +286,10 @@ irqreturn_t smp_ipi_demux_relaxed(void) generic_smp_call_function_interrupt(); if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) scheduler_ipi(); +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) - tick_broadcast_ipi_handler(); + timer_broadcast_interrupt(); +#endif #ifdef CONFIG_NMI_IPI if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) nmi_ipi_action(0, NULL); @@ -419,9 +430,9 @@ out: return ret; } -static void do_smp_send_nmi_ipi(int cpu) +static void do_smp_send_nmi_ipi(int cpu, bool safe) { - if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) + if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) return; if (cpu >= 0) { @@ -461,7 +472,7 @@ void smp_flush_nmi_ipi(u64 delay_us) * - delay_us > 0 is the delay before giving up waiting for targets to * enter the handler, == 0 specifies indefinite delay. */ -int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe) { unsigned long flags; int me = raw_smp_processor_id(); @@ -494,7 +505,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) nmi_ipi_busy_count++; nmi_ipi_unlock(); - do_smp_send_nmi_ipi(cpu); + do_smp_send_nmi_ipi(cpu, safe); while (!cpumask_empty(&nmi_ipi_pending_mask)) { udelay(1); @@ -516,6 +527,16 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) return ret; } + +int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + return __smp_send_nmi_ipi(cpu, fn, delay_us, false); +} + +int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + return __smp_send_nmi_ipi(cpu, fn, delay_us, true); +} #endif /* CONFIG_NMI_IPI */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST @@ -559,7 +580,7 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) * entire NMI dance and waiting for * cpus to clear pending mask, etc. */ - do_smp_send_nmi_ipi(cpu); + do_smp_send_nmi_ipi(cpu, false); } } } @@ -1066,6 +1087,9 @@ void start_secondary(void *unused) local_irq_enable(); + /* We can enable ftrace for secondary cpus now */ + this_cpu_enable_ftrace(); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); BUG(); @@ -1162,6 +1186,8 @@ int __cpu_disable(void) if (!smp_ops->cpu_disable) return -ENOSYS; + this_cpu_disable_ftrace(); + err = smp_ops->cpu_disable(); if (err) return err; @@ -1180,6 +1206,12 @@ void __cpu_die(unsigned int cpu) void cpu_die(void) { + /* + * Disable on the down path. This will be re-enabled by + * start_secondary() via start_secondary_resume() below + */ + this_cpu_disable_ftrace(); + if (ppc_md.cpu_die) ppc_md.cpu_die(); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index d534ed901538..07e97f289c52 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -1,21 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + /* - * Stack trace utility + * Stack trace utility functions etc. * * Copyright 2008 Christoph Hellwig, IBM Corp. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * Copyright 2018 SUSE Linux GmbH + * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp. */ #include <linux/export.h> +#include <linux/kallsyms.h> +#include <linux/module.h> +#include <linux/nmi.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> #include <linux/stacktrace.h> #include <asm/ptrace.h> #include <asm/processor.h> +#include <linux/ftrace.h> +#include <asm/kprobes.h> + +#include <asm/paca.h> /* * Save stack-backtrace addresses into a stack_trace buffer. @@ -76,3 +82,164 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) save_context_stack(trace, regs->gpr[1], current, 0); } EXPORT_SYMBOL_GPL(save_stack_trace_regs); + +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE +int +save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace) +{ + unsigned long sp; + unsigned long stack_page = (unsigned long)task_stack_page(tsk); + unsigned long stack_end; + int graph_idx = 0; + + /* + * The last frame (unwinding first) may not yet have saved + * its LR onto the stack. + */ + int firstframe = 1; + + if (tsk == current) + sp = current_stack_pointer(); + else + sp = tsk->thread.ksp; + + stack_end = stack_page + THREAD_SIZE; + if (!is_idle_task(tsk)) { + /* + * For user tasks, this is the SP value loaded on + * kernel entry, see "PACAKSAVE(r13)" in _switch() and + * system_call_common()/EXCEPTION_PROLOG_COMMON(). + * + * Likewise for non-swapper kernel threads, + * this also happens to be the top of the stack + * as setup by copy_thread(). + * + * Note that stack backlinks are not properly setup by + * copy_thread() and thus, a forked task() will have + * an unreliable stack trace until it's been + * _switch()'ed to for the first time. + */ + stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + } else { + /* + * idle tasks have a custom stack layout, + * c.f. cpu_idle_thread_init(). + */ + stack_end -= STACK_FRAME_OVERHEAD; + } + + if (sp < stack_page + sizeof(struct thread_struct) || + sp > stack_end - STACK_FRAME_MIN_SIZE) { + return 1; + } + + for (;;) { + unsigned long *stack = (unsigned long *) sp; + unsigned long newsp, ip; + + /* sanity check: ABI requires SP to be aligned 16 bytes. */ + if (sp & 0xF) + return 1; + + /* Mark stacktraces with exception frames as unreliable. */ + if (sp <= stack_end - STACK_INT_FRAME_SIZE && + stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + return 1; + } + + newsp = stack[0]; + /* Stack grows downwards; unwinder may only go up. */ + if (newsp <= sp) + return 1; + + if (newsp != stack_end && + newsp > stack_end - STACK_FRAME_MIN_SIZE) { + return 1; /* invalid backlink, too far up. */ + } + + /* Examine the saved LR: it must point into kernel code. */ + ip = stack[STACK_FRAME_LR_SAVE]; + if (!firstframe && !__kernel_text_address(ip)) + return 1; + firstframe = 0; + + /* + * FIXME: IMHO these tests do not belong in + * arch-dependent code, they are generic. + */ + ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL); +#ifdef CONFIG_KPROBES + /* + * Mark stacktraces with kretprobed functions on them + * as unreliable. + */ + if (ip == (unsigned long)kretprobe_trampoline) + return 1; +#endif + + if (!trace->skip) + trace->entries[trace->nr_entries++] = ip; + else + trace->skip--; + + if (newsp == stack_end) + break; + + if (trace->nr_entries >= trace->max_entries) + return -E2BIG; + + sp = newsp; + } + return 0; +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable); +#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ + +#ifdef CONFIG_PPC_BOOK3S_64 +static void handle_backtrace_ipi(struct pt_regs *regs) +{ + nmi_cpu_backtrace(regs); +} + +static void raise_backtrace_ipi(cpumask_t *mask) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) { + if (cpu == smp_processor_id()) + handle_backtrace_ipi(NULL); + else + smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC); + } + + for_each_cpu(cpu, mask) { + struct paca_struct *p = paca_ptrs[cpu]; + + cpumask_clear_cpu(cpu, mask); + + pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu); + if (!virt_addr_valid(p)) { + pr_warn("paca pointer appears corrupt? (%px)\n", p); + continue; + } + + pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d", + p->irq_soft_mask, p->in_mce, p->in_nmi); + + if (virt_addr_valid(p->__current)) + pr_cont(" current: %d (%s)\n", p->__current->pid, + p->__current->comm); + else + pr_cont(" current pointer corrupt? (%px)\n", p->__current); + + pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1); + show_stack(p->__current, (unsigned long *)p->saved_r1); + } +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); +} +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index c11c73373691..bdf58ba1a94b 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -52,15 +52,6 @@ #include <asm/syscalls.h> #include <asm/switch_to.h> - -asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - compat_uptr_t tvp_x) -{ - /* sign extend n */ - return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); -} - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 466216506eb2..083fa06962fd 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -62,6 +62,9 @@ out: return ret; } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -75,6 +78,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, { return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); } +#pragma GCC diagnostic pop #ifdef CONFIG_PPC32 /* diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 7ccb7f81f8db..919a32746ede 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -35,7 +35,7 @@ #endif #define SYSCALL_SPU(func) SYSCALL(func) #define COMPAT_SYS_SPU(func) COMPAT_SYS(func) -#define PPC_SYS_SPU(func) PPC_SYS(func) +#define COMPAT_SPU_NEW(func) COMPAT_SYS(func) #define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) .section .rodata,"a" diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c index 55323a620cfe..4653258722ac 100644 --- a/arch/powerpc/kernel/systbl_chk.c +++ b/arch/powerpc/kernel/systbl_chk.c @@ -31,7 +31,7 @@ #define SYSCALL_SPU(func) SYSCALL(func) #define COMPAT_SYS_SPU(func) COMPAT_SYS(func) -#define PPC_SYS_SPU(func) PPC_SYS(func) +#define COMPAT_SPU_NEW(func) COMPAT_SYS(_new##func) #define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) /* Just insert a marker for ni_syscalls */ diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh index 31b6e7c358ca..f2e356c2a345 100644 --- a/arch/powerpc/kernel/systbl_chk.sh +++ b/arch/powerpc/kernel/systbl_chk.sh @@ -16,7 +16,7 @@ awk 'BEGIN { num = -1; } # Ignore the beginning of the file /^START_TABLE/ { num = 0; next; } /^END_TABLE/ { if (num != $2) { - printf "NR_syscalls (%s) is not one more than the last syscall (%s)\n", + printf "Error: NR_syscalls (%s) is not one more than the last syscall (%s)\n", $2, num - 1; exit(1); } @@ -25,7 +25,7 @@ awk 'BEGIN { num = -1; } # Ignore the beginning of the file { if (num == -1) next; if (($1 != -1) && ($1 != num)) { - printf "Syscall %s out of order (expected %s)\n", + printf "Error: Syscall %s out of order (expected %s)\n", $1, num; exit(1); }; diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 8cdd852aedd1..e2ab8a111b69 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -27,6 +27,9 @@ #include <asm/cache.h> #include <asm/8xx_immap.h> #include <asm/machdep.h> +#include <asm/asm-prototypes.h> + +#include "setup.h" static struct tau_temp { @@ -50,7 +53,7 @@ struct timer_list tau_timer; #define shrink_timer 2*HZ /* period between shrinking the window */ #define min_window 2 /* minimum window size, degrees C */ -void set_thresholds(unsigned long cpu) +static void set_thresholds(unsigned long cpu) { #ifdef CONFIG_TAU_INT /* @@ -70,7 +73,7 @@ void set_thresholds(unsigned long cpu) #endif } -void TAUupdate(int cpu) +static void TAUupdate(int cpu) { unsigned thrm; @@ -205,7 +208,7 @@ static void tau_timeout_smp(struct timer_list *unused) int tau_initialized = 0; -void __init TAU_init_smp(void * info) +static void __init TAU_init_smp(void *info) { unsigned long cpu = smp_processor_id(); @@ -217,7 +220,7 @@ void __init TAU_init_smp(void * info) set_thresholds(cpu); } -int __init TAU_init(void) +static int __init TAU_init(void) { /* We assume in SMP that if one CPU has TAU support, they * all have it --BenH @@ -259,12 +262,12 @@ u32 cpu_temp_both(unsigned long cpu) return ((tau[cpu].high << 16) | tau[cpu].low); } -int cpu_temp(unsigned long cpu) +u32 cpu_temp(unsigned long cpu) { return ((tau[cpu].high + tau[cpu].low) / 2); } -int tau_interrupts(unsigned long cpu) +u32 tau_interrupts(unsigned long cpu) { return (tau[cpu].interrupts); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 360e71d455cc..70f145e02487 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -163,12 +163,6 @@ EXPORT_SYMBOL(__cputime_usec_factor); void (*dtl_consumer)(struct dtl_entry *, u64); #endif -#ifdef CONFIG_PPC64 -#define get_accounting(tsk) (&get_paca()->accounting) -#else -#define get_accounting(tsk) (&task_thread_info(tsk)->accounting) -#endif - static void calc_cputime_factors(void) { struct div_result res; @@ -421,21 +415,6 @@ void vtime_flush(struct task_struct *tsk) acct->softirq_time = 0; } -#ifdef CONFIG_PPC32 -/* - * Called from the context switch with interrupts disabled, to charge all - * accumulated times to the current process, and to prepare accounting on - * the next process. - */ -void arch_vtime_task_switch(struct task_struct *prev) -{ - struct cpu_accounting_data *acct = get_accounting(current); - - acct->starttime = get_accounting(prev)->starttime; - acct->startspurr = get_accounting(prev)->startspurr; -} -#endif /* CONFIG_PPC32 */ - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif @@ -513,6 +492,35 @@ static inline void clear_irq_work_pending(void) "i" (offsetof(struct paca_struct, irq_work_pending))); } +void arch_irq_work_raise(void) +{ + preempt_disable(); + set_irq_work_pending_flag(); + /* + * Non-nmi code running with interrupts disabled will replay + * irq_happened before it re-enables interrupts, so setthe + * decrementer there instead of causing a hardware exception + * which would immediately hit the masked interrupt handler + * and have the net effect of setting the decrementer in + * irq_happened. + * + * NMI interrupts can not check this when they return, so the + * decrementer hardware exception is raised, which will fire + * when interrupts are next enabled. + * + * BookE does not support this yet, it must audit all NMI + * interrupt handlers to ensure they call nmi_enter() so this + * check would be correct. + */ + if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) { + set_dec(1); + } else { + hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_DEC; + } + preempt_enable(); +} + #else /* 32-bit */ DEFINE_PER_CPU(u8, irq_work_pending); @@ -521,8 +529,6 @@ DEFINE_PER_CPU(u8, irq_work_pending); #define test_irq_work_pending() __this_cpu_read(irq_work_pending) #define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) -#endif /* 32 vs 64 bit */ - void arch_irq_work_raise(void) { preempt_disable(); @@ -531,6 +537,8 @@ void arch_irq_work_raise(void) preempt_enable(); } +#endif /* 32 vs 64 bit */ + #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 @@ -538,60 +546,16 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -static void __timer_interrupt(void) -{ - struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - struct clock_event_device *evt = this_cpu_ptr(&decrementers); - u64 now; - - trace_timer_interrupt_entry(regs); - - if (test_irq_work_pending()) { - clear_irq_work_pending(); - irq_work_run(); - } - - now = get_tb_or_rtc(); - if (now >= *next_tb) { - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); - __this_cpu_inc(irq_stat.timer_irqs_event); - } else { - now = *next_tb - now; - if (now <= decrementer_max) - set_dec(now); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); - __this_cpu_inc(irq_stat.timer_irqs_others); - } - -#ifdef CONFIG_PPC64 - /* collect purr register values often, for accurate calculations */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); - cu->current_tb = mfspr(SPRN_PURR); - } -#endif - - trace_timer_interrupt_exit(regs); -} - /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. */ -void timer_interrupt(struct pt_regs * regs) +void timer_interrupt(struct pt_regs *regs) { - struct pt_regs *old_regs; + struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continue to take decrementer exceptions. - */ - set_dec(decrementer_max); + struct pt_regs *old_regs; + u64 now; /* Some implementations of hotplug will get timer interrupts while * offline, just ignore these and we also need to set @@ -599,11 +563,23 @@ void timer_interrupt(struct pt_regs * regs) * don't replay timer interrupt when return, otherwise we'll trap * here infinitely :( */ - if (!cpu_online(smp_processor_id())) { + if (unlikely(!cpu_online(smp_processor_id()))) { *next_tb = ~(u64)0; + set_dec(decrementer_max); return; } + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continue to take decrementer exceptions. When the + * PPC_WATCHDOG (decrementer based) is configured, keep this at most + * 31 bits, which is about 4 seconds on most systems, which gives + * the watchdog a chance of catching timer interrupt hard lockups. + */ + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) + set_dec(0x7fffffff); + else + set_dec(decrementer_max); + /* Conditionally hard-enable interrupts now that the DEC has been * bumped to its maximum value */ @@ -617,13 +593,46 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); + trace_timer_interrupt_entry(regs); + + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } + + now = get_tb_or_rtc(); + if (now >= *next_tb) { + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + __this_cpu_inc(irq_stat.timer_irqs_event); + } else { + now = *next_tb - now; + if (now <= decrementer_max) + set_dec(now); + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); + __this_cpu_inc(irq_stat.timer_irqs_others); + } - __timer_interrupt(); + trace_timer_interrupt_exit(regs); irq_exit(); set_irq_regs(old_regs); } EXPORT_SYMBOL(timer_interrupt); +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void timer_broadcast_interrupt(void) +{ + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + + *next_tb = ~(u64)0; + tick_receive_broadcast(); + __this_cpu_inc(irq_stat.broadcast_irqs_event); +} +#endif + /* * Hypervisor decrementer interrupts shouldn't occur but are sometimes * left pending on exit from a KVM guest. We don't need to do anything @@ -781,21 +790,19 @@ void __init generic_calibrate_decr(void) } } -int update_persistent_clock(struct timespec now) +int update_persistent_clock64(struct timespec64 now) { struct rtc_time tm; if (!ppc_md.set_rtc_time) return -ENODEV; - to_tm(now.tv_sec + 1 + timezone_offset, &tm); - tm.tm_year -= 1900; - tm.tm_mon -= 1; + rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm); return ppc_md.set_rtc_time(&tm); } -static void __read_persistent_clock(struct timespec *ts) +static void __read_persistent_clock(struct timespec64 *ts) { struct rtc_time tm; static int first = 1; @@ -819,11 +826,10 @@ static void __read_persistent_clock(struct timespec *ts) } ppc_md.get_rtc_time(&tm); - ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = rtc_tm_to_time64(&tm); } -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); @@ -971,15 +977,6 @@ static int decrementer_shutdown(struct clock_event_device *dev) return 0; } -/* Interrupt handler for the timer broadcast IPI */ -void tick_broadcast_ipi_handler(void) -{ - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - *next_tb = get_tb_or_rtc(); - __timer_interrupt(); -} - static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu); @@ -1141,56 +1138,6 @@ void __init time_init(void) #endif } - -#define FEBRUARY 2 -#define STARTOFTIME 1970 -#define SECDAY 86400L -#define SECYR (SECDAY * 365) -#define leapyear(year) ((year) % 4 == 0 && \ - ((year) % 100 != 0 || (year) % 400 == 0)) -#define days_in_year(a) (leapyear(a) ? 366 : 365) -#define days_in_month(a) (month_days[(a) - 1]) - -static int month_days[12] = { - 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 -}; - -void to_tm(int tim, struct rtc_time * tm) -{ - register int i; - register long hms, day; - - day = tim / SECDAY; - hms = tim % SECDAY; - - /* Hours, minutes, seconds are easy */ - tm->tm_hour = hms / 3600; - tm->tm_min = (hms % 3600) / 60; - tm->tm_sec = (hms % 3600) % 60; - - /* Number of years in days */ - for (i = STARTOFTIME; day >= days_in_year(i); i++) - day -= days_in_year(i); - tm->tm_year = i; - - /* Number of months in days left */ - if (leapyear(tm->tm_year)) - days_in_month(FEBRUARY) = 29; - for (i = 1; day >= days_in_month(i); i++) - day -= days_in_month(i); - days_in_month(FEBRUARY) = 28; - tm->tm_mon = i; - - /* Days are what is left over (+1) from all that. */ - tm->tm_mday = day + 1; - - /* - * No-one uses the day of the week. - */ - tm->tm_wday = -1; -} -EXPORT_SYMBOL(to_tm); - /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit * result. diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index b92ac8e711db..ff12f47a96b6 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -12,6 +12,7 @@ #include <asm/ptrace.h> #include <asm/reg.h> #include <asm/bug.h> +#include <asm/export.h> #ifdef CONFIG_VSX /* See fpu.S, this is borrowed from there */ @@ -55,6 +56,16 @@ _GLOBAL(tm_enable) or r4, r4, r3 mtmsrd r4 1: blr +EXPORT_SYMBOL_GPL(tm_enable); + +_GLOBAL(tm_disable) + mfmsr r4 + li r3, MSR_TM >> 32 + sldi r3, r3, 32 + andc r4, r4, r3 + mtmsrd r4 + blr +EXPORT_SYMBOL_GPL(tm_disable); _GLOBAL(tm_save_sprs) mfspr r0, SPRN_TFHAR @@ -78,6 +89,7 @@ _GLOBAL(tm_restore_sprs) _GLOBAL(tm_abort) TABORT(R3) blr +EXPORT_SYMBOL_GPL(tm_abort); /* void tm_reclaim(struct thread_struct *thread, * uint8_t cause) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4741fe112f05..c076a32093fd 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -357,6 +357,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned int op[2]; void *ip = (void *)rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = rec->arch.mod; /* read where this goes */ if (probe_kernel_read(op, ip, sizeof(op))) @@ -368,19 +370,44 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - /* If we never set up a trampoline to ftrace_caller, then bail */ - if (!rec->arch.mod->arch.tramp) { + /* If we never set up ftrace trampoline(s), then bail */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!mod->arch.tramp || !mod->arch.tramp_regs) { +#else + if (!mod->arch.tramp) { +#endif pr_err("No ftrace trampoline\n"); return -EINVAL; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else +#endif + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + /* Ensure branch is within 24 bits */ - if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + if (!create_branch(ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -388,14 +415,6 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return 0; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) -{ - return ftrace_make_call(rec, addr); -} -#endif - #else /* !CONFIG_PPC64: */ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) @@ -472,53 +491,158 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) #endif /* CONFIG_MODULES */ } -int ftrace_update_ftrace_func(ftrace_func_t func) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_MODULES +static int +__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) { - unsigned long ip = (unsigned long)(&ftrace_call); - unsigned int old, new; - int ret; + unsigned int op; + unsigned long ip = rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = rec->arch.mod; - old = *(unsigned int *)&ftrace_call; - new = ftrace_call_replace(ip, (unsigned long)func, 1); - ret = ftrace_modify_code(ip, old, new); + /* If we never set up ftrace trampolines, then bail */ + if (!mod->arch.tramp || !mod->arch.tramp_regs) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } - return ret; -} + /* read where this goes */ + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } -static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) -{ - unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR; - int ret; + /* Make sure that that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %x\n", op); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + entry = ppc_global_function_entry((void *)old_addr); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); - ret = ftrace_update_record(rec, enable); + if (tramp != entry) { + /* old_addr is not within range, so we must have used a trampoline */ + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + } + + /* The new target may be within range */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + if (patch_branch((unsigned int *)ip, addr, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } - switch (ret) { - case FTRACE_UPDATE_IGNORE: return 0; - case FTRACE_UPDATE_MAKE_CALL: - return ftrace_make_call(rec, ftrace_addr); - case FTRACE_UPDATE_MAKE_NOP: - return ftrace_make_nop(NULL, rec, ftrace_addr); + } + + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + /* Ensure branch is within 24 bits */ + if (!create_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + pr_err("Branch out of range\n"); + return -EINVAL; + } + + if (patch_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; } return 0; } +#endif -void ftrace_replace_code(int enable) +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) { - struct ftrace_rec_iter *iter; - struct dyn_ftrace *rec; + unsigned long ip = rec->ip; + unsigned int old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { + /* within range */ + old = ftrace_call_replace(ip, old_addr, 1); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } + +#ifdef CONFIG_MODULES + /* + * Out of range jumps are called from modules. + */ + if (!rec->arch.mod) { + pr_err("No module loaded\n"); + return -EINVAL; + } + + return __ftrace_modify_call(rec, old_addr, addr); +#else + /* We should not get here without modules */ + return -EINVAL; +#endif /* CONFIG_MODULES */ +} +#endif + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned int old, new; int ret; - for (iter = ftrace_rec_iter_start(); iter; - iter = ftrace_rec_iter_next(iter)) { - rec = ftrace_rec_iter_record(iter); - ret = __ftrace_replace_code(rec, enable); - if (ret) { - ftrace_bug(ret, rec); - return; - } + old = *(unsigned int *)&ftrace_call; + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + old = *(unsigned int *)&ftrace_regs_call; + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); } +#endif + + return ret; } /* @@ -538,7 +662,6 @@ int __init ftrace_dyn_arch_init(void) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_graph_call(void); extern void ftrace_graph_stub(void); @@ -567,7 +690,6 @@ int ftrace_disable_ftrace_graph_caller(void) return ftrace_modify_code(ip, old, new); } -#endif /* CONFIG_DYNAMIC_FTRACE */ /* * Hook the return address and push it in the stack of return addrs diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index afef2c076282..2c29098f630f 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -14,7 +14,6 @@ #include <asm/ftrace.h> #include <asm/export.h> -#ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) /* @@ -47,26 +46,7 @@ _GLOBAL(ftrace_graph_stub) MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr -#else -_GLOBAL(mcount) -_GLOBAL(_mcount) - - MCOUNT_SAVE_FRAME - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5, ftrace_trace_function) - lwz r5,0(r5) - - mtctr r5 - bctrl - nop - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - MCOUNT_RESTORE_FRAME - bctr -#endif EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_stub) diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S index e5ccea19821e..e25f77c10a72 100644 --- a/arch/powerpc/kernel/trace/ftrace_64.S +++ b/arch/powerpc/kernel/trace/ftrace_64.S @@ -14,7 +14,6 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> -#ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) EXPORT_SYMBOL(_mcount) @@ -23,34 +22,6 @@ EXPORT_SYMBOL(_mcount) mtlr r0 bctr -#else /* CONFIG_DYNAMIC_FTRACE */ -_GLOBAL_TOC(_mcount) -EXPORT_SYMBOL(_mcount) - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5,ftrace_trace_function) - ld r5,0(r5) - ld r5,0(r5) - mtctr r5 - bctrl - nop - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 -_GLOBAL(ftrace_stub) - blr -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) /* need to save return values */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 3f3e81852422..9a5b5a513604 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -17,11 +17,10 @@ #include <asm/bug.h> #include <asm/ptrace.h> -#ifdef CONFIG_DYNAMIC_FTRACE /* * - * ftrace_caller() is the function that replaces _mcount() when ftrace is - * active. + * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount() + * when ftrace is active. * * We arrive here after a function A calls function B, and we are the trace * function for B. When we enter r1 points to A's stack frame, B has not yet @@ -37,7 +36,7 @@ * Our job is to save the register state into a struct pt_regs (on the stack) * and then arrange for the ftrace function to be called. */ -_GLOBAL(ftrace_caller) +_GLOBAL(ftrace_regs_caller) /* Save the original return address in A's stack frame */ std r0,LRSAVE(r1) @@ -47,6 +46,12 @@ _GLOBAL(ftrace_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) SAVE_10GPRS(2, r1) + + /* Ok to continue? */ + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beq ftrace_no_trace + SAVE_10GPRS(12, r1) SAVE_10GPRS(22, r1) @@ -94,8 +99,8 @@ _GLOBAL(ftrace_caller) addi r6, r1 ,STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: +.globl ftrace_regs_call +ftrace_regs_call: bl ftrace_stub nop @@ -156,6 +161,7 @@ ftrace_call: bne- livepatch_handler #endif +ftrace_caller_common: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -168,6 +174,74 @@ _GLOBAL(ftrace_graph_stub) _GLOBAL(ftrace_stub) blr +ftrace_no_trace: + mflr r3 + mtctr r3 + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE + mtlr r0 + bctr + +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0, LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1, -SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(3, r1) + + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beq ftrace_no_trace + + /* Get the _mcount() call site out of LR */ + mflr r7 + std r7, _NIP(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3, r2, function_trace_op@toc@ha + addi r3, r3, function_trace_op@toc@l + ld r5, 0(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Set pt_regs to NULL */ + li r6, 0 + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + ld r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + REST_8GPRS(3,r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + + /* Reload original LR */ + ld r0, LRSAVE(r1) + mtlr r0 + + /* Handle function_graph or go back */ + b ftrace_caller_common + #ifdef CONFIG_LIVEPATCH /* * This function runs in the mcount context, between two functions. As @@ -236,8 +310,6 @@ livepatch_handler: blr #endif /* CONFIG_LIVEPATCH */ -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) stdu r1, -112(r1) diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S index f095358da96e..4c515c4023de 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -14,8 +14,11 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> -#ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL_TOC(ftrace_caller) + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beqlr + /* Taken from output of objdump from lib64/glibc */ mflr r3 ld r11, 0(r1) @@ -39,7 +42,6 @@ _GLOBAL(ftrace_graph_stub) _GLOBAL(ftrace_stub) blr -#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index b8c434d1d459..50112d4473bb 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -8,8 +8,15 @@ obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \ # Build rules -ifeq ($(CONFIG_PPC32),y) -CROSS32CC := $(CC) +ifdef CROSS32_COMPILE + VDSOCC := $(CROSS32_COMPILE)gcc +else + VDSOCC := $(CC) +endif + +CC32FLAGS := +ifdef CONFIG_PPC64 +CC32FLAGS += -m32 endif targets := $(obj-vdso32) vdso32.so vdso32.so.dbg @@ -45,9 +52,9 @@ $(obj-vdso32): %.o: %.S FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< + cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index 8812085883fd..4acd3fb2b38e 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <asm/ptrace.h> #include <asm/processor.h> +#include <asm/switch_to.h> #include <linux/uaccess.h> /* Functions in vector.S */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index dd10e6f1d1b7..5baac79df97e 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -153,6 +153,13 @@ SECTIONS *(__rfi_flush_fixup) __stop___rfi_flush_fixup = .; } + + . = ALIGN(8); + __spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) { + __start___barrier_nospec_fixup = .; + *(__barrier_nospec_fixup) + __stop___barrier_nospec_fixup = .; + } #endif EXCEPTION_TABLE(0) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 6256dc3b0087..1d82274f7e9f 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -64,7 +64,7 @@ * means the CPU(s) with their bit still set in the pending mask have had * their heartbeat stop, and action is taken. * - * Some platforms implement true NMI IPIs, which can by used by the SMP + * Some platforms implement true NMI IPIs, which can be used by the SMP * watchdog to detect an unresponsive CPU and pull it out of its stuck * state with the NMI IPI, to get crash/debug data from it. This way the * SMP watchdog can detect hardware interrupts off lockups. @@ -111,7 +111,13 @@ static inline void wd_smp_unlock(unsigned long *flags) static void wd_lockup_ipi(struct pt_regs *regs) { - pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id()); + int cpu = raw_smp_processor_id(); + u64 tb = get_tb(); + + pr_emerg("CPU %d Hard LOCKUP\n", cpu); + pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n", + cpu, tb, per_cpu(wd_timer_tb, cpu), + tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000); print_modules(); print_irqtrace_events(current); if (regs) @@ -154,6 +160,9 @@ static void watchdog_smp_panic(int cpu, u64 tb) pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", + cpu, tb, wd_smp_last_reset_tb, + tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000); if (!sysctl_hardlockup_all_cpu_backtrace) { /* @@ -194,10 +203,19 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) { if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { + struct pt_regs *regs = get_irq_regs(); unsigned long flags; - pr_emerg("CPU %d became unstuck\n", cpu); wd_smp_lock(&flags); + + pr_emerg("CPU %d became unstuck TB:%lld\n", + cpu, tb); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); } @@ -245,8 +263,6 @@ void soft_nmi_interrupt(struct pt_regs *regs) tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { - per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); @@ -254,7 +270,11 @@ void soft_nmi_interrupt(struct pt_regs *regs) } set_cpu_stuck(cpu, tb); - pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip); + pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", + cpu, (void *)regs->nip); + pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n", + cpu, tb, per_cpu(wd_timer_tb, cpu), + tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000); print_modules(); print_irqtrace_events(current); show_regs(regs); |