diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r-- | arch/powerpc/platforms/pseries/Makefile | 5 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh.c | 543 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_cache.c | 59 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_dev.c | 14 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_driver.c | 310 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_event.c | 59 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_pe.c | 652 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_pseries.c | 247 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_sysfs.c | 9 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-memory.c | 12 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/iommu.c | 12 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 77 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/msi.c | 26 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/pci.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/pci_dlpar.c | 32 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/processor_idle.c | 62 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 22 |
17 files changed, 1307 insertions, 836 deletions
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index c222189f5bb2..890622b87c8f 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,8 +6,9 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \ - eeh_event.o eeh_sysfs.o eeh_pseries.o +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ + eeh_driver.o eeh_event.o eeh_sysfs.o \ + eeh_pseries.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PSERIES_MSI) += msi.o diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index ecd394cf34e6..9a04322b1736 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -92,6 +92,20 @@ struct eeh_ops *eeh_ops = NULL; int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); +/* + * EEH probe mode support. The intention is to support multiple + * platforms for EEH. Some platforms like pSeries do PCI emunation + * based on device tree. However, other platforms like powernv probe + * PCI devices from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for particular + * OF node or PCI device so that the corresponding PE would be created + * there. + */ +int eeh_probe_mode; + +/* Global EEH mutex */ +DEFINE_MUTEX(eeh_mutex); + /* Lock to avoid races due to multiple reports of an error */ static DEFINE_RAW_SPINLOCK(confirm_error_lock); @@ -204,22 +218,12 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) } } - /* Gather status on devices under the bridge */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { - struct device_node *child; - - for_each_child_of_node(dn, child) { - if (of_node_to_eeh_dev(child)) - n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n); - } - } - return n; } /** * eeh_slot_error_detail - Generate combined log including driver log and error log - * @edev: device to report error log for + * @pe: EEH PE * @severity: temporary or permanent error log * * This routine should be called to generate the combined log, which @@ -227,17 +231,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) * out from the config space of the corresponding PCI device, while * the error log is fetched through platform dependent function call. */ -void eeh_slot_error_detail(struct eeh_dev *edev, int severity) +void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - pci_regs_buf[0] = 0; + struct eeh_dev *edev; - eeh_pci_enable(edev, EEH_OPT_THAW_MMIO); - eeh_ops->configure_bridge(eeh_dev_to_of_node(edev)); - eeh_restore_bars(edev); - loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + eeh_ops->configure_bridge(pe); + eeh_pe_restore_bars(pe); - eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen); + pci_regs_buf[0] = 0; + eeh_pe_for_each_dev(pe, edev) { + loglen += eeh_gather_pci_data(edev, pci_regs_buf, + EEH_PCI_REGS_LOG_LEN); + } + + eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); } /** @@ -261,126 +270,8 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) } /** - * eeh_find_device_pe - Retrieve the PE for the given device - * @dn: device node - * - * Return the PE under which this device lies - */ -struct device_node *eeh_find_device_pe(struct device_node *dn) -{ - while (dn->parent && of_node_to_eeh_dev(dn->parent) && - (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { - dn = dn->parent; - } - return dn; -} - -/** - * __eeh_mark_slot - Mark all child devices as failed - * @parent: parent device - * @mode_flag: failure flag - * - * Mark all devices that are children of this device as failed. - * Mark the device driver too, so that it can see the failure - * immediately; this is critical, since some drivers poll - * status registers in interrupts ... If a driver is polling, - * and the slot is frozen, then the driver can deadlock in - * an interrupt context, which is bad. - */ -static void __eeh_mark_slot(struct device_node *parent, int mode_flag) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (of_node_to_eeh_dev(dn)) { - /* Mark the pci device driver too */ - struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; - - of_node_to_eeh_dev(dn)->mode |= mode_flag; - - if (dev && dev->driver) - dev->error_state = pci_channel_io_frozen; - - __eeh_mark_slot(dn, mode_flag); - } - } -} - -/** - * eeh_mark_slot - Mark the indicated device and its children as failed - * @dn: parent device - * @mode_flag: failure flag - * - * Mark the indicated device and its child devices as failed. - * The device drivers are marked as failed as well. - */ -void eeh_mark_slot(struct device_node *dn, int mode_flag) -{ - struct pci_dev *dev; - dn = eeh_find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent; - - of_node_to_eeh_dev(dn)->mode |= mode_flag; - - /* Mark the pci device too */ - dev = of_node_to_eeh_dev(dn)->pdev; - if (dev) - dev->error_state = pci_channel_io_frozen; - - __eeh_mark_slot(dn, mode_flag); -} - -/** - * __eeh_clear_slot - Clear failure flag for the child devices - * @parent: parent device - * @mode_flag: flag to be cleared - * - * Clear failure flag for the child devices. - */ -static void __eeh_clear_slot(struct device_node *parent, int mode_flag) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (of_node_to_eeh_dev(dn)) { - of_node_to_eeh_dev(dn)->mode &= ~mode_flag; - of_node_to_eeh_dev(dn)->check_count = 0; - __eeh_clear_slot(dn, mode_flag); - } - } -} - -/** - * eeh_clear_slot - Clear failure flag for the indicated device and its children - * @dn: parent device - * @mode_flag: flag to be cleared - * - * Clear failure flag for the indicated device and its children. - */ -void eeh_clear_slot(struct device_node *dn, int mode_flag) -{ - unsigned long flags; - raw_spin_lock_irqsave(&confirm_error_lock, flags); - - dn = eeh_find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent; - - of_node_to_eeh_dev(dn)->mode &= ~mode_flag; - of_node_to_eeh_dev(dn)->check_count = 0; - __eeh_clear_slot(dn, mode_flag); - raw_spin_unlock_irqrestore(&confirm_error_lock, flags); -} - -/** - * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze - * @dn: device node - * @dev: pci device, if known + * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze + * @edev: eeh device * * Check for an EEH failure for the given device node. Call this * routine if the result of a read was all 0xff's and you want to @@ -392,11 +283,13 @@ void eeh_clear_slot(struct device_node *dn, int mode_flag) * * It is safe to call this routine in an interrupt context. */ -int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) +int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; unsigned long flags; - struct eeh_dev *edev; + struct device_node *dn; + struct pci_dev *dev; + struct eeh_pe *pe; int rc = 0; const char *location; @@ -405,23 +298,23 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) if (!eeh_subsystem_enabled) return 0; - if (!dn) { + if (!edev) { eeh_stats.no_dn++; return 0; } - dn = eeh_find_device_pe(dn); - edev = of_node_to_eeh_dev(dn); + dn = eeh_dev_to_of_node(edev); + dev = eeh_dev_to_pci_dev(edev); + pe = edev->pe; /* Access to IO BARs might get this far and still not want checking. */ - if (!(edev->mode & EEH_MODE_SUPPORTED) || - edev->mode & EEH_MODE_NOCHECK) { + if (!pe) { eeh_stats.ignored_check++; - pr_debug("EEH: Ignored check (%x) for %s %s\n", - edev->mode, eeh_pci_name(dev), dn->full_name); + pr_debug("EEH: Ignored check for %s %s\n", + eeh_pci_name(dev), dn->full_name); return 0; } - if (!edev->config_addr && !edev->pe_config_addr) { + if (!pe->addr && !pe->config_addr) { eeh_stats.no_cfg_addr++; return 0; } @@ -434,13 +327,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) */ raw_spin_lock_irqsave(&confirm_error_lock, flags); rc = 1; - if (edev->mode & EEH_MODE_ISOLATED) { - edev->check_count++; - if (edev->check_count % EEH_MAX_FAILS == 0) { + if (pe->state & EEH_PE_ISOLATED) { + pe->check_count++; + if (pe->check_count % EEH_MAX_FAILS == 0) { location = of_get_property(dn, "ibm,loc-code", NULL); printk(KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", - edev->check_count, location, + pe->check_count, location, eeh_driver_name(dev), eeh_pci_name(dev)); printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", eeh_driver_name(dev)); @@ -456,7 +349,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * function zero of a multi-function device. * In any case they must share a common PHB. */ - ret = eeh_ops->get_state(dn, NULL); + ret = eeh_ops->get_state(pe, NULL); /* Note that config-io to empty slots may fail; * they are empty when they don't have children. @@ -469,7 +362,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { eeh_stats.false_positives++; - edev->false_positives ++; + pe->false_positives++; rc = 0; goto dn_unlock; } @@ -480,10 +373,10 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * with other functions on this device, and functions under * bridges. */ - eeh_mark_slot(dn, EEH_MODE_ISOLATED); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event(edev); + eeh_send_failure_event(pe); /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure @@ -497,7 +390,7 @@ dn_unlock: return rc; } -EXPORT_SYMBOL_GPL(eeh_dn_check_failure); +EXPORT_SYMBOL_GPL(eeh_dev_check_failure); /** * eeh_check_failure - Check if all 1's data is due to EEH slot freeze @@ -514,21 +407,19 @@ EXPORT_SYMBOL_GPL(eeh_dn_check_failure); unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) { unsigned long addr; - struct pci_dev *dev; - struct device_node *dn; + struct eeh_dev *edev; /* Finding the phys addr + pci device; this is pretty quick. */ addr = eeh_token_to_phys((unsigned long __force) token); - dev = pci_addr_cache_get_device(addr); - if (!dev) { + edev = eeh_addr_cache_get_dev(addr); + if (!edev) { eeh_stats.no_device++; return val; } - dn = pci_device_to_OF_node(dev); - eeh_dn_check_failure(dn, dev); + eeh_dev_check_failure(edev); - pci_dev_put(dev); + pci_dev_put(eeh_dev_to_pci_dev(edev)); return val; } @@ -537,23 +428,22 @@ EXPORT_SYMBOL(eeh_check_failure); /** * eeh_pci_enable - Enable MMIO or DMA transfers for this slot - * @edev: pci device node + * @pe: EEH PE * * This routine should be called to reenable frozen MMIO or DMA * so that it would work correctly again. It's useful while doing * recovery or log collection on the indicated device. */ -int eeh_pci_enable(struct eeh_dev *edev, int function) +int eeh_pci_enable(struct eeh_pe *pe, int function) { int rc; - struct device_node *dn = eeh_dev_to_of_node(edev); - rc = eeh_ops->set_option(dn, function); + rc = eeh_ops->set_option(pe, function); if (rc) - printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", - function, rc, dn->full_name); + pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", + __func__, function, pe->phb->global_number, pe->addr, rc); - rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && (function == EEH_OPT_THAW_MMIO)) return 0; @@ -571,17 +461,24 @@ int eeh_pci_enable(struct eeh_dev *edev, int function) */ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { - struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); + struct eeh_pe *pe = edev->pe; + + if (!pe) { + pr_err("%s: No PE found on PCI device %s\n", + __func__, pci_name(dev)); + return -EINVAL; + } switch (state) { case pcie_deassert_reset: - eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); + eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); break; case pcie_hot_reset: - eeh_ops->reset(dn, EEH_RESET_HOT); + eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: - eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); + eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: return -EINVAL; @@ -591,66 +488,37 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat } /** - * __eeh_set_pe_freset - Check the required reset for child devices - * @parent: parent device - * @freset: return value - * - * Each device might have its preferred reset type: fundamental or - * hot reset. The routine is used to collect the information from - * the child devices so that they could be reset accordingly. - */ -void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (of_node_to_eeh_dev(dn)) { - struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; - - if (dev && dev->driver) - *freset |= dev->needs_freset; - - __eeh_set_pe_freset(dn, freset); - } - } -} - -/** - * eeh_set_pe_freset - Check the required reset for the indicated device and its children - * @dn: parent device - * @freset: return value + * eeh_set_pe_freset - Check the required reset for the indicated device + * @data: EEH device + * @flag: return value * * Each device might have its preferred reset type: fundamental or * hot reset. The routine is used to collected the information for * the indicated device and its children so that the bunch of the * devices could be reset properly. */ -void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) +static void *eeh_set_dev_freset(void *data, void *flag) { struct pci_dev *dev; - dn = eeh_find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent; + unsigned int *freset = (unsigned int *)flag; + struct eeh_dev *edev = (struct eeh_dev *)data; - dev = of_node_to_eeh_dev(dn)->pdev; + dev = eeh_dev_to_pci_dev(edev); if (dev) *freset |= dev->needs_freset; - __eeh_set_pe_freset(dn, freset); + return NULL; } /** * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second - * @edev: pci device node to be reset. + * @pe: EEH PE * * Assert the PCI #RST line for 1/4 second. */ -static void eeh_reset_pe_once(struct eeh_dev *edev) +static void eeh_reset_pe_once(struct eeh_pe *pe) { unsigned int freset = 0; - struct device_node *dn = eeh_dev_to_of_node(edev); /* Determine type of EEH reset required for * Partitionable Endpoint, a hot-reset (1) @@ -658,12 +526,12 @@ static void eeh_reset_pe_once(struct eeh_dev *edev) * A fundamental reset required by any device under * Partitionable Endpoint trumps hot-reset. */ - eeh_set_pe_freset(dn, &freset); + eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) - eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); + eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); else - eeh_ops->reset(dn, EEH_RESET_HOT); + eeh_ops->reset(pe, EEH_RESET_HOT); /* The PCI bus requires that the reset be held high for at least * a 100 milliseconds. We wait a bit longer 'just in case'. @@ -675,9 +543,9 @@ static void eeh_reset_pe_once(struct eeh_dev *edev) * pci slot reset line is dropped. Make sure we don't miss * these, and clear the flag now. */ - eeh_clear_slot(dn, EEH_MODE_ISOLATED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); + eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); /* After a PCI slot has been reset, the PCI Express spec requires * a 1.5 second idle time for the bus to stabilize, before starting @@ -689,116 +557,36 @@ static void eeh_reset_pe_once(struct eeh_dev *edev) /** * eeh_reset_pe - Reset the indicated PE - * @edev: PCI device associated EEH device + * @pe: EEH PE * * This routine should be called to reset indicated device, including * PE. A PE might include multiple PCI devices and sometimes PCI bridges * might be involved as well. */ -int eeh_reset_pe(struct eeh_dev *edev) +int eeh_reset_pe(struct eeh_pe *pe) { int i, rc; - struct device_node *dn = eeh_dev_to_of_node(edev); /* Take three shots at resetting the bus */ for (i=0; i<3; i++) { - eeh_reset_pe_once(edev); + eeh_reset_pe_once(pe); - rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) return 0; if (rc < 0) { - printk(KERN_ERR "EEH: unrecoverable slot failure %s\n", - dn->full_name); + pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + __func__, pe->phb->global_number, pe->addr); return -1; } - printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n", - i+1, dn->full_name, rc); + pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", + i+1, pe->phb->global_number, pe->addr, rc); } return -1; } -/** Save and restore of PCI BARs - * - * Although firmware will set up BARs during boot, it doesn't - * set up device BAR's after a device reset, although it will, - * if requested, set up bridge configuration. Thus, we need to - * configure the PCI devices ourselves. - */ - -/** - * eeh_restore_one_device_bars - Restore the Base Address Registers for one device - * @edev: PCI device associated EEH device - * - * Loads the PCI configuration space base address registers, - * the expansion ROM base address, the latency timer, and etc. - * from the saved values in the device node. - */ -static inline void eeh_restore_one_device_bars(struct eeh_dev *edev) -{ - int i; - u32 cmd; - struct device_node *dn = eeh_dev_to_of_node(edev); - - if (!edev->phb) - return; - - for (i=4; i<10; i++) { - eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); - } - - /* 12 == Expansion ROM Address */ - eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); - -#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) -#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) - - eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, - SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - - eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, - SAVED_BYTE(PCI_LATENCY_TIMER)); - - /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); - - /* Restore PERR & SERR bits, some devices require it, - * don't touch the other command bits - */ - eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); - if (edev->config_space[1] & PCI_COMMAND_PARITY) - cmd |= PCI_COMMAND_PARITY; - else - cmd &= ~PCI_COMMAND_PARITY; - if (edev->config_space[1] & PCI_COMMAND_SERR) - cmd |= PCI_COMMAND_SERR; - else - cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); -} - -/** - * eeh_restore_bars - Restore the PCI config space info - * @edev: EEH device - * - * This routine performs a recursive walk to the children - * of this device as well. - */ -void eeh_restore_bars(struct eeh_dev *edev) -{ - struct device_node *dn; - if (!edev) - return; - - if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code)) - eeh_restore_one_device_bars(edev); - - for_each_child_of_node(eeh_dev_to_of_node(edev), dn) - eeh_restore_bars(of_node_to_eeh_dev(dn)); -} - /** * eeh_save_bars - Save device bars * @edev: PCI device associated EEH device @@ -808,7 +596,7 @@ void eeh_restore_bars(struct eeh_dev *edev) * PCI devices are added individually; but, for the restore, * an entire slot is reset at a time. */ -static void eeh_save_bars(struct eeh_dev *edev) +void eeh_save_bars(struct eeh_dev *edev) { int i; struct device_node *dn; @@ -822,102 +610,6 @@ static void eeh_save_bars(struct eeh_dev *edev) } /** - * eeh_early_enable - Early enable EEH on the indicated device - * @dn: device node - * @data: BUID - * - * Enable EEH functionality on the specified PCI device. The function - * is expected to be called before real PCI probing is done. However, - * the PHBs have been initialized at this point. - */ -static void *eeh_early_enable(struct device_node *dn, void *data) -{ - int ret; - const u32 *class_code = of_get_property(dn, "class-code", NULL); - const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL); - const u32 *device_id = of_get_property(dn, "device-id", NULL); - const u32 *regs; - int enable; - struct eeh_dev *edev = of_node_to_eeh_dev(dn); - - edev->class_code = 0; - edev->mode = 0; - edev->check_count = 0; - edev->freeze_count = 0; - edev->false_positives = 0; - - if (!of_device_is_available(dn)) - return NULL; - - /* Ignore bad nodes. */ - if (!class_code || !vendor_id || !device_id) - return NULL; - - /* There is nothing to check on PCI to ISA bridges */ - if (dn->type && !strcmp(dn->type, "isa")) { - edev->mode |= EEH_MODE_NOCHECK; - return NULL; - } - edev->class_code = *class_code; - - /* Ok... see if this device supports EEH. Some do, some don't, - * and the only way to find out is to check each and every one. - */ - regs = of_get_property(dn, "reg", NULL); - if (regs) { - /* First register entry is addr (00BBSS00) */ - /* Try to enable eeh */ - ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE); - - enable = 0; - if (ret == 0) { - edev->config_addr = regs[0]; - - /* If the newer, better, ibm,get-config-addr-info is supported, - * then use that instead. - */ - edev->pe_config_addr = eeh_ops->get_pe_addr(dn); - - /* Some older systems (Power4) allow the - * ibm,set-eeh-option call to succeed even on nodes - * where EEH is not supported. Verify support - * explicitly. - */ - ret = eeh_ops->get_state(dn, NULL); - if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) - enable = 1; - } - - if (enable) { - eeh_subsystem_enabled = 1; - edev->mode |= EEH_MODE_SUPPORTED; - - pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n", - dn->full_name, edev->config_addr, - edev->pe_config_addr); - } else { - - /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. - */ - if (dn->parent && of_node_to_eeh_dev(dn->parent) && - (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { - /* Parent supports EEH. */ - edev->mode |= EEH_MODE_SUPPORTED; - edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; - return NULL; - } - } - } else { - printk(KERN_WARNING "EEH: %s: unable to get reg property.\n", - dn->full_name); - } - - eeh_save_bars(edev); - return NULL; -} - -/** * eeh_ops_register - Register platform dependent EEH operations * @ops: platform dependent EEH operations * @@ -982,7 +674,7 @@ int __exit eeh_ops_unregister(const char *name) * Even if force-off is set, the EEH hardware is still enabled, so that * newer systems can boot. */ -void __init eeh_init(void) +static int __init eeh_init(void) { struct pci_controller *hose, *tmp; struct device_node *phb; @@ -992,27 +684,34 @@ void __init eeh_init(void) if (!eeh_ops) { pr_warning("%s: Platform EEH operation not found\n", __func__); - return; + return -EEXIST; } else if ((ret = eeh_ops->init())) { pr_warning("%s: Failed to call platform init function (%d)\n", __func__, ret); - return; + return ret; } raw_spin_lock_init(&confirm_error_lock); /* Enable EEH for all adapters */ - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - phb = hose->dn; - traverse_pci_devices(phb, eeh_early_enable, NULL); + if (eeh_probe_mode_devtree()) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb = hose->dn; + traverse_pci_devices(phb, eeh_ops->of_probe, NULL); + } } if (eeh_subsystem_enabled) - printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); + pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); else - printk(KERN_WARNING "EEH: No capable adapters found\n"); + pr_warning("EEH: No capable adapters found\n"); + + return ret; } +core_initcall_sync(eeh_init); + /** * eeh_add_device_early - Enable EEH for the indicated device_node * @dn: device node for which to set up EEH @@ -1029,7 +728,7 @@ static void eeh_add_device_early(struct device_node *dn) { struct pci_controller *phb; - if (!dn || !of_node_to_eeh_dev(dn)) + if (!of_node_to_eeh_dev(dn)) return; phb = of_node_to_eeh_dev(dn)->phb; @@ -1037,7 +736,8 @@ static void eeh_add_device_early(struct device_node *dn) if (NULL == phb || 0 == phb->buid) return; - eeh_early_enable(dn, NULL); + /* FIXME: hotplug support on POWERNV */ + eeh_ops->of_probe(dn, NULL); } /** @@ -1087,7 +787,7 @@ static void eeh_add_device_late(struct pci_dev *dev) edev->pdev = dev; dev->dev.archdata.edev = edev; - pci_addr_cache_insert_device(dev); + eeh_addr_cache_insert_dev(dev); eeh_sysfs_add_device(dev); } @@ -1117,6 +817,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed + * @purge_pe: remove the PE or not * * This routine should be called when a device is removed from * a running system (e.g. by hotplug or dlpar). It unregisters @@ -1124,7 +825,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); * this device will no longer be detected after this call; thus, * i/o errors affecting this slot may leave this device unusable. */ -static void eeh_remove_device(struct pci_dev *dev) +static void eeh_remove_device(struct pci_dev *dev, int purge_pe) { struct eeh_dev *edev; @@ -1143,28 +844,30 @@ static void eeh_remove_device(struct pci_dev *dev) dev->dev.archdata.edev = NULL; pci_dev_put(dev); - pci_addr_cache_remove_device(dev); + eeh_rmv_from_parent_pe(edev, purge_pe); + eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); } /** * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device * @dev: PCI device + * @purge_pe: remove the corresponding PE or not * * This routine must be called when a device is removed from the * running system through hotplug or dlpar. The corresponding * PCI address cache will be removed. */ -void eeh_remove_bus_device(struct pci_dev *dev) +void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { struct pci_bus *bus = dev->subordinate; struct pci_dev *child, *tmp; - eeh_remove_device(dev); + eeh_remove_device(dev, purge_pe); if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) - eeh_remove_bus_device(child); + eeh_remove_bus_device(child, purge_pe); } } EXPORT_SYMBOL_GPL(eeh_remove_bus_device); diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index e5ae1c687c66..5a4c87903057 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -50,6 +50,7 @@ struct pci_io_addr_range { struct rb_node rb_node; unsigned long addr_lo; unsigned long addr_hi; + struct eeh_dev *edev; struct pci_dev *pcidev; unsigned int flags; }; @@ -59,7 +60,7 @@ static struct pci_io_addr_cache { spinlock_t piar_lock; } pci_io_addr_cache_root; -static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) +static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) { struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; @@ -74,7 +75,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) n = n->rb_right; } else { pci_dev_get(piar->pcidev); - return piar->pcidev; + return piar->edev; } } } @@ -83,7 +84,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) } /** - * pci_addr_cache_get_device - Get device, given only address + * eeh_addr_cache_get_dev - Get device, given only address * @addr: mmio (PIO) phys address or i/o port number * * Given an mmio phys address, or a port number, find a pci device @@ -92,15 +93,15 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) * from zero (that is, they do *not* have pci_io_addr added in). * It is safe to call this function within an interrupt. */ -struct pci_dev *pci_addr_cache_get_device(unsigned long addr) +struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr) { - struct pci_dev *dev; + struct eeh_dev *edev; unsigned long flags; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - dev = __pci_addr_cache_get_device(addr); + edev = __eeh_addr_cache_get_device(addr); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); - return dev; + return edev; } #ifdef DEBUG @@ -108,7 +109,7 @@ struct pci_dev *pci_addr_cache_get_device(unsigned long addr) * Handy-dandy debug print routine, does nothing more * than print out the contents of our addr cache. */ -static void pci_addr_cache_print(struct pci_io_addr_cache *cache) +static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) { struct rb_node *n; int cnt = 0; @@ -117,7 +118,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache) while (n) { struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", + pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); cnt++; @@ -128,7 +129,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache) /* Insert address range into the rb tree. */ static struct pci_io_addr_range * -pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, +eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, unsigned long ahi, unsigned int flags) { struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; @@ -146,23 +147,24 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, } else { if (dev != piar->pcidev || alo != piar->addr_lo || ahi != piar->addr_hi) { - printk(KERN_WARNING "PIAR: overlapping address range\n"); + pr_warning("PIAR: overlapping address range\n"); } return piar; } } - piar = kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); + piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); if (!piar) return NULL; pci_dev_get(dev); piar->addr_lo = alo; piar->addr_hi = ahi; + piar->edev = pci_dev_to_eeh_dev(dev); piar->pcidev = dev; piar->flags = flags; #ifdef DEBUG - printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", + pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", alo, ahi, pci_name(dev)); #endif @@ -172,7 +174,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, return piar; } -static void __pci_addr_cache_insert_device(struct pci_dev *dev) +static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) { struct device_node *dn; struct eeh_dev *edev; @@ -180,7 +182,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); if (!dn) { - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); + pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev)); return; } @@ -192,8 +194,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) } /* Skip any devices for which EEH is not enabled. */ - if (!(edev->mode & EEH_MODE_SUPPORTED) || - edev->mode & EEH_MODE_NOCHECK) { + if (!edev->pe) { #ifdef DEBUG pr_info("PCI: skip building address cache for=%s - %s\n", pci_name(dev), dn->full_name); @@ -212,19 +213,19 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) continue; if (start == 0 || ~start == 0 || end == 0 || ~end == 0) continue; - pci_addr_cache_insert(dev, start, end, flags); + eeh_addr_cache_insert(dev, start, end, flags); } } /** - * pci_addr_cache_insert_device - Add a device to the address cache + * eeh_addr_cache_insert_dev - Add a device to the address cache * @dev: PCI device whose I/O addresses we are interested in. * * In order to support the fast lookup of devices based on addresses, * we maintain a cache of devices that can be quickly searched. * This routine adds a device to that cache. */ -void pci_addr_cache_insert_device(struct pci_dev *dev) +void eeh_addr_cache_insert_dev(struct pci_dev *dev) { unsigned long flags; @@ -233,11 +234,11 @@ void pci_addr_cache_insert_device(struct pci_dev *dev) return; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_insert_device(dev); + __eeh_addr_cache_insert_dev(dev); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); } -static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) +static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev) { struct rb_node *n; @@ -258,7 +259,7 @@ restart: } /** - * pci_addr_cache_remove_device - remove pci device from addr cache + * eeh_addr_cache_rmv_dev - remove pci device from addr cache * @dev: device to remove * * Remove a device from the addr-cache tree. @@ -266,17 +267,17 @@ restart: * the tree multiple times (once per resource). * But so what; device removal doesn't need to be that fast. */ -void pci_addr_cache_remove_device(struct pci_dev *dev) +void eeh_addr_cache_rmv_dev(struct pci_dev *dev) { unsigned long flags; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_remove_device(dev); + __eeh_addr_cache_rmv_dev(dev); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); } /** - * pci_addr_cache_build - Build a cache of I/O addresses + * eeh_addr_cache_build - Build a cache of I/O addresses * * Build a cache of pci i/o addresses. This cache will be used to * find the pci device that corresponds to a given address. @@ -284,7 +285,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev) * Must be run late in boot process, after the pci controllers * have been scanned for devices (after all device resources are known). */ -void __init pci_addr_cache_build(void) +void __init eeh_addr_cache_build(void) { struct device_node *dn; struct eeh_dev *edev; @@ -293,7 +294,7 @@ void __init pci_addr_cache_build(void) spin_lock_init(&pci_io_addr_cache_root.piar_lock); for_each_pci_dev(dev) { - pci_addr_cache_insert_device(dev); + eeh_addr_cache_insert_dev(dev); dn = pci_device_to_OF_node(dev); if (!dn) @@ -312,7 +313,7 @@ void __init pci_addr_cache_build(void) #ifdef DEBUG /* Verify tree built up above, echo back the list of addrs. */ - pci_addr_cache_print(&pci_io_addr_cache_root); + eeh_addr_cache_print(&pci_io_addr_cache_root); #endif } diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c index c4507d095900..66442341d3a6 100644 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ b/arch/powerpc/platforms/pseries/eeh_dev.c @@ -55,7 +55,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data) struct eeh_dev *edev; /* Allocate EEH device */ - edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL); + edev = kzalloc(sizeof(*edev), GFP_KERNEL); if (!edev) { pr_warning("%s: out of memory\n", __func__); return NULL; @@ -65,6 +65,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data) PCI_DN(dn)->edev = edev; edev->dn = dn; edev->phb = phb; + INIT_LIST_HEAD(&edev->list); return NULL; } @@ -80,6 +81,9 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb) { struct device_node *dn = phb->dn; + /* EEH PE for PHB */ + eeh_phb_pe_create(phb); + /* EEH device for PHB */ eeh_dev_init(dn, phb); @@ -93,10 +97,16 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb) * Scan all the existing PHBs and create EEH devices for their OF * nodes and their children OF nodes */ -void __init eeh_dev_phb_init(void) +static int __init eeh_dev_phb_init(void) { struct pci_controller *phb, *tmp; list_for_each_entry_safe(phb, tmp, &hose_list, list_node) eeh_dev_phb_init_dynamic(phb); + + pr_info("EEH: devices created\n"); + + return 0; } + +core_initcall(eeh_dev_phb_init); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index baf92cd9dfab..a3fefb61097c 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -25,6 +25,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/module.h> #include <linux/pci.h> #include <asm/eeh.h> #include <asm/eeh_event.h> @@ -47,6 +48,41 @@ static inline const char *eeh_pcid_name(struct pci_dev *pdev) return ""; } +/** + * eeh_pcid_get - Get the PCI device driver + * @pdev: PCI device + * + * The function is used to retrieve the PCI device driver for + * the indicated PCI device. Besides, we will increase the reference + * of the PCI device driver to prevent that being unloaded on + * the fly. Otherwise, kernel crash would be seen. + */ +static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) +{ + if (!pdev || !pdev->driver) + return NULL; + + if (!try_module_get(pdev->driver->driver.owner)) + return NULL; + + return pdev->driver; +} + +/** + * eeh_pcid_put - Dereference on the PCI device driver + * @pdev: PCI device + * + * The function is called to do dereference on the PCI device + * driver of the indicated PCI device. + */ +static inline void eeh_pcid_put(struct pci_dev *pdev) +{ + if (!pdev || !pdev->driver) + return; + + module_put(pdev->driver->driver.owner); +} + #if 0 static void print_device_node_tree(struct pci_dn *pdn, int dent) { @@ -93,7 +129,7 @@ static void eeh_disable_irq(struct pci_dev *dev) if (!irq_has_action(dev->irq)) return; - edev->mode |= EEH_MODE_IRQ_DISABLED; + edev->mode |= EEH_DEV_IRQ_DISABLED; disable_irq_nosync(dev->irq); } @@ -108,36 +144,44 @@ static void eeh_enable_irq(struct pci_dev *dev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - if ((edev->mode) & EEH_MODE_IRQ_DISABLED) { - edev->mode &= ~EEH_MODE_IRQ_DISABLED; + if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { + edev->mode &= ~EEH_DEV_IRQ_DISABLED; enable_irq(dev->irq); } } /** * eeh_report_error - Report pci error to each device driver - * @dev: PCI device + * @data: eeh device * @userdata: return value * * Report an EEH error to each device driver, collect up and * merge the device driver responses. Cumulative response * passed back in "userdata". */ -static int eeh_report_error(struct pci_dev *dev, void *userdata) +static void *eeh_report_error(void *data, void *userdata) { + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver = dev->driver; + struct pci_driver *driver; + /* We might not have the associated PCI device, + * then we should continue for next one. + */ + if (!dev) return NULL; dev->error_state = pci_channel_io_frozen; - if (!driver) - return 0; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; eeh_disable_irq(dev); if (!driver->err_handler || - !driver->err_handler->error_detected) - return 0; + !driver->err_handler->error_detected) { + eeh_pcid_put(dev); + return NULL; + } rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); @@ -145,27 +189,34 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; - return 0; + eeh_pcid_put(dev); + return NULL; } /** * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled - * @dev: PCI device + * @data: eeh device * @userdata: return value * * Tells each device driver that IO ports, MMIO and config space I/O * are now enabled. Collects up and merges the device driver responses. * Cumulative response passed back in "userdata". */ -static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) +static void *eeh_report_mmio_enabled(void *data, void *userdata) { + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver = dev->driver; + struct pci_driver *driver; - if (!driver || - !driver->err_handler || - !driver->err_handler->mmio_enabled) - return 0; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + + if (!driver->err_handler || + !driver->err_handler->mmio_enabled) { + eeh_pcid_put(dev); + return NULL; + } rc = driver->err_handler->mmio_enabled(dev); @@ -173,12 +224,13 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; - return 0; + eeh_pcid_put(dev); + return NULL; } /** * eeh_report_reset - Tell device that slot has been reset - * @dev: PCI device + * @data: eeh device * @userdata: return value * * This routine must be called while EEH tries to reset particular @@ -186,21 +238,26 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) * some actions, usually to save data the driver needs so that the * driver can work again while the device is recovered. */ -static int eeh_report_reset(struct pci_dev *dev, void *userdata) +static void *eeh_report_reset(void *data, void *userdata) { + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver = dev->driver; - - if (!driver) - return 0; + struct pci_driver *driver; + if (!dev) return NULL; dev->error_state = pci_channel_io_normal; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->slot_reset) - return 0; + !driver->err_handler->slot_reset) { + eeh_pcid_put(dev); + return NULL; + } rc = driver->err_handler->slot_reset(dev); if ((*res == PCI_ERS_RESULT_NONE) || @@ -208,109 +265,115 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata) if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - return 0; + eeh_pcid_put(dev); + return NULL; } /** * eeh_report_resume - Tell device to resume normal operations - * @dev: PCI device + * @data: eeh device * @userdata: return value * * This routine must be called to notify the device driver that it * could resume so that the device driver can do some initialization * to make the recovered device work again. */ -static int eeh_report_resume(struct pci_dev *dev, void *userdata) +static void *eeh_report_resume(void *data, void *userdata) { - struct pci_driver *driver = dev->driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_driver *driver; + if (!dev) return NULL; dev->error_state = pci_channel_io_normal; - if (!driver) - return 0; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->resume) - return 0; + !driver->err_handler->resume) { + eeh_pcid_put(dev); + return NULL; + } driver->err_handler->resume(dev); - return 0; + eeh_pcid_put(dev); + return NULL; } /** * eeh_report_failure - Tell device driver that device is dead. - * @dev: PCI device + * @data: eeh device * @userdata: return value * * This informs the device driver that the device is permanently * dead, and that no further recovery attempts will be made on it. */ -static int eeh_report_failure(struct pci_dev *dev, void *userdata) +static void *eeh_report_failure(void *data, void *userdata) { - struct pci_driver *driver = dev->driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_driver *driver; + if (!dev) return NULL; dev->error_state = pci_channel_io_perm_failure; - if (!driver) - return 0; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; eeh_disable_irq(dev); if (!driver->err_handler || - !driver->err_handler->error_detected) - return 0; + !driver->err_handler->error_detected) { + eeh_pcid_put(dev); + return NULL; + } driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); - return 0; + eeh_pcid_put(dev); + return NULL; } /** * eeh_reset_device - Perform actual reset of a pci slot - * @edev: PE associated EEH device + * @pe: EEH PE * @bus: PCI bus corresponding to the isolcated slot * * This routine must be called to do reset on the indicated PE. * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) { - struct device_node *dn; int cnt, rc; /* pcibios will clear the counter; save the value */ - cnt = edev->freeze_count; + cnt = pe->freeze_count; + /* + * We don't remove the corresponding PE instances because + * we need the information afterwords. The attached EEH + * devices are expected to be attached soon when calling + * into pcibios_add_pci_devices(). + */ if (bus) - pcibios_remove_pci_devices(bus); + __pcibios_remove_pci_devices(bus, 0); /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. */ - rc = eeh_reset_pe(edev); + rc = eeh_reset_pe(pe); if (rc) return rc; - /* Walk over all functions on this device. */ - dn = eeh_dev_to_of_node(edev); - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent->child; - - while (dn) { - struct eeh_dev *pedev = of_node_to_eeh_dev(dn); - - /* On Power4, always true because eeh_pe_config_addr=0 */ - if (edev->pe_config_addr == pedev->pe_config_addr) { - eeh_ops->configure_bridge(dn); - eeh_restore_bars(pedev); - } - dn = dn->sibling; - } + /* Restore PE */ + eeh_ops->configure_bridge(pe); + eeh_pe_restore_bars(pe); /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, @@ -322,7 +385,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) ssleep(5); pcibios_add_pci_devices(bus); } - edev->freeze_count = cnt; + pe->freeze_count = cnt; return 0; } @@ -334,7 +397,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) /** * eeh_handle_event - Reset a PCI device after hard lockup. - * @event: EEH event + * @pe: EEH PE * * While PHB detects address or data parity errors on particular PCI * slot, the associated PE will be frozen. Besides, DMA's occurring @@ -349,69 +412,24 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) * drivers (which cause a second set of hotplug events to go out to * userspace). */ -struct eeh_dev *handle_eeh_events(struct eeh_event *event) +void eeh_handle_event(struct eeh_pe *pe) { - struct device_node *frozen_dn; - struct eeh_dev *frozen_edev; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; - - frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev)); - if (!frozen_dn) { - location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL); - location = location ? location : "unknown"; - printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " - "for location=%s pci addr=%s\n", - location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev))); - return NULL; - } - - frozen_bus = pcibios_find_pci_bus(frozen_dn); - location = of_get_property(frozen_dn, "ibm,loc-code", NULL); - location = location ? location : "unknown"; - - /* There are two different styles for coming up with the PE. - * In the old style, it was the highest EEH-capable device - * which was always an EADS pci bridge. In the new style, - * there might not be any EADS bridges, and even when there are, - * the firmware marks them as "EEH incapable". So another - * two-step is needed to find the pci bus.. - */ - if (!frozen_bus) - frozen_bus = pcibios_find_pci_bus(frozen_dn->parent); + frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { - printk(KERN_ERR "EEH: Cannot find PCI bus " - "for location=%s dn=%s\n", - location, frozen_dn->full_name); - return NULL; + pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->addr); + return; } - frozen_edev = of_node_to_eeh_dev(frozen_dn); - frozen_edev->freeze_count++; - pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev)); - drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev)); - - if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES) + pe->freeze_count++; + if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; - - printk(KERN_WARNING - "EEH: This PCI device has failed %d times in the last hour:\n", - frozen_edev->freeze_count); - - if (frozen_edev->pdev) { - bus_pci_str = pci_name(frozen_edev->pdev); - bus_drv_str = eeh_pcid_name(frozen_edev->pdev); - printk(KERN_WARNING - "EEH: Bus location=%s driver=%s pci addr=%s\n", - location, bus_drv_str, bus_pci_str); - } - - printk(KERN_WARNING - "EEH: Device location=%s driver=%s pci addr=%s\n", - location, drv_str, pci_str); + pr_warning("EEH: This PCI device has failed %d times in the last hour\n", + pe->freeze_count); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -419,12 +437,12 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) * status ... if any child can't handle the reset, then the entire * slot is dlpar removed and added. */ - pci_walk_bus(frozen_bus, eeh_report_error, &result); + eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, * sometimes over 3 seconds for certain systems. */ - rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000); + rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; @@ -434,14 +452,14 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) * don't post the error log until after all dev drivers * have been informed. */ - eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP); + eeh_slot_error_detail(pe, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they * go down willingly, without panicing the system. */ if (result == PCI_ERS_RESULT_NONE) { - rc = eeh_reset_device(frozen_edev, frozen_bus); + rc = eeh_reset_device(pe, frozen_bus); if (rc) { printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); goto hard_fail; @@ -450,7 +468,7 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO); + rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); if (rc < 0) goto hard_fail; @@ -458,13 +476,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) result = PCI_ERS_RESULT_NEED_RESET; } else { result = PCI_ERS_RESULT_NONE; - pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); + eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); } } /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA); + rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); if (rc < 0) goto hard_fail; @@ -482,13 +500,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { - rc = eeh_reset_device(frozen_edev, NULL); + rc = eeh_reset_device(pe, NULL); if (rc) { printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); goto hard_fail; } result = PCI_ERS_RESULT_NONE; - pci_walk_bus(frozen_bus, eeh_report_reset, &result); + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); } /* All devices should claim they have recovered by now. */ @@ -499,9 +517,9 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) } /* Tell all device drivers that they can resume operations */ - pci_walk_bus(frozen_bus, eeh_report_resume, NULL); + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return frozen_edev; + return; excess_failures: /* @@ -509,30 +527,26 @@ excess_failures: * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ - printk(KERN_ERR - "EEH: PCI device at location=%s driver=%s pci addr=%s\n" - "has failed %d times in the last hour " - "and has been permanently disabled.\n" - "Please try reseating this device or replacing it.\n", - location, drv_str, pci_str, frozen_edev->freeze_count); + pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n" + "last hour and has been permanently disabled.\n" + "Please try reseating or replacing it.\n", + pe->phb->global_number, pe->addr, + pe->freeze_count); goto perm_error; hard_fail: - printk(KERN_ERR - "EEH: Unable to recover from failure of PCI device " - "at location=%s driver=%s pci addr=%s\n" - "Please try reseating this device or replacing it.\n", - location, drv_str, pci_str); + pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n" + "Please try reseating or replacing it\n", + pe->phb->global_number, pe->addr); perm_error: - eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM); + eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ - pci_walk_bus(frozen_bus, eeh_report_failure, NULL); + eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); /* Shut down the device drivers for good. */ - pcibios_remove_pci_devices(frozen_bus); - - return NULL; + if (frozen_bus) + pcibios_remove_pci_devices(frozen_bus); } diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index fb506317ebb0..185bedd926df 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -23,6 +23,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/workqueue.h> +#include <linux/kthread.h> #include <asm/eeh_event.h> #include <asm/ppc-pci.h> @@ -57,9 +58,7 @@ static int eeh_event_handler(void * dummy) { unsigned long flags; struct eeh_event *event; - struct eeh_dev *edev; - - set_task_comm(current, "eehd"); + struct eeh_pe *pe; spin_lock_irqsave(&eeh_eventlist_lock, flags); event = NULL; @@ -76,28 +75,23 @@ static int eeh_event_handler(void * dummy) /* Serialize processing of EEH events */ mutex_lock(&eeh_event_mutex); - edev = event->edev; - eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); - - printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", - eeh_pci_name(edev->pdev)); + pe = event->pe; + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", + pe->phb->global_number, pe->addr); set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ - edev = handle_eeh_events(event); - - if (edev) { - eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); - pci_dev_put(edev->pdev); - } + eeh_handle_event(pe); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); kfree(event); mutex_unlock(&eeh_event_mutex); /* If there are no new errors after an hour, clear the counter. */ - if (edev && edev->freeze_count>0) { + if (pe && pe->freeze_count > 0) { msleep_interruptible(3600*1000); - if (edev->freeze_count>0) - edev->freeze_count--; + if (pe->freeze_count > 0) + pe->freeze_count--; } @@ -113,42 +107,29 @@ static int eeh_event_handler(void * dummy) */ static void eeh_thread_launcher(struct work_struct *dummy) { - if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) + if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) printk(KERN_ERR "Failed to start EEH daemon\n"); } /** * eeh_send_failure_event - Generate a PCI error event - * @edev: EEH device + * @pe: EEH PE * * This routine can be called within an interrupt context; * the actual event will be delivered in a normal context * (from a workqueue). */ -int eeh_send_failure_event(struct eeh_dev *edev) +int eeh_send_failure_event(struct eeh_pe *pe) { unsigned long flags; struct eeh_event *event; - struct device_node *dn = eeh_dev_to_of_node(edev); - const char *location; - - if (!mem_init_done) { - printk(KERN_ERR "EEH: event during early boot not handled\n"); - location = of_get_property(dn, "ibm,loc-code", NULL); - printk(KERN_ERR "EEH: device node = %s\n", dn->full_name); - printk(KERN_ERR "EEH: PCI location = %s\n", location); - return 1; - } - event = kmalloc(sizeof(*event), GFP_ATOMIC); - if (event == NULL) { - printk(KERN_ERR "EEH: out of memory, event not handled\n"); - return 1; - } - - if (edev->pdev) - pci_dev_get(edev->pdev); - event->edev = edev; + event = kzalloc(sizeof(*event), GFP_ATOMIC); + if (!event) { + pr_err("EEH: out of memory, event not handled\n"); + return -ENOMEM; + } + event->pe = pe; /* We may or may not be called in an interrupt context */ spin_lock_irqsave(&eeh_eventlist_lock, flags); diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c new file mode 100644 index 000000000000..797cd181dc3f --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -0,0 +1,652 @@ +/* + * The file intends to implement PE based on the information from + * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device. + * All the PEs should be organized as hierarchy tree. The first level + * of the tree will be associated to existing PHBs since the particular + * PE is only meaningful in one PHB domain. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/export.h> +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + +static LIST_HEAD(eeh_phb_pe); + +/** + * eeh_pe_alloc - Allocate PE + * @phb: PCI controller + * @type: PE type + * + * Allocate PE instance dynamically. + */ +static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) +{ + struct eeh_pe *pe; + + /* Allocate PHB PE */ + pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL); + if (!pe) return NULL; + + /* Initialize PHB PE */ + pe->type = type; + pe->phb = phb; + INIT_LIST_HEAD(&pe->child_list); + INIT_LIST_HEAD(&pe->child); + INIT_LIST_HEAD(&pe->edevs); + + return pe; +} + +/** + * eeh_phb_pe_create - Create PHB PE + * @phb: PCI controller + * + * The function should be called while the PHB is detected during + * system boot or PCI hotplug in order to create PHB PE. + */ +int __devinit eeh_phb_pe_create(struct pci_controller *phb) +{ + struct eeh_pe *pe; + + /* Allocate PHB PE */ + pe = eeh_pe_alloc(phb, EEH_PE_PHB); + if (!pe) { + pr_err("%s: out of memory!\n", __func__); + return -ENOMEM; + } + + /* Put it into the list */ + eeh_lock(); + list_add_tail(&pe->child, &eeh_phb_pe); + eeh_unlock(); + + pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); + + return 0; +} + +/** + * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB + * @phb: PCI controller + * + * The overall PEs form hierarchy tree. The first layer of the + * hierarchy tree is composed of PHB PEs. The function is used + * to retrieve the corresponding PHB PE according to the given PHB. + */ +static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) +{ + struct eeh_pe *pe; + + list_for_each_entry(pe, &eeh_phb_pe, child) { + /* + * Actually, we needn't check the type since + * the PE for PHB has been determined when that + * was created. + */ + if ((pe->type & EEH_PE_PHB) && pe->phb == phb) + return pe; + } + + return NULL; +} + +/** + * eeh_pe_next - Retrieve the next PE in the tree + * @pe: current PE + * @root: root PE + * + * The function is used to retrieve the next PE in the + * hierarchy PE tree. + */ +static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, + struct eeh_pe *root) +{ + struct list_head *next = pe->child_list.next; + + if (next == &pe->child_list) { + while (1) { + if (pe == root) + return NULL; + next = pe->child.next; + if (next != &pe->parent->child_list) + break; + pe = pe->parent; + } + } + + return list_entry(next, struct eeh_pe, child); +} + +/** + * eeh_pe_traverse - Traverse PEs in the specified PHB + * @root: root PE + * @fn: callback + * @flag: extra parameter to callback + * + * The function is used to traverse the specified PE and its + * child PEs. The traversing is to be terminated once the + * callback returns something other than NULL, or no more PEs + * to be traversed. + */ +static void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) +{ + struct eeh_pe *pe; + void *ret; + + for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + ret = fn(pe, flag); + if (ret) return ret; + } + + return NULL; +} + +/** + * eeh_pe_dev_traverse - Traverse the devices from the PE + * @root: EEH PE + * @fn: function callback + * @flag: extra parameter to callback + * + * The function is used to traverse the devices of the specified + * PE and its child PEs. + */ +void *eeh_pe_dev_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) +{ + struct eeh_pe *pe; + struct eeh_dev *edev; + void *ret; + + if (!root) { + pr_warning("%s: Invalid PE %p\n", __func__, root); + return NULL; + } + + eeh_lock(); + + /* Traverse root PE */ + for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + eeh_pe_for_each_dev(pe, edev) { + ret = fn(edev, flag); + if (ret) { + eeh_unlock(); + return ret; + } + } + } + + eeh_unlock(); + + return NULL; +} + +/** + * __eeh_pe_get - Check the PE address + * @data: EEH PE + * @flag: EEH device + * + * For one particular PE, it can be identified by PE address + * or tranditional BDF address. BDF address is composed of + * Bus/Device/Function number. The extra data referred by flag + * indicates which type of address should be used. + */ +static void *__eeh_pe_get(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + struct eeh_dev *edev = (struct eeh_dev *)flag; + + /* Unexpected PHB PE */ + if (pe->type & EEH_PE_PHB) + return NULL; + + /* We prefer PE address */ + if (edev->pe_config_addr && + (edev->pe_config_addr == pe->addr)) + return pe; + + /* Try BDF address */ + if (edev->pe_config_addr && + (edev->config_addr == pe->config_addr)) + return pe; + + return NULL; +} + +/** + * eeh_pe_get - Search PE based on the given address + * @edev: EEH device + * + * Search the corresponding PE based on the specified address which + * is included in the eeh device. The function is used to check if + * the associated PE has been created against the PE address. It's + * notable that the PE address has 2 format: traditional PE address + * which is composed of PCI bus/device/function number, or unified + * PE address. + */ +static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) +{ + struct eeh_pe *root = eeh_phb_pe_get(edev->phb); + struct eeh_pe *pe; + + pe = eeh_pe_traverse(root, __eeh_pe_get, edev); + + return pe; +} + +/** + * eeh_pe_get_parent - Retrieve the parent PE + * @edev: EEH device + * + * The whole PEs existing in the system are organized as hierarchy + * tree. The function is used to retrieve the parent PE according + * to the parent EEH device. + */ +static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) +{ + struct device_node *dn; + struct eeh_dev *parent; + + /* + * It might have the case for the indirect parent + * EEH device already having associated PE, but + * the direct parent EEH device doesn't have yet. + */ + dn = edev->dn->parent; + while (dn) { + /* We're poking out of PCI territory */ + if (!PCI_DN(dn)) return NULL; + + parent = of_node_to_eeh_dev(dn); + /* We're poking out of PCI territory */ + if (!parent) return NULL; + + if (parent->pe) + return parent->pe; + + dn = dn->parent; + } + + return NULL; +} + +/** + * eeh_add_to_parent_pe - Add EEH device to parent PE + * @edev: EEH device + * + * Add EEH device to the parent PE. If the parent PE already + * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, + * we have to create new PE to hold the EEH device and the new + * PE will be linked to its parent PE as well. + */ +int eeh_add_to_parent_pe(struct eeh_dev *edev) +{ + struct eeh_pe *pe, *parent; + + eeh_lock(); + + /* + * Search the PE has been existing or not according + * to the PE address. If that has been existing, the + * PE should be composed of PCI bus and its subordinate + * components. + */ + pe = eeh_pe_get(edev); + if (pe && !(pe->type & EEH_PE_INVALID)) { + if (!edev->pe_config_addr) { + eeh_unlock(); + pr_err("%s: PE with addr 0x%x already exists\n", + __func__, edev->config_addr); + return -EEXIST; + } + + /* Mark the PE as type of PCI bus */ + pe->type = EEH_PE_BUS; + edev->pe = pe; + + /* Put the edev to PE */ + list_add_tail(&edev->list, &pe->edevs); + eeh_unlock(); + pr_debug("EEH: Add %s to Bus PE#%x\n", + edev->dn->full_name, pe->addr); + + return 0; + } else if (pe && (pe->type & EEH_PE_INVALID)) { + list_add_tail(&edev->list, &pe->edevs); + edev->pe = pe; + /* + * We're running to here because of PCI hotplug caused by + * EEH recovery. We need clear EEH_PE_INVALID until the top. + */ + parent = pe; + while (parent) { + if (!(parent->type & EEH_PE_INVALID)) + break; + parent->type &= ~EEH_PE_INVALID; + parent = parent->parent; + } + eeh_unlock(); + pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", + edev->dn->full_name, pe->addr, pe->parent->addr); + + return 0; + } + + /* Create a new EEH PE */ + pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + if (!pe) { + eeh_unlock(); + pr_err("%s: out of memory!\n", __func__); + return -ENOMEM; + } + pe->addr = edev->pe_config_addr; + pe->config_addr = edev->config_addr; + + /* + * Put the new EEH PE into hierarchy tree. If the parent + * can't be found, the newly created PE will be attached + * to PHB directly. Otherwise, we have to associate the + * PE with its parent. + */ + parent = eeh_pe_get_parent(edev); + if (!parent) { + parent = eeh_phb_pe_get(edev->phb); + if (!parent) { + eeh_unlock(); + pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", + __func__, edev->phb->global_number); + edev->pe = NULL; + kfree(pe); + return -EEXIST; + } + } + pe->parent = parent; + + /* + * Put the newly created PE into the child list and + * link the EEH device accordingly. + */ + list_add_tail(&pe->child, &parent->child_list); + list_add_tail(&edev->list, &pe->edevs); + edev->pe = pe; + eeh_unlock(); + pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", + edev->dn->full_name, pe->addr, pe->parent->addr); + + return 0; +} + +/** + * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE + * @edev: EEH device + * @purge_pe: remove PE or not + * + * The PE hierarchy tree might be changed when doing PCI hotplug. + * Also, the PCI devices or buses could be removed from the system + * during EEH recovery. So we have to call the function remove the + * corresponding PE accordingly if necessary. + */ +int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) +{ + struct eeh_pe *pe, *parent, *child; + int cnt; + + if (!edev->pe) { + pr_warning("%s: No PE found for EEH device %s\n", + __func__, edev->dn->full_name); + return -EEXIST; + } + + eeh_lock(); + + /* Remove the EEH device */ + pe = edev->pe; + edev->pe = NULL; + list_del(&edev->list); + + /* + * Check if the parent PE includes any EEH devices. + * If not, we should delete that. Also, we should + * delete the parent PE if it doesn't have associated + * child PEs and EEH devices. + */ + while (1) { + parent = pe->parent; + if (pe->type & EEH_PE_PHB) + break; + + if (purge_pe) { + if (list_empty(&pe->edevs) && + list_empty(&pe->child_list)) { + list_del(&pe->child); + kfree(pe); + } else { + break; + } + } else { + if (list_empty(&pe->edevs)) { + cnt = 0; + list_for_each_entry(child, &pe->child_list, child) { + if (!(pe->type & EEH_PE_INVALID)) { + cnt++; + break; + } + } + + if (!cnt) + pe->type |= EEH_PE_INVALID; + else + break; + } + } + + pe = parent; + } + + eeh_unlock(); + + return 0; +} + +/** + * __eeh_pe_state_mark - Mark the state for the PE + * @data: EEH PE + * @flag: state + * + * The function is used to mark the indicated state for the given + * PE. Also, the associated PCI devices will be put into IO frozen + * state as well. + */ +static void *__eeh_pe_state_mark(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + int state = *((int *)flag); + struct eeh_dev *tmp; + struct pci_dev *pdev; + + /* + * Mark the PE with the indicated state. Also, + * the associated PCI device will be put into + * I/O frozen state to avoid I/O accesses from + * the PCI device driver. + */ + pe->state |= state; + eeh_pe_for_each_dev(pe, tmp) { + pdev = eeh_dev_to_pci_dev(tmp); + if (pdev) + pdev->error_state = pci_channel_io_frozen; + } + + return NULL; +} + +/** + * eeh_pe_state_mark - Mark specified state for PE and its associated device + * @pe: EEH PE + * + * EEH error affects the current PE and its child PEs. The function + * is used to mark appropriate state for the affected PEs and the + * associated devices. + */ +void eeh_pe_state_mark(struct eeh_pe *pe, int state) +{ + eeh_lock(); + eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); + eeh_unlock(); +} + +/** + * __eeh_pe_state_clear - Clear state for the PE + * @data: EEH PE + * @flag: state + * + * The function is used to clear the indicated state from the + * given PE. Besides, we also clear the check count of the PE + * as well. + */ +static void *__eeh_pe_state_clear(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + int state = *((int *)flag); + + pe->state &= ~state; + pe->check_count = 0; + + return NULL; +} + +/** + * eeh_pe_state_clear - Clear state for the PE and its children + * @pe: PE + * @state: state to be cleared + * + * When the PE and its children has been recovered from error, + * we need clear the error state for that. The function is used + * for the purpose. + */ +void eeh_pe_state_clear(struct eeh_pe *pe, int state) +{ + eeh_lock(); + eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); + eeh_unlock(); +} + +/** + * eeh_restore_one_device_bars - Restore the Base Address Registers for one device + * @data: EEH device + * @flag: Unused + * + * Loads the PCI configuration space base address registers, + * the expansion ROM base address, the latency timer, and etc. + * from the saved values in the device node. + */ +static void *eeh_restore_one_device_bars(void *data, void *flag) +{ + int i; + u32 cmd; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct device_node *dn = eeh_dev_to_of_node(edev); + + for (i = 4; i < 10; i++) + eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); + /* 12 == Expansion ROM Address */ + eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); + +#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) +#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) + + eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, + SAVED_BYTE(PCI_CACHE_LINE_SIZE)); + eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); + + /* max latency, min grant, interrupt pin and line */ + eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); + + /* + * Restore PERR & SERR bits, some devices require it, + * don't touch the other command bits + */ + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); + if (edev->config_space[1] & PCI_COMMAND_PARITY) + cmd |= PCI_COMMAND_PARITY; + else + cmd &= ~PCI_COMMAND_PARITY; + if (edev->config_space[1] & PCI_COMMAND_SERR) + cmd |= PCI_COMMAND_SERR; + else + cmd &= ~PCI_COMMAND_SERR; + eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); + + return NULL; +} + +/** + * eeh_pe_restore_bars - Restore the PCI config space info + * @pe: EEH PE + * + * This routine performs a recursive walk to the children + * of this device as well. + */ +void eeh_pe_restore_bars(struct eeh_pe *pe) +{ + /* + * We needn't take the EEH lock since eeh_pe_dev_traverse() + * will take that. + */ + eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL); +} + +/** + * eeh_pe_bus_get - Retrieve PCI bus according to the given PE + * @pe: EEH PE + * + * Retrieve the PCI bus according to the given PE. Basically, + * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the + * primary PCI bus will be retrieved. The parent bus will be + * returned for BUS PE. However, we don't have associated PCI + * bus for DEVICE PE. + */ +struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) +{ + struct pci_bus *bus = NULL; + struct eeh_dev *edev; + struct pci_dev *pdev; + + eeh_lock(); + + if (pe->type & EEH_PE_PHB) { + bus = pe->phb->bus; + } else if (pe->type & EEH_PE_BUS) { + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + bus = pdev->bus; + } + + eeh_unlock(); + + return bus; +} diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index c33360ec4f4f..19506f935737 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -129,27 +129,117 @@ static int pseries_eeh_init(void) eeh_error_buf_size = RTAS_ERROR_LOG_MAX; } + /* Set EEH probe mode */ + eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE); + return 0; } /** + * pseries_eeh_of_probe - EEH probe on the given device + * @dn: OF node + * @flag: Unused + * + * When EEH module is installed during system boot, all PCI devices + * are checked one by one to see if it supports EEH. The function + * is introduced for the purpose. + */ +static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) +{ + struct eeh_dev *edev; + struct eeh_pe pe; + const u32 *class_code, *vendor_id, *device_id; + const u32 *regs; + int enable = 0; + int ret; + + /* Retrieve OF node and eeh device */ + edev = of_node_to_eeh_dev(dn); + if (!of_device_is_available(dn)) + return NULL; + + /* Retrieve class/vendor/device IDs */ + class_code = of_get_property(dn, "class-code", NULL); + vendor_id = of_get_property(dn, "vendor-id", NULL); + device_id = of_get_property(dn, "device-id", NULL); + + /* Skip for bad OF node or PCI-ISA bridge */ + if (!class_code || !vendor_id || !device_id) + return NULL; + if (dn->type && !strcmp(dn->type, "isa")) + return NULL; + + /* Update class code and mode of eeh device */ + edev->class_code = *class_code; + edev->mode = 0; + + /* Retrieve the device address */ + regs = of_get_property(dn, "reg", NULL); + if (!regs) { + pr_warning("%s: OF node property %s::reg not found\n", + __func__, dn->full_name); + return NULL; + } + + /* Initialize the fake PE */ + memset(&pe, 0, sizeof(struct eeh_pe)); + pe.phb = edev->phb; + pe.config_addr = regs[0]; + + /* Enable EEH on the device */ + ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); + if (!ret) { + edev->config_addr = regs[0]; + /* Retrieve PE address */ + edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); + pe.addr = edev->pe_config_addr; + + /* Some older systems (Power4) allow the ibm,set-eeh-option + * call to succeed even on nodes where EEH is not supported. + * Verify support explicitly. + */ + ret = eeh_ops->get_state(&pe, NULL); + if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) + enable = 1; + + if (enable) { + eeh_subsystem_enabled = 1; + eeh_add_to_parent_pe(edev); + + pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", + __func__, dn->full_name, pe.phb->global_number, + pe.addr, pe.config_addr); + } else if (dn->parent && of_node_to_eeh_dev(dn->parent) && + (of_node_to_eeh_dev(dn->parent))->pe) { + /* This device doesn't support EEH, but it may have an + * EEH parent, in which case we mark it as supported. + */ + edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; + edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; + eeh_add_to_parent_pe(edev); + } + } + + /* Save memory bars */ + eeh_save_bars(edev); + + return NULL; +} + +/** * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable - * @dn: device node + * @pe: EEH PE * @option: operation to be issued * * The function is used to control the EEH functionality globally. * Currently, following options are support according to PAPR: * Enable EEH, Disable EEH, Enable MMIO and Enable DMA */ -static int pseries_eeh_set_option(struct device_node *dn, int option) +static int pseries_eeh_set_option(struct eeh_pe *pe, int option) { int ret = 0; - struct eeh_dev *edev; - const u32 *reg; int config_addr; - edev = of_node_to_eeh_dev(dn); - /* * When we're enabling or disabling EEH functioality on * the particular PE, the PE config address is possibly @@ -159,15 +249,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option) switch (option) { case EEH_OPT_DISABLE: case EEH_OPT_ENABLE: - reg = of_get_property(dn, "reg", NULL); - config_addr = reg[0]; - break; - case EEH_OPT_THAW_MMIO: case EEH_OPT_THAW_DMA: - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; break; default: @@ -177,15 +263,15 @@ static int pseries_eeh_set_option(struct device_node *dn, int option) } ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), option); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), option); return ret; } /** * pseries_eeh_get_pe_addr - Retrieve PE address - * @dn: device node + * @pe: EEH PE * * Retrieve the assocated PE address. Actually, there're 2 RTAS * function calls dedicated for the purpose. We need implement @@ -196,14 +282,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option) * It's notable that zero'ed return value means invalid PE config * address. */ -static int pseries_eeh_get_pe_addr(struct device_node *dn) +static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) { - struct eeh_dev *edev; int ret = 0; int rets[3]; - edev = of_node_to_eeh_dev(dn); - if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* * First of all, we need to make sure there has one PE @@ -211,18 +294,18 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) * meaningless. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - edev->config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), 1); + pe->config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), 1); if (ret || (rets[0] == 0)) return 0; /* Retrieve the associated PE config address */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - edev->config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), 0); + pe->config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get PE address for %s\n", - __func__, dn->full_name); + pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -231,11 +314,11 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, - edev->config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), 0); + pe->config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get PE address for %s\n", - __func__, dn->full_name); + pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -247,7 +330,7 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) /** * pseries_eeh_get_state - Retrieve PE state - * @dn: PE associated device node + * @pe: EEH PE * @state: return value * * Retrieve the state of the specified PE. On RTAS compliant @@ -258,30 +341,28 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) * RTAS calls for the purpose, we need to try the new one and back * to the old one if the new one couldn't work properly. */ -static int pseries_eeh_get_state(struct device_node *dn, int *state) +static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) { - struct eeh_dev *edev; int config_addr; int ret; int rets[4]; int result; /* Figure out PE config address if possible */ - edev = of_node_to_eeh_dev(dn); - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { /* Fake PE unavailable info */ rets[2] = 0; ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else { return EEH_STATE_NOT_SUPPORT; } @@ -333,34 +414,32 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state) /** * pseries_eeh_reset - Reset the specified PE - * @dn: PE associated device node + * @pe: EEH PE * @option: reset option * * Reset the specified PE */ -static int pseries_eeh_reset(struct device_node *dn, int option) +static int pseries_eeh_reset(struct eeh_pe *pe, int option) { - struct eeh_dev *edev; int config_addr; int ret; /* Figure out PE address */ - edev = of_node_to_eeh_dev(dn); - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; /* Reset PE through RTAS call */ ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), option); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), option); /* If fundamental-reset not supported, try hot-reset */ if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), EEH_RESET_HOT); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), EEH_RESET_HOT); } return ret; @@ -368,13 +447,13 @@ static int pseries_eeh_reset(struct device_node *dn, int option) /** * pseries_eeh_wait_state - Wait for PE state - * @dn: PE associated device node + * @pe: EEH PE * @max_wait: maximal period in microsecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. */ -static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) +static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait) { int ret; int mwait; @@ -391,7 +470,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) #define EEH_STATE_MAX_WAIT_TIME (300 * 1000) while (1) { - ret = pseries_eeh_get_state(dn, &mwait); + ret = pseries_eeh_get_state(pe, &mwait); /* * If the PE's state is temporarily unavailable, @@ -426,7 +505,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) /** * pseries_eeh_get_log - Retrieve error log - * @dn: device node + * @pe: EEH PE * @severity: temporary or permanent error log * @drv_log: driver log to be combined with retrieved error log * @len: length of driver log @@ -435,24 +514,22 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) * Actually, the error will be retrieved through the dedicated * RTAS call. */ -static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len) +static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - struct eeh_dev *edev; int config_addr; unsigned long flags; int ret; - edev = of_node_to_eeh_dev(dn); spin_lock_irqsave(&slot_errbuf_lock, flags); memset(slot_errbuf, 0, eeh_error_buf_size); /* Figure out the PE address */ - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, - BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), + BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), virt_to_phys(drv_log), len, virt_to_phys(slot_errbuf), eeh_error_buf_size, severity); @@ -465,40 +542,38 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l /** * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE - * @dn: PE associated device node + * @pe: EEH PE * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered * again. */ -static int pseries_eeh_configure_bridge(struct device_node *dn) +static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { - struct eeh_dev *edev; int config_addr; int ret; /* Figure out the PE address */ - edev = of_node_to_eeh_dev(dn); - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; /* Use new configure-pe function, if supported */ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_configure_pe, 3, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else { return -EFAULT; } if (ret) - pr_warning("%s: Unable to configure bridge %d for %s\n", - __func__, ret, dn->full_name); + pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + __func__, pe->phb->global_number, pe->addr, ret); return ret; } @@ -542,6 +617,8 @@ static int pseries_eeh_write_config(struct device_node *dn, int where, int size, static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, + .of_probe = pseries_eeh_of_probe, + .dev_probe = NULL, .set_option = pseries_eeh_set_option, .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, @@ -559,7 +636,21 @@ static struct eeh_ops pseries_eeh_ops = { * EEH initialization on pseries platform. This function should be * called before any EEH related functions. */ -int __init eeh_pseries_init(void) +static int __init eeh_pseries_init(void) { - return eeh_ops_register(&pseries_eeh_ops); + int ret = -EINVAL; + + if (!machine_is(pseries)) + return ret; + + ret = eeh_ops_register(&pseries_eeh_ops); + if (!ret) + pr_info("EEH: pSeries platform initialized\n"); + else + pr_info("EEH: pSeries platform initialization failure (%d)\n", + ret); + + return ret; } + +early_initcall(eeh_pseries_init); diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c index 243b3510d70f..d37708360f2e 100644 --- a/arch/powerpc/platforms/pseries/eeh_sysfs.c +++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c @@ -53,9 +53,6 @@ static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); -EEH_SHOW_ATTR(eeh_check_count, check_count, "%d" ); -EEH_SHOW_ATTR(eeh_freeze_count, freeze_count, "%d" ); -EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d" ); void eeh_sysfs_add_device(struct pci_dev *pdev) { @@ -64,9 +61,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_check_count); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count); if (rc) printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); @@ -77,8 +71,5 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); - device_remove_file(&pdev->dev, &dev_attr_eeh_check_count); - device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives); - device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count); } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 11d8e0544ac0..ecdb0a6b3171 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -78,6 +78,8 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz unsigned long start, start_pfn; struct zone *zone; int ret; + unsigned long section; + unsigned long sections_to_remove; start_pfn = base >> PAGE_SHIFT; @@ -97,9 +99,13 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz * to sysfs "state" file and we can't remove sysfs entries * while writing to it. So we have to defer it to here. */ - ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT); - if (ret) - return ret; + sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION; + for (section = 0; section < sections_to_remove; section++) { + unsigned long pfn = start_pfn + section * PAGES_PER_SECTION; + ret = __remove_pages(zone, pfn, PAGES_PER_SECTION); + if (ret) + return ret; + } /* * Update memory regions for memory remove diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index bca220f2873c..6153eea27ce7 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -28,6 +28,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/memblock.h> #include <linux/spinlock.h> #include <linux/sched.h> /* for show_stack */ #include <linux/string.h> @@ -41,7 +42,6 @@ #include <asm/iommu.h> #include <asm/pci-bridge.h> #include <asm/machdep.h> -#include <asm/abs_addr.h> #include <asm/pSeries_reconfig.h> #include <asm/firmware.h> #include <asm/tce.h> @@ -99,7 +99,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + rpn = __pa(uaddr) >> TCE_SHIFT; *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; uaddr += TCE_PAGE_SIZE; @@ -148,7 +148,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + rpn = __pa(uaddr) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; @@ -217,7 +217,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, __get_cpu_var(tce_page) = tcep; } - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + rpn = __pa(uaddr) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; @@ -237,7 +237,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, rc = plpar_tce_put_indirect((u64)tbl->it_index, (u64)tcenum << 12, - (u64)virt_to_abs(tcep), + (u64)__pa(tcep), limit); npages -= limit; @@ -441,7 +441,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, rc = plpar_tce_put_indirect(liobn, dma_offset, - (u64)virt_to_abs(tcep), + (u64)__pa(tcep), limit); num_tce -= limit; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 5f3ef876ded2..0da39fed355a 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -31,7 +31,6 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/machdep.h> -#include <asm/abs_addr.h> #include <asm/mmu_context.h> #include <asm/iommu.h> #include <asm/tlbflush.h> @@ -108,9 +107,9 @@ void vpa_init(int cpu) } static long pSeries_lpar_hpte_insert(unsigned long hpte_group, - unsigned long va, unsigned long pa, - unsigned long rflags, unsigned long vflags, - int psize, int ssize) + unsigned long vpn, unsigned long pa, + unsigned long rflags, unsigned long vflags, + int psize, int ssize) { unsigned long lpar_rc; unsigned long flags; @@ -118,11 +117,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long hpte_v, hpte_r; if (!(vflags & HPTE_V_BOLTED)) - pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " - "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); + pr_devel("hpte_insert(group=%lx, vpn=%016lx, " + "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, vpn, pa, rflags, vflags, psize); - hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; + hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; if (!(vflags & HPTE_V_BOLTED)) @@ -227,22 +226,6 @@ static void pSeries_lpar_hptab_clear(void) } /* - * This computes the AVPN and B fields of the first dword of a HPTE, - * for use when we want to match an existing PTE. The bottom 7 bits - * of the returned value are zero. - */ -static inline unsigned long hpte_encode_avpn(unsigned long va, int psize, - int ssize) -{ - unsigned long v; - - v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); - v <<= HPTE_V_AVPN_SHIFT; - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; - return v; -} - -/* * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and * the low 3 bits of flags happen to line up. So no transform is needed. * We can probably optimize here and assume the high bits of newpp are @@ -250,14 +233,14 @@ static inline unsigned long hpte_encode_avpn(unsigned long va, int psize, */ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, + unsigned long vpn, int psize, int ssize, int local) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; unsigned long want_v; - want_v = hpte_encode_avpn(va, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", want_v, slot, flags, psize); @@ -295,15 +278,15 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) return dword0; } -static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize) +static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) { unsigned long hash; unsigned long i; long slot; unsigned long want_v, hpte_v; - hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize); - want_v = hpte_encode_avpn(va, psize, ssize); + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); /* Bolted entries are always in the primary group */ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -323,12 +306,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, int psize, int ssize) { - unsigned long lpar_rc, slot, vsid, va, flags; + unsigned long vpn; + unsigned long lpar_rc, slot, vsid, flags; vsid = get_kernel_vsid(ea, ssize); - va = hpt_va(ea, vsid, ssize); + vpn = hpt_vpn(ea, vsid, ssize); - slot = pSeries_lpar_hpte_find(va, psize, ssize); + slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); flags = newpp & 7; @@ -337,17 +321,17 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, BUG_ON(lpar_rc != H_SUCCESS); } -static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, +static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, int psize, int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; unsigned long dummy1, dummy2; - pr_devel(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", - slot, va, psize, local); + pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", + slot, vpn, psize, local); - want_v = hpte_encode_avpn(va, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); if (lpar_rc == H_NOT_FOUND) return; @@ -358,15 +342,16 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, static void pSeries_lpar_hpte_removebolted(unsigned long ea, int psize, int ssize) { - unsigned long slot, vsid, va; + unsigned long vpn; + unsigned long slot, vsid; vsid = get_kernel_vsid(ea, ssize); - va = hpt_va(ea, vsid, ssize); + vpn = hpt_vpn(ea, vsid, ssize); - slot = pSeries_lpar_hpte_find(va, psize, ssize); + slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0); + pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); } /* Flag bits for H_BULK_REMOVE */ @@ -382,12 +367,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea, */ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) { + unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; - unsigned long va; unsigned long hash, index, shift, hidx, slot; real_pte_t pte; int psize, ssize; @@ -399,21 +384,21 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) ssize = batch->ssize; pix = 0; for (i = 0; i < number; i++) { - va = batch->vaddr[i]; + vpn = batch->vpn[i]; pte = batch->pte[i]; - pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - hash = hpt_hash(va, shift, ssize); + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + hash = hpt_hash(vpn, shift, ssize); hidx = __rpte_to_hidx(pte, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { - pSeries_lpar_hpte_invalidate(slot, va, psize, + pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, local); } else { param[pix] = HBR_REQUEST | HBR_AVPN | slot; - param[pix+1] = hpte_encode_avpn(va, psize, + param[pix+1] = hpte_encode_avpn(vpn, psize, ssize); pix += 2; if (pix == 8) { diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 109fdb75578d..d19f4977c834 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -210,6 +210,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; + struct eeh_dev *edev; /* Found our PE and assume 8 at that point. */ @@ -217,7 +218,10 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) if (!dn) return NULL; - dn = eeh_find_device_pe(dn); + /* Get the top level device in the PE */ + edev = of_node_to_eeh_dev(dn); + edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); + dn = eeh_dev_to_of_node(edev); if (!dn) return NULL; @@ -387,12 +391,13 @@ static int check_msix_entries(struct pci_dev *pdev) return 0; } -static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) { struct pci_dn *pdn; int hwirq, virq, i, rc; struct msi_desc *entry; struct msi_msg msg; + int nvec = nvec_in; pdn = get_pdn(pdev); if (!pdn) @@ -402,10 +407,23 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -EINVAL; /* + * Firmware currently refuse any non power of two allocation + * so we round up if the quota will allow it. + */ + if (type == PCI_CAP_ID_MSIX) { + int m = roundup_pow_of_two(nvec); + int quota = msi_quota_for_device(pdev, m); + + if (quota >= m) + nvec = m; + } + + /* * Try the new more explicit firmware interface, if that fails fall * back to the old interface. The old interface is known to never * return MSI-Xs. */ +again: if (type == PCI_CAP_ID_MSI) { rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); @@ -417,6 +435,10 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); if (rc != nvec) { + if (nvec != nvec_in) { + nvec = nvec_in; + goto again; + } pr_debug("rtas_msi: rtas_change_msi() failed\n"); return rc; } diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 2c6ded29f73d..56b864d777ee 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -73,7 +73,7 @@ void __init pSeries_final_fixup(void) { pSeries_request_regions(); - pci_addr_cache_build(); + eeh_addr_cache_build(); } /* diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 3ccebc83dc02..261a577a3dd2 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -65,27 +65,43 @@ pcibios_find_pci_bus(struct device_node *dn) EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); /** - * pcibios_remove_pci_devices - remove all devices under this bus + * __pcibios_remove_pci_devices - remove all devices under this bus + * @bus: the indicated PCI bus + * @purge_pe: destroy the PE on removal of PCI devices * * Remove all of the PCI devices under this bus both from the * linux pci device tree, and from the powerpc EEH address cache. + * By default, the corresponding PE will be destroied during the + * normal PCI hotplug path. For PCI hotplug during EEH recovery, + * the corresponding PE won't be destroied and deallocated. */ -void pcibios_remove_pci_devices(struct pci_bus *bus) +void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) { - struct pci_dev *dev, *tmp; + struct pci_dev *dev, *tmp; struct pci_bus *child_bus; /* First go down child busses */ list_for_each_entry(child_bus, &bus->children, node) - pcibios_remove_pci_devices(child_bus); + __pcibios_remove_pci_devices(child_bus, purge_pe); pr_debug("PCI: Removing devices on bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); + pci_domain_nr(bus), bus->number); list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { pr_debug(" * Removing %s...\n", pci_name(dev)); - eeh_remove_bus_device(dev); - pci_stop_and_remove_bus_device(dev); - } + eeh_remove_bus_device(dev, purge_pe); + pci_stop_and_remove_bus_device(dev); + } +} + +/** + * pcibios_remove_pci_devices - remove all devices under this bus + * + * Remove all of the PCI devices under this bus both from the + * linux pci device tree, and from the powerpc EEH address cache. + */ +void pcibios_remove_pci_devices(struct pci_bus *bus) +{ + __pcibios_remove_pci_devices(bus, 1); } EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 455760b1fe6e..45d00e5fe14d 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -33,13 +33,6 @@ static int max_idle_state = MAX_IDLE_STATE_COUNT - 1; static struct cpuidle_device __percpu *pseries_cpuidle_devices; static struct cpuidle_state *cpuidle_state_table; -void update_smt_snooze_delay(int snooze) -{ - struct cpuidle_driver *drv = cpuidle_get_driver(); - if (drv) - drv->states[0].target_residency = snooze; -} - static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before) { @@ -66,32 +59,22 @@ static int snooze_loop(struct cpuidle_device *dev, { unsigned long in_purr; ktime_t kt_before; - unsigned long start_snooze; - long snooze = drv->states[0].target_residency; + int cpu = dev->cpu; idle_loop_prolog(&in_purr, &kt_before); + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); - if (snooze) { - start_snooze = get_tb() + snooze * tb_ticks_per_usec; - local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); - - while ((snooze < 0) || (get_tb() < start_snooze)) { - if (need_resched() || cpu_is_offline(dev->cpu)) - goto out; - ppc64_runlatch_off(); - HMT_low(); - HMT_very_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb(); - local_irq_disable(); + while ((!need_resched()) && cpu_online(cpu)) { + ppc64_runlatch_off(); + HMT_low(); + HMT_very_low(); } -out: HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + dev->last_residency = (int)idle_loop_epilog(in_purr, kt_before); return index; @@ -172,8 +155,8 @@ static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = { .name = "CEDE", .desc = "CEDE", .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 1, - .target_residency = 10, + .exit_latency = 10, + .target_residency = 100, .enter = &dedicated_cede_loop }, }; @@ -190,6 +173,23 @@ static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = { .enter = &shared_cede_loop }, }; +void update_smt_snooze_delay(int cpu, int residency) +{ + struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); + + if (cpuidle_state_table != dedicated_states) + return; + + if (residency < 0) { + /* Disable the Nap state on that cpu */ + if (dev) + dev->states_usage[1].disable = 1; + } else + if (drv) + drv->states[1].target_residency = residency; +} + static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, unsigned long action, void *hcpu) { @@ -246,10 +246,6 @@ static int pseries_cpuidle_driver_init(void) drv->states[drv->state_count] = /* structure copy */ cpuidle_state_table[idle_state]; - if (cpuidle_state_table == dedicated_states) - drv->states[drv->state_count].target_residency = - __get_cpu_var(smt_snooze_delay); - drv->state_count += 1; } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 51ecac920dd8..e3cb7ae61658 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -388,10 +388,8 @@ static void __init pSeries_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - eeh_pseries_init(); find_and_init_phbs(); pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); - eeh_init(); pSeries_nvram_init(); @@ -416,16 +414,20 @@ static int __init pSeries_init_panel(void) } machine_arch_initcall(pseries, pSeries_init_panel); -static int pseries_set_dabr(unsigned long dabr) +static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx) { return plpar_hcall_norets(H_SET_DABR, dabr); } -static int pseries_set_xdabr(unsigned long dabr) +static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) { - /* We want to catch accesses from kernel and userspace */ - return plpar_hcall_norets(H_SET_XDABR, dabr, - H_DABRX_KERNEL | H_DABRX_USER); + /* Have to set at least one bit in the DABRX according to PAPR */ + if (dabrx == 0 && dabr == 0) + dabrx = DABRX_USER; + /* PAPR says we can only set kernel and user bits */ + dabrx &= DABRX_KERNEL | DABRX_USER; + + return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); } #define CMO_CHARACTERISTICS_TOKEN 44 @@ -529,10 +531,10 @@ static void __init pSeries_init_early(void) if (firmware_has_feature(FW_FEATURE_LPAR)) hvc_vio_init_early(); #endif - if (firmware_has_feature(FW_FEATURE_DABR)) - ppc_md.set_dabr = pseries_set_dabr; - else if (firmware_has_feature(FW_FEATURE_XDABR)) + if (firmware_has_feature(FW_FEATURE_XDABR)) ppc_md.set_dabr = pseries_set_xdabr; + else if (firmware_has_feature(FW_FEATURE_DABR)) + ppc_md.set_dabr = pseries_set_dabr; pSeries_cmo_feature_init(); iommu_init_early_pSeries(); |