diff options
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r-- | arch/powerpc/platforms/powernv/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-ioda.c | 916 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-powernv.c | 379 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-wrappers.S | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal.c | 69 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 62 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-p5ioc2.c | 11 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.c | 139 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 35 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/setup.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/smp.c | 4 |
11 files changed, 1578 insertions, 45 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index bcc3cb48a44e..7fe595152478 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -3,3 +3,4 @@ obj-y += opal-rtc.o opal-nvram.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o +obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c new file mode 100644 index 000000000000..0cd1c4a71755 --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -0,0 +1,916 @@ +/* + * The file intends to implement the functions needed by EEH, which is + * built on IODA compliant chip. Actually, lots of functions related + * to EEH would be built based on the OPAL APIs. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/bootmem.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/io.h> +#include <asm/iommu.h> +#include <asm/msi_bitmap.h> +#include <asm/opal.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/tce.h> + +#include "powernv.h" +#include "pci.h" + +/* Debugging option */ +#ifdef IODA_EEH_DBG_ON +#define IODA_EEH_DBG(args...) pr_info(args) +#else +#define IODA_EEH_DBG(args...) +#endif + +static char *hub_diag = NULL; +static int ioda_eeh_nb_init = 0; + +static int ioda_eeh_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + uint64_t changed_evts = (uint64_t)change; + + /* We simply send special EEH event */ + if ((changed_evts & OPAL_EVENT_PCI_ERROR) && + (events & OPAL_EVENT_PCI_ERROR)) + eeh_send_failure_event(NULL); + + return 0; +} + +static struct notifier_block ioda_eeh_nb = { + .notifier_call = ioda_eeh_event, + .next = NULL, + .priority = 0 +}; + +#ifdef CONFIG_DEBUG_FS +static int ioda_eeh_dbgfs_set(void *data, u64 val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + out_be64(phb->regs + 0xD10, val); + return 0; +} + +static int ioda_eeh_dbgfs_get(void *data, u64 *val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + *val = in_be64(phb->regs + 0xD10); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_dbgfs_ops, ioda_eeh_dbgfs_get, + ioda_eeh_dbgfs_set, "0x%llx\n"); +#endif /* CONFIG_DEBUG_FS */ + +/** + * ioda_eeh_post_init - Chip dependent post initialization + * @hose: PCI controller + * + * The function will be called after eeh PEs and devices + * have been built. That means the EEH is ready to supply + * service with I/O cache. + */ +static int ioda_eeh_post_init(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + int ret; + + /* Register OPAL event notifier */ + if (!ioda_eeh_nb_init) { + ret = opal_notifier_register(&ioda_eeh_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + + ioda_eeh_nb_init = 1; + } + + /* FIXME: Enable it for PHB3 later */ + if (phb->type == PNV_PHB_IODA1) { + if (!hub_diag) { + hub_diag = (char *)__get_free_page(GFP_KERNEL | + __GFP_ZERO); + if (!hub_diag) { + pr_err("%s: Out of memory !\n", + __func__); + return -ENOMEM; + } + } + +#ifdef CONFIG_DEBUG_FS + if (phb->dbgfs) + debugfs_create_file("err_injct", 0600, + phb->dbgfs, hose, + &ioda_eeh_dbgfs_ops); +#endif + + phb->eeh_state |= PNV_EEH_STATE_ENABLED; + } + + return 0; +} + +/** + * ioda_eeh_set_option - Set EEH operation or I/O setting + * @pe: EEH PE + * @option: options + * + * Enable or disable EEH option for the indicated PE. The + * function also can be used to enable I/O or DMA for the + * PE. + */ +static int ioda_eeh_set_option(struct eeh_pe *pe, int option) +{ + s64 ret; + u32 pe_no; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + /* Check on PE number */ + if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { + pr_err("%s: PE address %x out of range [0, %x] " + "on PHB#%x\n", + __func__, pe->addr, phb->ioda.total_pe, + hose->global_number); + return -EINVAL; + } + + pe_no = pe->addr; + switch (option) { + case EEH_OPT_DISABLE: + ret = -EEXIST; + break; + case EEH_OPT_ENABLE: + ret = 0; + break; + case EEH_OPT_THAW_MMIO: + ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); + if (ret) { + pr_warning("%s: Failed to enable MMIO for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + break; + case EEH_OPT_THAW_DMA: + ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); + if (ret) { + pr_warning("%s: Failed to enable DMA for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + break; + default: + pr_warning("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } + + return ret; +} + +/** + * ioda_eeh_get_state - Retrieve the state of PE + * @pe: EEH PE + * + * The PE's state should be retrieved from the PEEV, PEST + * IODA tables. Since the OPAL has exported the function + * to do it, it'd better to use that. + */ +static int ioda_eeh_get_state(struct eeh_pe *pe) +{ + s64 ret = 0; + u8 fstate; + u16 pcierr; + u32 pe_no; + int result; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + /* + * Sanity check on PE address. The PHB PE address should + * be zero. + */ + if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { + pr_err("%s: PE address %x out of range [0, %x] " + "on PHB#%x\n", + __func__, pe->addr, phb->ioda.total_pe, + hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } + + /* Retrieve PE status through OPAL */ + pe_no = pe->addr; + ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &fstate, &pcierr, NULL); + if (ret) { + pr_err("%s: Failed to get EEH status on " + "PHB#%x-PE#%x\n, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return EEH_STATE_NOT_SUPPORT; + } + + /* Check PHB status */ + if (pe->type & EEH_PE_PHB) { + result = 0; + result &= ~EEH_STATE_RESET_ACTIVE; + + if (pcierr != OPAL_EEH_PHB_ERROR) { + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + result |= EEH_STATE_DMA_ENABLED; + } + + return result; + } + + /* Parse result out */ + result = 0; + switch (fstate) { + case OPAL_EEH_STOPPED_NOT_FROZEN: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + result |= EEH_STATE_DMA_ENABLED; + break; + case OPAL_EEH_STOPPED_MMIO_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_DMA_ENABLED; + break; + case OPAL_EEH_STOPPED_DMA_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + break; + case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_RESET: + result |= EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_TEMP_UNAVAIL: + result |= EEH_STATE_UNAVAILABLE; + break; + case OPAL_EEH_STOPPED_PERM_UNAVAIL: + result |= EEH_STATE_NOT_SUPPORT; + break; + default: + pr_warning("%s: Unexpected EEH status 0x%x " + "on PHB#%x-PE#%x\n", + __func__, fstate, hose->global_number, pe_no); + } + + return result; +} + +static int ioda_eeh_pe_clear(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + u32 pe_no; + u8 fstate; + u16 pcierr; + s64 ret; + + pe_no = pe->addr; + hose = pe->phb; + phb = pe->phb->private_data; + + /* Clear the EEH error on the PE */ + ret = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (ret) { + pr_err("%s: Failed to clear EEH error for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + /* + * Read the PE state back and verify that the frozen + * state has been removed. + */ + ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &fstate, &pcierr, NULL); + if (ret) { + pr_err("%s: Failed to get EEH status on " + "PHB#%x-PE#%x\n, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) { + pr_err("%s: Frozen state not cleared on " + "PHB#%x-PE#%x, sts=%x\n", + __func__, hose->global_number, pe_no, fstate); + return -EIO; + } + + return 0; +} + +static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) +{ + s64 rc = OPAL_HARDWARE; + + while (1) { + rc = opal_pci_poll(phb->opal_id); + if (rc <= 0) + break; + + msleep(rc); + } + + return rc; +} + +static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* Issue PHB complete reset request */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_PHB_COMPLETE, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_PHB_COMPLETE, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* + * Poll state of the PHB until the request is done + * successfully. + */ + rc = ioda_eeh_phb_poll(phb); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int ioda_eeh_root_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_SUCCESS; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* + * During the reset deassert time, we needn't care + * the reset scope because the firmware does nothing + * for fundamental or hot reset during deassert phase. + */ + if (option == EEH_RESET_FUNDAMENTAL) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_FUNDAMENTAL_RESET, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_HOT_RESET, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_HOT_RESET, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* Poll state of the PHB until the request is done */ + rc = ioda_eeh_phb_poll(phb); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int ioda_eeh_bridge_reset(struct pci_controller *hose, + struct pci_dev *dev, int option) +{ + u16 ctrl; + + pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n", + __func__, hose->global_number, dev->bus->number, + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + case EEH_RESET_HOT: + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + break; + case EEH_RESET_DEACTIVATE: + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + break; + } + + return 0; +} + +/** + * ioda_eeh_reset - Reset the indicated PE + * @pe: EEH PE + * @option: reset option + * + * Do reset on the indicated PE. For PCI bus sensitive PE, + * we need to reset the parent p2p bridge. The PHB has to + * be reinitialized if the p2p bridge is root bridge. For + * PCI device sensitive PE, we will try to reset the device + * through FLR. For now, we don't have OPAL APIs to do HARD + * reset yet, so all reset would be SOFT (HOT) reset. + */ +static int ioda_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct eeh_dev *edev; + struct pci_dev *dev; + int ret; + + /* + * Anyway, we have to clear the problematic state for the + * corresponding PE. However, we needn't do it if the PE + * is PHB associated. That means the PHB is having fatal + * errors and it needs reset. Further more, the AIB interface + * isn't reliable any more. + */ + if (!(pe->type & EEH_PE_PHB) && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + ret = ioda_eeh_pe_clear(pe); + if (ret) + return -EIO; + } + + /* + * The rules applied to reset, either fundamental or hot reset: + * + * We always reset the direct upstream bridge of the PE. If the + * direct upstream bridge isn't root bridge, we always take hot + * reset no matter what option (fundamental or hot) is. Otherwise, + * we should do the reset according to the required option. + */ + if (pe->type & EEH_PE_PHB) { + ret = ioda_eeh_phb_reset(hose, option); + } else { + if (pe->type & EEH_PE_DEVICE) { + /* + * If it's device PE, we didn't refer to the parent + * PCI bus yet. So we have to figure it out indirectly. + */ + edev = list_first_entry(&pe->edevs, + struct eeh_dev, list); + dev = eeh_dev_to_pci_dev(edev); + dev = dev->bus->self; + } else { + /* + * If it's bus PE, the parent PCI bus is already there + * and just pick it up. + */ + dev = pe->bus->self; + } + + /* + * Do reset based on the fact that the direct upstream bridge + * is root bridge (port) or not. + */ + if (dev->bus->number == 0) + ret = ioda_eeh_root_reset(hose, option); + else + ret = ioda_eeh_bridge_reset(hose, dev, option); + } + + return ret; +} + +/** + * ioda_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: Severity level of the log + * @drv_log: buffer to store the log + * @len: space of the log buffer + * + * The function is used to retrieve error log from P7IOC. + */ +static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + s64 ret; + unsigned long flags; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + spin_lock_irqsave(&phb->lock, flags); + + ret = opal_pci_get_phb_diag_data2(phb->opal_id, + phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + if (ret) { + spin_unlock_irqrestore(&phb->lock, flags); + pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n", + __func__, hose->global_number, pe->addr); + return -EIO; + } + + /* + * FIXME: We probably need log the error in somewhere. + * Lets make it up in future. + */ + /* pr_info("%s", phb->diag.blob); */ + + spin_unlock_irqrestore(&phb->lock, flags); + + return 0; +} + +/** + * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE + * @pe: EEH PE + * + * For particular PE, it might have included PCI bridges. In order + * to make the PE work properly, those PCI bridges should be configured + * correctly. However, we need do nothing on P7IOC since the reset + * function will do everything that should be covered by the function. + */ +static int ioda_eeh_configure_bridge(struct eeh_pe *pe) +{ + return 0; +} + +static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) +{ + /* GEM */ + pr_info(" GEM XFIR: %016llx\n", data->gemXfir); + pr_info(" GEM RFIR: %016llx\n", data->gemRfir); + pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); + pr_info(" GEM Mask: %016llx\n", data->gemMask); + pr_info(" GEM RWOF: %016llx\n", data->gemRwof); + + /* LEM */ + pr_info(" LEM FIR: %016llx\n", data->lemFir); + pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); + pr_info(" LEM Action 0: %016llx\n", data->lemAction0); + pr_info(" LEM Action 1: %016llx\n", data->lemAction1); + pr_info(" LEM WOF: %016llx\n", data->lemWof); +} + +static void ioda_eeh_hub_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoP7IOCErrorData *data; + long rc; + + data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag; + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE); + if (rc != OPAL_SUCCESS) { + pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); + return; + } + + switch (data->type) { + case OPAL_P7IOC_DIAG_TYPE_RGC: + pr_info("P7IOC diag-data for RGC\n\n"); + ioda_eeh_hub_diag_common(data); + pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); + pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); + break; + case OPAL_P7IOC_DIAG_TYPE_BI: + pr_info("P7IOC diag-data for BI %s\n\n", + data->bi.biDownbound ? "Downbound" : "Upbound"); + ioda_eeh_hub_diag_common(data); + pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); + pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); + pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); + pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); + break; + case OPAL_P7IOC_DIAG_TYPE_CI: + pr_info("P7IOC diag-data for CI Port %d\\nn", + data->ci.ciPort); + ioda_eeh_hub_diag_common(data); + pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); + pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); + break; + case OPAL_P7IOC_DIAG_TYPE_MISC: + pr_info("P7IOC diag-data for MISC\n\n"); + ioda_eeh_hub_diag_common(data); + break; + case OPAL_P7IOC_DIAG_TYPE_I2C: + pr_info("P7IOC diag-data for I2C\n\n"); + ioda_eeh_hub_diag_common(data); + break; + default: + pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); + } +} + +static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoP7IOCPhbErrorData *data; + int i; + + data = (struct OpalIoP7IOCPhbErrorData *)common; + + pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n", + hose->global_number, common->version); + + pr_info(" brdgCtl: %08x\n", data->brdgCtl); + + pr_info(" portStatusReg: %08x\n", data->portStatusReg); + pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); + pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); + + pr_info(" deviceStatus: %08x\n", data->deviceStatus); + pr_info(" slotStatus: %08x\n", data->slotStatus); + pr_info(" linkStatus: %08x\n", data->linkStatus); + pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); + pr_info(" devSecStatus: %08x\n", data->devSecStatus); + + pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); + pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); + pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); + pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); + pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); + pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); + pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); + pr_info(" sourceId: %08x\n", data->sourceId); + + pr_info(" errorClass: %016llx\n", data->errorClass); + pr_info(" correlator: %016llx\n", data->correlator); + pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr); + pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr); + pr_info(" lemFir: %016llx\n", data->lemFir); + pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); + pr_info(" lemWOF: %016llx\n", data->lemWOF); + pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); + pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); + pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); + pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); + pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); + pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); + pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); + pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); + pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); + pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); + pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); + pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); + pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); + pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); + pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); + pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); + + for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { + if ((data->pestA[i] >> 63) == 0 && + (data->pestB[i] >> 63) == 0) + continue; + + pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); + pr_info(" PESTB: %016llx\n", data->pestB[i]); + } +} + +static void ioda_eeh_phb_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoPhbErrorCommon *common; + long rc; + + common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; + rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE); + if (rc != OPAL_SUCCESS) { + pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", + __func__, hose->global_number, rc); + return; + } + + switch (common->ioType) { + case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: + ioda_eeh_p7ioc_phb_diag(hose, common); + break; + default: + pr_warning("%s: Unrecognized I/O chip %d\n", + __func__, common->ioType); + } +} + +static int ioda_eeh_get_phb_pe(struct pci_controller *hose, + struct eeh_pe **pe) +{ + struct eeh_pe *phb_pe; + + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) { + pr_warning("%s Can't find PE for PHB#%d\n", + __func__, hose->global_number); + return -EEXIST; + } + + *pe = phb_pe; + return 0; +} + +static int ioda_eeh_get_pe(struct pci_controller *hose, + u16 pe_no, struct eeh_pe **pe) +{ + struct eeh_pe *phb_pe, *dev_pe; + struct eeh_dev dev; + + /* Find the PHB PE */ + if (ioda_eeh_get_phb_pe(hose, &phb_pe)) + return -EEXIST; + + /* Find the PE according to PE# */ + memset(&dev, 0, sizeof(struct eeh_dev)); + dev.phb = hose; + dev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&dev); + if (!dev_pe) { + pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n", + __func__, hose->global_number, pe_no); + return -EEXIST; + } + + *pe = dev_pe; + return 0; +} + +/** + * ioda_eeh_next_error - Retrieve next error for EEH core to handle + * @pe: The affected PE + * + * The function is expected to be called by EEH core while it gets + * special EEH event (without binding PE). The function calls to + * OPAL APIs for next error to handle. The informational error is + * handled internally by platform. However, the dead IOC, dead PHB, + * fenced PHB and frozen PE should be handled by EEH core eventually. + */ +static int ioda_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + u64 frozen_pe_no; + u16 err_type, severity; + long rc; + int ret = 1; + + /* + * While running here, it's safe to purge the event queue. + * And we should keep the cached OPAL notifier event sychronized + * between the kernel and firmware. + */ + eeh_remove_event(NULL); + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + /* + * If the subordinate PCI buses of the PHB has been + * removed, we needn't take care of it any more. + */ + phb = hose->private_data; + if (phb->eeh_state & PNV_EEH_STATE_REMOVED) + continue; + + rc = opal_pci_next_error(phb->opal_id, + &frozen_pe_no, &err_type, &severity); + + /* If OPAL API returns error, we needn't proceed */ + if (rc != OPAL_SUCCESS) { + IODA_EEH_DBG("%s: Invalid return value on " + "PHB#%x (0x%lx) from opal_pci_next_error", + __func__, hose->global_number, rc); + continue; + } + + /* If the PHB doesn't have error, stop processing */ + if (err_type == OPAL_EEH_NO_ERROR || + severity == OPAL_EEH_SEV_NO_ERROR) { + IODA_EEH_DBG("%s: No error found on PHB#%x\n", + __func__, hose->global_number); + continue; + } + + /* + * Processing the error. We're expecting the error with + * highest priority reported upon multiple errors on the + * specific PHB. + */ + IODA_EEH_DBG("%s: Error (%d, %d, %d) on PHB#%x\n", + err_type, severity, pe_no, hose->global_number); + switch (err_type) { + case OPAL_EEH_IOC_ERROR: + if (severity == OPAL_EEH_SEV_IOC_DEAD) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb = hose->private_data; + phb->eeh_state |= PNV_EEH_STATE_REMOVED; + } + + pr_err("EEH: dead IOC detected\n"); + ret = 4; + goto out; + } else if (severity == OPAL_EEH_SEV_INF) { + pr_info("EEH: IOC informative error " + "detected\n"); + ioda_eeh_hub_diag(hose); + } + + break; + case OPAL_EEH_PHB_ERROR: + if (severity == OPAL_EEH_SEV_PHB_DEAD) { + if (ioda_eeh_get_phb_pe(hose, pe)) + break; + + pr_err("EEH: dead PHB#%x detected\n", + hose->global_number); + phb->eeh_state |= PNV_EEH_STATE_REMOVED; + ret = 3; + goto out; + } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { + if (ioda_eeh_get_phb_pe(hose, pe)) + break; + + pr_err("EEH: fenced PHB#%x detected\n", + hose->global_number); + ret = 2; + goto out; + } else if (severity == OPAL_EEH_SEV_INF) { + pr_info("EEH: PHB#%x informative error " + "detected\n", + hose->global_number); + ioda_eeh_phb_diag(hose); + } + + break; + case OPAL_EEH_PE_ERROR: + if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) + break; + + pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", + (*pe)->addr, (*pe)->phb->global_number); + ret = 1; + goto out; + } + } + + ret = 0; +out: + return ret; +} + +struct pnv_eeh_ops ioda_eeh_ops = { + .post_init = ioda_eeh_post_init, + .set_option = ioda_eeh_set_option, + .get_state = ioda_eeh_get_state, + .reset = ioda_eeh_reset, + .get_log = ioda_eeh_get_log, + .configure_bridge = ioda_eeh_configure_bridge, + .next_error = ioda_eeh_next_error +}; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c new file mode 100644 index 000000000000..969cce73055a --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -0,0 +1,379 @@ +/* + * The file intends to implement the platform dependent EEH operations on + * powernv platform. Actually, the powernv was created in order to fully + * hypervisor support. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/atomic.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> + +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/firmware.h> +#include <asm/io.h> +#include <asm/iommu.h> +#include <asm/machdep.h> +#include <asm/msi_bitmap.h> +#include <asm/opal.h> +#include <asm/ppc-pci.h> + +#include "powernv.h" +#include "pci.h" + +/** + * powernv_eeh_init - EEH platform dependent initialization + * + * EEH platform dependent initialization on powernv + */ +static int powernv_eeh_init(void) +{ + /* We require OPALv3 */ + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + pr_warning("%s: OPALv3 is required !\n", __func__); + return -EINVAL; + } + + /* Set EEH probe mode */ + eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + + return 0; +} + +/** + * powernv_eeh_post_init - EEH platform dependent post initialization + * + * EEH platform dependent post initialization on powernv. When + * the function is called, the EEH PEs and devices should have + * been built. If the I/O cache staff has been built, EEH is + * ready to supply service. + */ +static int powernv_eeh_post_init(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + int ret = 0; + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->eeh_ops && phb->eeh_ops->post_init) { + ret = phb->eeh_ops->post_init(hose); + if (ret) + break; + } + } + + return ret; +} + +/** + * powernv_eeh_dev_probe - Do probe on PCI device + * @dev: PCI device + * @flag: unused + * + * When EEH module is installed during system boot, all PCI devices + * are checked one by one to see if it supports EEH. The function + * is introduced for the purpose. By default, EEH has been enabled + * on all PCI devices. That's to say, we only need do necessary + * initialization on the corresponding eeh device and create PE + * accordingly. + * + * It's notable that's unsafe to retrieve the EEH device through + * the corresponding PCI device. During the PCI device hotplug, which + * was possiblly triggered by EEH core, the binding between EEH device + * and the PCI device isn't built yet. + */ +static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + + /* + * When probing the root bridge, which doesn't have any + * subordinate PCI devices. We don't have OF node for + * the root bridge. So it's not reasonable to continue + * the probing. + */ + if (!dn || !edev) + return 0; + + /* Skip for PCI-ISA bridge */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA) + return 0; + + /* Initialize eeh device */ + edev->class_code = dev->class; + edev->mode = 0; + edev->config_addr = ((dev->bus->number << 8) | dev->devfn); + edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); + + /* Create PE */ + eeh_add_to_parent_pe(edev); + + /* + * Enable EEH explicitly so that we will do EEH check + * while accessing I/O stuff + * + * FIXME: Enable that for PHB3 later + */ + if (phb->type == PNV_PHB_IODA1) + eeh_subsystem_enabled = 1; + + /* Save memory bars */ + eeh_save_bars(edev); + + return 0; +} + +/** + * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * @pe: EEH PE + * @option: operation to be issued + * + * The function is used to control the EEH functionality globally. + * Currently, following options are support according to PAPR: + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA + */ +static int powernv_eeh_set_option(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + /* + * What we need do is pass it down for hardware + * implementation to handle it. + */ + if (phb->eeh_ops && phb->eeh_ops->set_option) + ret = phb->eeh_ops->set_option(pe, option); + + return ret; +} + +/** + * powernv_eeh_get_pe_addr - Retrieve PE address + * @pe: EEH PE + * + * Retrieve the PE address according to the given tranditional + * PCI BDF (Bus/Device/Function) address. + */ +static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) +{ + return pe->addr; +} + +/** + * powernv_eeh_get_state - Retrieve PE state + * @pe: EEH PE + * @delay: delay while PE state is temporarily unavailable + * + * Retrieve the state of the specified PE. For IODA-compitable + * platform, it should be retrieved from IODA table. Therefore, + * we prefer passing down to hardware implementation to handle + * it. + */ +static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = EEH_STATE_NOT_SUPPORT; + + if (phb->eeh_ops && phb->eeh_ops->get_state) { + ret = phb->eeh_ops->get_state(pe); + + /* + * If the PE state is temporarily unavailable, + * to inform the EEH core delay for default + * period (1 second) + */ + if (delay) { + *delay = 0; + if (ret & EEH_STATE_UNAVAILABLE) + *delay = 1000; + } + } + + return ret; +} + +/** + * powernv_eeh_reset - Reset the specified PE + * @pe: EEH PE + * @option: reset option + * + * Reset the specified PE + */ +static int powernv_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->reset) + ret = phb->eeh_ops->reset(pe, option); + + return ret; +} + +/** + * powernv_eeh_wait_state - Wait for PE state + * @pe: EEH PE + * @max_wait: maximal period in microsecond + * + * Wait for the state of associated PE. It might take some time + * to retrieve the PE's state. + */ +static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) +{ + int ret; + int mwait; + + while (1) { + ret = powernv_eeh_get_state(pe, &mwait); + + /* + * If the PE's state is temporarily unavailable, + * we have to wait for the specified time. Otherwise, + * the PE's state will be returned immediately. + */ + if (ret != EEH_STATE_UNAVAILABLE) + return ret; + + max_wait -= mwait; + if (max_wait <= 0) { + pr_warning("%s: Timeout getting PE#%x's state (%d)\n", + __func__, pe->addr, max_wait); + return EEH_STATE_NOT_SUPPORT; + } + + msleep(mwait); + } + + return EEH_STATE_NOT_SUPPORT; +} + +/** + * powernv_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: temporary or permanent error log + * @drv_log: driver log to be combined with retrieved error log + * @len: length of driver log + * + * Retrieve the temporary or permanent error from the PE. + */ +static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->get_log) + ret = phb->eeh_ops->get_log(pe, severity, drv_log, len); + + return ret; +} + +/** + * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * @pe: EEH PE + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int powernv_eeh_configure_bridge(struct eeh_pe *pe) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = 0; + + if (phb->eeh_ops && phb->eeh_ops->configure_bridge) + ret = phb->eeh_ops->configure_bridge(pe); + + return ret; +} + +/** + * powernv_eeh_next_error - Retrieve next EEH error to handle + * @pe: Affected PE + * + * Using OPAL API, to retrieve next EEH error for EEH core to handle + */ +static int powernv_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb = NULL; + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + break; + } + + if (phb && phb->eeh_ops->next_error) + return phb->eeh_ops->next_error(pe); + + return -EEXIST; +} + +static struct eeh_ops powernv_eeh_ops = { + .name = "powernv", + .init = powernv_eeh_init, + .post_init = powernv_eeh_post_init, + .of_probe = NULL, + .dev_probe = powernv_eeh_dev_probe, + .set_option = powernv_eeh_set_option, + .get_pe_addr = powernv_eeh_get_pe_addr, + .get_state = powernv_eeh_get_state, + .reset = powernv_eeh_reset, + .wait_state = powernv_eeh_wait_state, + .get_log = powernv_eeh_get_log, + .configure_bridge = powernv_eeh_configure_bridge, + .read_config = pnv_pci_cfg_read, + .write_config = pnv_pci_cfg_write, + .next_error = powernv_eeh_next_error +}; + +/** + * eeh_powernv_init - Register platform dependent EEH operations + * + * EEH initialization on powernv platform. This function should be + * called before any EEH related functions. + */ +static int __init eeh_powernv_init(void) +{ + int ret = -EINVAL; + + if (!machine_is(powernv)) + return ret; + + ret = eeh_ops_register(&powernv_eeh_ops); + if (!ret) + pr_info("EEH: PowerNV platform initialized\n"); + else + pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); + + return ret; +} + +early_initcall(eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6fabe92eafb6..e88863ffb135 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -107,4 +107,7 @@ OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); +OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); +OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); +OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 628c564ceadb..106301fd2fa5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/interrupt.h> +#include <linux/notifier.h> #include <linux/slab.h> #include <asm/opal.h> #include <asm/firmware.h> @@ -31,6 +32,10 @@ static DEFINE_SPINLOCK(opal_write_lock); extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; +static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); +static DEFINE_SPINLOCK(opal_notifier_lock); +static uint64_t last_notified_mask = 0x0ul; +static atomic_t opal_notifier_hold = ATOMIC_INIT(0); int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) @@ -95,6 +100,68 @@ static int __init opal_register_exception_handlers(void) early_initcall(opal_register_exception_handlers); +int opal_notifier_register(struct notifier_block *nb) +{ + if (!nb) { + pr_warning("%s: Invalid argument (%p)\n", + __func__, nb); + return -EINVAL; + } + + atomic_notifier_chain_register(&opal_notifier_head, nb); + return 0; +} + +static void opal_do_notifier(uint64_t events) +{ + unsigned long flags; + uint64_t changed_mask; + + if (atomic_read(&opal_notifier_hold)) + return; + + spin_lock_irqsave(&opal_notifier_lock, flags); + changed_mask = last_notified_mask ^ events; + last_notified_mask = events; + spin_unlock_irqrestore(&opal_notifier_lock, flags); + + /* + * We feed with the event bits and changed bits for + * enough information to the callback. + */ + atomic_notifier_call_chain(&opal_notifier_head, + events, (void *)changed_mask); +} + +void opal_notifier_update_evt(uint64_t evt_mask, + uint64_t evt_val) +{ + unsigned long flags; + + spin_lock_irqsave(&opal_notifier_lock, flags); + last_notified_mask &= ~evt_mask; + last_notified_mask |= evt_val; + spin_unlock_irqrestore(&opal_notifier_lock, flags); +} + +void opal_notifier_enable(void) +{ + int64_t rc; + uint64_t evt = 0; + + atomic_set(&opal_notifier_hold, 0); + + /* Process pending events */ + rc = opal_poll_events(&evt); + if (rc == OPAL_SUCCESS && evt) + opal_do_notifier(evt); +} + +void opal_notifier_disable(void) +{ + atomic_set(&opal_notifier_hold, 1); +} + int opal_get_chars(uint32_t vtermno, char *buf, int count) { s64 len, rc; @@ -297,7 +364,7 @@ static irqreturn_t opal_interrupt(int irq, void *data) opal_handle_interrupt(virq_to_hw(irq), &events); - /* XXX TODO: Do something with the events */ + opal_do_notifier(events); return IRQ_HANDLED; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9c9d15e4cdf2..49b57b9f835d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/debugfs.h> #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> @@ -32,6 +33,7 @@ #include <asm/iommu.h> #include <asm/tce.h> #include <asm/xics.h> +#include <asm/debug.h> #include "powernv.h" #include "pci.h" @@ -441,6 +443,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev set_iommu_table_base(&pdev->dev, &pe->tce32_table); } +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, u64 *startp, u64 *endp) { @@ -595,6 +608,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR; } iommu_init_table(tbl, phb->hose->node); + iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); + + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); return; fail: @@ -667,6 +686,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } iommu_init_table(tbl, phb->hose->node); + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); + return; fail: if (pe->tce32_seg >= 0) @@ -968,11 +992,38 @@ static void pnv_pci_ioda_setup_DMA(void) } } +static void pnv_pci_ioda_create_dbgfs(void) +{ +#ifdef CONFIG_DEBUG_FS + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + char name[16]; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + sprintf(name, "PCI%04x", hose->global_number); + phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); + if (!phb->dbgfs) + pr_warning("%s: Error on creating debugfs on PHB#%x\n", + __func__, hose->global_number); + } +#endif /* CONFIG_DEBUG_FS */ +} + static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); pnv_pci_ioda_setup_seg(); pnv_pci_ioda_setup_DMA(); + + pnv_pci_ioda_create_dbgfs(); + +#ifdef CONFIG_EEH + eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + eeh_addr_cache_build(); + eeh_init(); +#endif } /* @@ -1049,7 +1100,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) OPAL_ASSERT_RESET); } -void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) +void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) { struct pci_controller *hose; static int primary = 1; @@ -1087,6 +1139,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) hose->first_busno = 0; hose->last_busno = 0xff; hose->private_data = phb; + phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = ioda_type; @@ -1172,6 +1225,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) phb->ioda.io_size, phb->ioda.io_segsize); phb->hose->ops = &pnv_pci_ops; +#ifdef CONFIG_EEH + phb->eeh_ops = &ioda_eeh_ops; +#endif /* Setup RID -> PE mapping function */ phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; @@ -1212,7 +1268,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) void pnv_pci_init_ioda2_phb(struct device_node *np) { - pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2); + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); } void __init pnv_pci_init_ioda_hub(struct device_node *np) @@ -1235,6 +1291,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np) for_each_child_of_node(np, phbn) { /* Look for IODA1 PHBs */ if (of_device_is_compatible(phbn, "ibm,ioda-phb")) - pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1); + pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); } } diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 92b37a0186c9..b68db6325c1b 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -86,13 +86,16 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { - if (phb->p5ioc2.iommu_table.it_map == NULL) + if (phb->p5ioc2.iommu_table.it_map == NULL) { iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); + iommu_register_group(&phb->p5ioc2.iommu_table, + pci_domain_nr(phb->hose->bus), phb->opal_id); + } set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); } -static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, +static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, void *tce_mem, u64 tce_size) { struct pnv_phb *phb; @@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, phb->hose->first_busno = 0; phb->hose->last_busno = 0xff; phb->hose->private_data = phb; + phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; phb->model = PNV_PHB_MODEL_P5IOC2; @@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) for_each_child_of_node(np, phbn) { if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { - pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb); + pnv_pci_init_p5ioc2_phb(phbn, hub_id, + tce_mem, tce_per_phb); tce_mem += tce_per_phb; } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 277343cc6a3d..a28d3b5e6393 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -20,6 +20,7 @@ #include <linux/irq.h> #include <linux/io.h> #include <linux/msi.h> +#include <linux/iommu.h> #include <asm/sections.h> #include <asm/io.h> @@ -32,6 +33,8 @@ #include <asm/iommu.h> #include <asm/tce.h> #include <asm/firmware.h> +#include <asm/eeh_event.h> +#include <asm/eeh.h> #include "powernv.h" #include "pci.h" @@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_lock_irqsave(&phb->lock, flags); - rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); has_diag = (rc == OPAL_SUCCESS); rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, @@ -227,43 +231,50 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_unlock_irqrestore(&phb->lock, flags); } -static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, - u32 bdfn) +static void pnv_pci_config_check_eeh(struct pnv_phb *phb, + struct device_node *dn) { s64 rc; u8 fstate; u16 pcierr; u32 pe_no; - /* Get PE# if we support IODA */ - pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0; + /* + * Get the PE#. During the PCI probe stage, we might not + * setup that yet. So all ER errors should be mapped to + * PE#0 + */ + pe_no = PCI_DN(dn)->pe_number; + if (pe_no == IODA_INVALID_PE) + pe_no = 0; /* Read freeze status */ rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, NULL); if (rc) { - pr_warning("PCI %d: Failed to read EEH status for PE#%d," - " err %lld\n", phb->hose->global_number, pe_no, rc); + pr_warning("%s: Can't read EEH status (PE#%d) for " + "%s, err %lld\n", + __func__, pe_no, dn->full_name, rc); return; } - cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", - bdfn, pe_no, fstate); + cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", + (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), + pe_no, fstate); if (fstate != 0) pnv_pci_handle_eeh_config(phb, pe_no); } -static int pnv_pci_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 *val) +int pnv_pci_cfg_read(struct device_node *dn, + int where, int size, u32 *val) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; - u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; + struct pci_dn *pdn = PCI_DN(dn); + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; +#ifdef CONFIG_EEH + struct eeh_pe *phb_pe = NULL; +#endif s64 rc; - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - switch (size) { case 1: { u8 v8; @@ -287,28 +298,43 @@ static int pnv_pci_read_config(struct pci_bus *bus, default: return PCIBIOS_FUNC_NOT_SUPPORTED; } - cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n", - bus->number, devfn, where, size, *val); - - /* Check if the PHB got frozen due to an error (no response) */ - pnv_pci_config_check_eeh(phb, bus, bdfn); + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + __func__, pdn->busno, pdn->devfn, where, size, *val); + + /* + * Check if the specified PE has been put into frozen + * state. On the other hand, we needn't do that while + * the PHB has been put into frozen state because of + * PHB-fatal errors. + */ +#ifdef CONFIG_EEH + phb_pe = eeh_phb_pe_get(pdn->phb); + if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED)) + return PCIBIOS_SUCCESSFUL; + + if (phb->eeh_state & PNV_EEH_STATE_ENABLED) { + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + } else { + pnv_pci_config_check_eeh(phb, dn); + } +#else + pnv_pci_config_check_eeh(phb, dn); +#endif return PCIBIOS_SUCCESSFUL; } -static int pnv_pci_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 val) +int pnv_pci_cfg_write(struct device_node *dn, + int where, int size, u32 val) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; - u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; - - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; + struct pci_dn *pdn = PCI_DN(dn); + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; - cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n", - bus->number, devfn, where, size, val); + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + pdn->busno, pdn->devfn, where, size, val); switch (size) { case 1: opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); @@ -322,14 +348,54 @@ static int pnv_pci_write_config(struct pci_bus *bus, default: return PCIBIOS_FUNC_NOT_SUPPORTED; } + /* Check if the PHB got frozen due to an error (no response) */ - pnv_pci_config_check_eeh(phb, bus, bdfn); +#ifdef CONFIG_EEH + if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED)) + pnv_pci_config_check_eeh(phb, dn); +#else + pnv_pci_config_check_eeh(phb, dn); +#endif return PCIBIOS_SUCCESSFUL; } +static int pnv_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + + for (dn = busdn->child; dn; dn = dn->sibling) { + pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn) + return pnv_pci_cfg_read(dn, where, size, val); + } + + *val = 0xFFFFFFFF; + return PCIBIOS_DEVICE_NOT_FOUND; + +} + +static int pnv_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + + for (dn = busdn->child; dn; dn = dn->sibling) { + pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn) + return pnv_pci_cfg_write(dn, where, size, val); + } + + return PCIBIOS_DEVICE_NOT_FOUND; +} + struct pci_ops pnv_pci_ops = { - .read = pnv_pci_read_config, + .read = pnv_pci_read_config, .write = pnv_pci_write_config, }; @@ -412,6 +478,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), be32_to_cpup(sizep), 0); iommu_init_table(tbl, hose->node); + iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); /* Deal with SW invalidated TCEs when needed (BML way) */ swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 25d76c4df50b..d633c64e05a1 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -66,15 +66,43 @@ struct pnv_ioda_pe { struct list_head list; }; +/* IOC dependent EEH operations */ +#ifdef CONFIG_EEH +struct pnv_eeh_ops { + int (*post_init)(struct pci_controller *hose); + int (*set_option)(struct eeh_pe *pe, int option); + int (*get_state)(struct eeh_pe *pe); + int (*reset)(struct eeh_pe *pe, int option); + int (*get_log)(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len); + int (*configure_bridge)(struct eeh_pe *pe); + int (*next_error)(struct eeh_pe **pe); +}; + +#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */ +#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */ + +#endif /* CONFIG_EEH */ + struct pnv_phb { struct pci_controller *hose; enum pnv_phb_type type; enum pnv_phb_model model; + u64 hub_id; u64 opal_id; void __iomem *regs; int initialized; spinlock_t lock; +#ifdef CONFIG_EEH + struct pnv_eeh_ops *eeh_ops; + int eeh_state; +#endif + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs; +#endif + #ifdef CONFIG_PCI_MSI unsigned int msi_base; unsigned int msi32_support; @@ -150,7 +178,14 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; +#ifdef CONFIG_EEH +extern struct pnv_eeh_ops ioda_eeh_ops; +#endif +int pnv_pci_cfg_read(struct device_node *dn, + int where, int size, u32 *val); +int pnv_pci_cfg_write(struct device_node *dn, + int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d4459bfc92f7..84438af96c05 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -93,6 +93,8 @@ static void __noreturn pnv_restart(char *cmd) { long rc = OPAL_BUSY; + opal_notifier_disable(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_reboot(); if (rc == OPAL_BUSY_EVENT) @@ -108,6 +110,8 @@ static void __noreturn pnv_power_off(void) { long rc = OPAL_BUSY; + opal_notifier_disable(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_power_down(0); if (rc == OPAL_BUSY_EVENT) diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 88c9459c3e07..89e3857af4e0 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -40,7 +40,7 @@ #define DBG(fmt...) #endif -static void __cpuinit pnv_smp_setup_cpu(int cpu) +static void pnv_smp_setup_cpu(int cpu) { if (cpu != boot_cpuid) xics_setup_cpu(); @@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr) /* Special case - we inhibit secondary thread startup * during boot if the user requests it. */ - if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { + if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) return 0; if (smt_enabled_at_boot |