diff options
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r-- | arch/powerpc/platforms/powernv/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-powernv.c | 303 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/idle.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/npu-dma.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-msglog.c | 34 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal.c | 7 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 348 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-p5ioc2.c | 271 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.c | 43 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 153 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/subcore.c | 2 |
11 files changed, 585 insertions, 586 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index f1516b5ecec9..cd9711e72df6 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o -obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5f152b95ca0c..950b3e539057 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -167,42 +167,26 @@ static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) return 0; } -static int pnv_eeh_outb_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xD10, val); -} - -static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xD10, val); -} - -static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xD90, val); -} - -static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xD90, val); -} - -static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xE10, val); -} +#define PNV_EEH_DBGFS_ENTRY(name, reg) \ +static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \ +{ \ + return pnv_eeh_dbgfs_set(data, reg, val); \ +} \ + \ +static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \ +{ \ + return pnv_eeh_dbgfs_get(data, reg, val); \ +} \ + \ +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \ + pnv_eeh_dbgfs_get_##name, \ + pnv_eeh_dbgfs_set_##name, \ + "0x%llx\n") + +PNV_EEH_DBGFS_ENTRY(outb, 0xD10); +PNV_EEH_DBGFS_ENTRY(inbA, 0xD90); +PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); -static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xE10, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get, - pnv_eeh_outb_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get, - pnv_eeh_inbA_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get, - pnv_eeh_inbB_dbgfs_set, "0x%llx\n"); #endif /* CONFIG_DEBUG_FS */ /** @@ -268,13 +252,13 @@ static int pnv_eeh_post_init(void) debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, - &pnv_eeh_outb_dbgfs_ops); + &pnv_eeh_dbgfs_ops_outb); debugfs_create_file("err_injct_inboundA", 0600, phb->dbgfs, hose, - &pnv_eeh_inbA_dbgfs_ops); + &pnv_eeh_dbgfs_ops_inbA); debugfs_create_file("err_injct_inboundB", 0600, phb->dbgfs, hose, - &pnv_eeh_inbB_dbgfs_ops); + &pnv_eeh_dbgfs_ops_inbB); #endif /* CONFIG_DEBUG_FS */ } @@ -387,6 +371,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) edev->mode &= 0xFFFFFF00; edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; @@ -444,9 +429,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * PCI devices of the PE are expected to be removed prior * to PE reset. */ - if (!edev->pe->bus) + if (!(edev->pe->state & EEH_PE_PRI_BUS)) { edev->pe->bus = pci_find_bus(hose->global_number, pdn->busno); + if (edev->pe->bus) + edev->pe->state |= EEH_PE_PRI_BUS; + } /* * Enable EEH explicitly so that we will do EEH check @@ -892,6 +880,120 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) } } +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, + int pos, u16 mask) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int i, status = 0; + + /* Wait for Transaction Pending bit to be cleared */ + for (i = 0; i < 4; i++) { + eeh_ops->read_config(pdn, pos, 2, &status); + if (!(status & mask)) + return; + + msleep((1 << i) * 100); + } + + pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n", + __func__, type, + edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); +} + +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 reg = 0; + + if (WARN_ON(!edev->pcie_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + if (!(reg & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + pnv_eeh_wait_for_pending(pdn, "", + edev->pcie_cap + PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_TRPND); + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg |= PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 cap = 0; + + if (WARN_ON(!edev->af_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Wait for Transaction Pending bit to clear. A word-aligned + * test is used, so we use the conrol offset rather than status + * and shift the test bit to match. + */ + pnv_eeh_wait_for_pending(pdn, "AF", + edev->af_cap + PCI_AF_CTRL, + PCI_AF_STATUS_TP << 8); + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + 1, PCI_AF_CTRL_FLR); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + int ret; + + /* The VF PE should have only one child device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + pdn = eeh_dev_to_pdn(edev); + if (!pdn) + return -ENXIO; + + ret = pnv_eeh_do_flr(pdn, option); + if (!ret) + return ret; + + return pnv_eeh_do_af_flr(pdn, option); +} + /** * pnv_eeh_reset - Reset the specified PE * @pe: EEH PE @@ -953,7 +1055,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) } bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus) || + if (pe->type & EEH_PE_VF) + ret = pnv_eeh_reset_vf_pe(pe, option); + else if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) ret = pnv_eeh_root_reset(hose, option); else @@ -1092,6 +1196,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) if (!edev || !edev->pe) return false; + /* + * We will issue FLR or AF FLR to all VFs, which are contained + * in VF PE. It relies on the EEH PCI config accessors. So we + * can't block them during the window. + */ + if (edev->physfn && (edev->pe->state & EEH_PE_RESET)) + return false; + if (edev->pe->state & EEH_PE_CFG_BLOCKED) return true; @@ -1476,6 +1588,65 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } +static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 devctl, cmd, cap2, aer_capctl; + int old_mps; + + if (edev->pcie_cap) { + /* Restore MPS */ + old_mps = (ffs(pdn->mps) - 8) << 5; + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + devctl |= old_mps; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + + /* Disable Completion Timeout */ + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, + 4, &cap2); + if (cap2 & 0x10) { + eeh_ops->read_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, &cap2); + cap2 |= 0x10; + eeh_ops->write_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, cap2); + } + } + + /* Enable SERR and parity checking */ + eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); + cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); + + /* Enable report various errors */ + if (edev->pcie_cap) { + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_CERE; + devctl |= (PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + } + + /* Enable ECRC generation and check */ + if (edev->pcie_cap && edev->aer_cap) { + eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, &aer_capctl); + aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, aer_capctl); + } + + return 0; +} + static int pnv_eeh_restore_config(struct pci_dn *pdn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -1485,9 +1656,21 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) if (!edev) return -EEXIST; - phb = edev->phb->private_data; - ret = opal_pci_reinit(phb->opal_id, - OPAL_REINIT_PCI_DEV, edev->config_addr); + /* + * We have to restore the PCI config space after reset since the + * firmware can't see SRIOV VFs. + * + * FIXME: The MPS, error routing rules, timeout setting are worthy + * to be exported by firmware in extendible way. + */ + if (edev->physfn) { + ret = pnv_eeh_restore_vf_config(pdn); + } else { + phb = edev->phb->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->config_addr); + } + if (ret) { pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", __func__, edev->config_addr, ret); @@ -1516,6 +1699,40 @@ static struct eeh_ops pnv_eeh_ops = { .restore_config = pnv_eeh_restore_config }; +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); +} + +#ifdef CONFIG_PCI_IOV +static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + int parent_mps; + + if (!pdev->is_virtfn) + return; + + /* Synchronize MPS for VF and PF */ + parent_mps = pcie_get_mps(pdev->physfn); + if ((128 << pdev->pcie_mpss) >= parent_mps) + pcie_set_mps(pdev, parent_mps); + pdn->mps = pcie_get_mps(pdev); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps); +#endif /* CONFIG_PCI_IOV */ + /** * eeh_powernv_init - Register platform dependent EEH operations * diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 15bfbcd5debc..fcc8b6861b63 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -35,9 +35,9 @@ int pnv_save_sprs_for_winkle(void) int rc; /* - * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across * all cpus at boot. Get these reg values of current cpu and use the - * same accross all cpus. + * same across all cpus. */ uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; uint64_t hid0_val = mfspr(SPRN_HID0); @@ -185,7 +185,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, * fastsleep workaround needs to be left in 'applied' state on all * the cores. Do this by- * 1. Patching out the call to 'undo' workaround in fastsleep exit path - * 2. Sending ipi to all the cores which have atleast one online thread + * 2. Sending ipi to all the cores which have at least one online thread * 3. Patching out the call to 'apply' workaround in fastsleep entry * path * There is no need to send ipi to cores which have all threads diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index e85aa900f5c0..7229acd9bb3a 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -278,7 +278,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) /* * Enable/disable bypass mode on the NPU. The NPU only supports one - * window per link, so bypass needs to be explicity enabled or + * window per link, so bypass needs to be explicitly enabled or * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be * active at the same time. */ diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 44ed78af1a0d..39d6ff9e5630 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -31,26 +31,25 @@ struct memcons { __be32 in_cons; }; -static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, - loff_t pos, size_t count) +static struct memcons *opal_memcons = NULL; + +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) { - struct memcons *mc = bin_attr->private; const char *conbuf; ssize_t ret; size_t first_read = 0; uint32_t out_pos, avail; - if (!mc) + if (!opal_memcons) return -ENODEV; - out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos)); + out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos)); /* Now we've read out_pos, put a barrier in before reading the new * data it points to in conbuf. */ smp_rmb(); - conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys)); + conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys)); /* When the buffer has wrapped, read from the out_pos marker to the end * of the buffer, and then read the remaining data as in the un-wrapped @@ -58,7 +57,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, if (out_pos & MEMCONS_OUT_POS_WRAP) { out_pos &= MEMCONS_OUT_POS_MASK; - avail = be32_to_cpu(mc->obuf_size) - out_pos; + avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos; ret = memory_read_from_buffer(to, count, &pos, conbuf + out_pos, avail); @@ -76,7 +75,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, } /* Sanity check. The firmware should not do this to us. */ - if (out_pos > be32_to_cpu(mc->obuf_size)) { + if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) { pr_err("OPAL: memory console corruption. Aborting read.\n"); return -EINVAL; } @@ -91,6 +90,13 @@ out: return ret; } +static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + return opal_msglog_copy(to, pos, count); +} + static struct bin_attribute opal_msglog_attr = { .attr = {.name = "msglog", .mode = 0444}, .read = opal_msglog_read @@ -117,7 +123,15 @@ void __init opal_msglog_init(void) return; } - opal_msglog_attr.private = mc; + opal_memcons = mc; +} + +void __init opal_msglog_sysfs_init(void) +{ + if (!opal_memcons) { + pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n"); + return; + } if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0) pr_warn("OPAL: sysfs file creation failed\n"); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 4e0da5af94a1..0256d0729252 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -724,6 +724,9 @@ static int __init opal_init(void) of_node_put(leds); } + /* Initialise OPAL message log interface */ + opal_msglog_init(); + /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { @@ -739,8 +742,8 @@ static int __init opal_init(void) opal_platform_dump_init(); /* Setup system parameters interface */ opal_sys_param_init(); - /* Setup message log interface. */ - opal_msglog_init(); + /* Setup message log sysfs interface. */ + opal_msglog_sysfs_init(); } /* Initialize platform devices: IPMI backend, PRD & flash interface */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 573ae1994097..c5baaf3cc4e5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -872,9 +872,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - /* * The actual IOV BAR range is determined by the start address * and the actual size for num_vfs VFs BAR. This check is to @@ -903,9 +900,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); res2 = *res; res->start += size * offset; @@ -1196,29 +1190,36 @@ static void pnv_pci_ioda_setup_PEs(void) } #ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_release_m64(struct pci_dev *pdev) +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pci_dn *pdn; int i, j; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); phb = hose->private_data; pdn = pci_get_pdn(pdev); + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) { - if (pdn->m64_wins[i][j] == IODA_INVALID_M64) + for (j = 0; j < m64_bars; j++) { + if (pdn->m64_map[j][i] == IODA_INVALID_M64) continue; opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0); - clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc); - pdn->m64_wins[i][j] = IODA_INVALID_M64; + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); + clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); + pdn->m64_map[j][i] = IODA_INVALID_M64; } + kfree(pdn->m64_map); return 0; } @@ -1235,8 +1236,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) int total_vfs; resource_size_t size, start; int pe_num; - int vf_groups; - int vf_per_group; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1244,29 +1244,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) pdn = pci_get_pdn(pdev); total_vfs = pci_sriov_get_totalvfs(pdev); - /* Initialize the m64_wins to IODA_INVALID_M64 */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) - pdn->m64_wins[i][j] = IODA_INVALID_M64; + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + + pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); + if (!pdn->m64_map) + return -ENOMEM; + /* Initialize the m64_map to IODA_INVALID_M64 */ + for (i = 0; i < m64_bars ; i++) + for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) + pdn->m64_map[i][j] = IODA_INVALID_M64; - if (pdn->m64_per_iov == M64_PER_IOV) { - vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV; - vf_per_group = (num_vfs <= M64_PER_IOV)? 1: - roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - } else { - vf_groups = 1; - vf_per_group = 1; - } for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - - for (j = 0; j < vf_groups; j++) { + for (j = 0; j < m64_bars; j++) { do { win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, phb->ioda.m64_bar_idx + 1, 0); @@ -1275,12 +1272,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); - pdn->m64_wins[i][j] = win; + pdn->m64_map[j][i] = win; - if (pdn->m64_per_iov == M64_PER_IOV) { + if (pdn->m64_single_mode) { size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); - size = size * vf_per_group; start = res->start + size * j; } else { size = resource_size(res); @@ -1288,16 +1284,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) } /* Map the M64 here */ - if (pdn->m64_per_iov == M64_PER_IOV) { - pe_num = pdn->offset + j; + if (pdn->m64_single_mode) { + pe_num = pdn->pe_num_map[j]; rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe_num, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], 0); + pdn->m64_map[j][i], 0); } rc = opal_pci_set_phb_mem_window(phb->opal_id, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], + pdn->m64_map[j][i], start, 0, /* unused */ size); @@ -1309,12 +1305,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } - if (pdn->m64_per_iov == M64_PER_IOV) + if (pdn->m64_single_mode) rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); else rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); if (rc != OPAL_SUCCESS) { dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", @@ -1326,7 +1322,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) return 0; m64_failed: - pnv_pci_vf_release_m64(pdev); + pnv_pci_vf_release_m64(pdev, num_vfs); return -EBUSY; } @@ -1353,15 +1349,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); } -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pnv_ioda_pe *pe, *pe_n; struct pci_dn *pdn; - u16 vf_index; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1371,35 +1365,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) if (!pdev->is_physfn) return; - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++){ - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_REMOVE_PE_FROM_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { if (pe->parent_dev != pdev) continue; @@ -1424,7 +1389,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) struct pnv_phb *phb; struct pci_dn *pdn; struct pci_sriov *iov; - u16 num_vfs; + u16 num_vfs, i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1434,18 +1399,25 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) num_vfs = pdn->num_vfs; /* Release VF PEs */ - pnv_ioda_release_vf_PE(pdev, num_vfs); + pnv_ioda_release_vf_PE(pdev); if (phb->type == PNV_PHB_IODA2) { - if (pdn->m64_per_iov == 1) - pnv_pci_vf_resource_shift(pdev, -pdn->offset); + if (!pdn->m64_single_mode) + pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); /* Release M64 windows */ - pnv_pci_vf_release_m64(pdev); + pnv_pci_vf_release_m64(pdev, num_vfs); /* Release PE numbers */ - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if (pdn->pe_num_map[i] != IODA_INVALID_PE) + pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + } + } else + bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); } } @@ -1460,7 +1432,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) int pe_num; u16 vf_index; struct pci_dn *pdn; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1472,7 +1443,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { - pe_num = pdn->offset + vf_index; + if (pdn->m64_single_mode) + pe_num = pdn->pe_num_map[vf_index]; + else + pe_num = *pdn->pe_num_map + vf_index; pe = &phb->ioda.pe_array[pe_num]; pe->pe_number = pe_num; @@ -1505,37 +1479,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pnv_pci_ioda2_setup_dma_pe(phb, pe); } - - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) { - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) { - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++) { - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_ADD_PE_TO_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - } - } } int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) @@ -1545,6 +1488,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) struct pnv_phb *phb; struct pci_dn *pdn; int ret; + u16 i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1552,20 +1496,59 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) pdn = pci_get_pdn(pdev); if (phb->type == PNV_PHB_IODA2) { + if (!pdn->vfs_expanded) { + dev_info(&pdev->dev, "don't support this SRIOV device" + " with non 64bit-prefetchable IOV BAR\n"); + return -ENOSPC; + } + + /* + * When M64 BARs functions in Single PE mode, the number of VFs + * could be enabled must be less than the number of M64 BARs. + */ + if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { + dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); + return -EBUSY; + } + + /* Allocating pe_num_map */ + if (pdn->m64_single_mode) + pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, + GFP_KERNEL); + else + pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); + + if (!pdn->pe_num_map) + return -ENOMEM; + + if (pdn->m64_single_mode) + for (i = 0; i < num_vfs; i++) + pdn->pe_num_map[i] = IODA_INVALID_PE; + /* Calculate available PE for required VFs */ - mutex_lock(&phb->ioda.pe_alloc_mutex); - pdn->offset = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, phb->ioda.total_pe, - 0, num_vfs, 0); - if (pdn->offset >= phb->ioda.total_pe) { + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb); + if (pdn->pe_num_map[i] == IODA_INVALID_PE) { + ret = -EBUSY; + goto m64_failed; + } + } + } else { + mutex_lock(&phb->ioda.pe_alloc_mutex); + *pdn->pe_num_map = bitmap_find_next_zero_area( + phb->ioda.pe_alloc, phb->ioda.total_pe, + 0, num_vfs, 0); + if (*pdn->pe_num_map >= phb->ioda.total_pe) { + mutex_unlock(&phb->ioda.pe_alloc_mutex); + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); + kfree(pdn->pe_num_map); + return -EBUSY; + } + bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); mutex_unlock(&phb->ioda.pe_alloc_mutex); - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); - pdn->offset = 0; - return -EBUSY; } - bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); pdn->num_vfs = num_vfs; - mutex_unlock(&phb->ioda.pe_alloc_mutex); /* Assign M64 window accordingly */ ret = pnv_pci_vf_assign_m64(pdev, num_vfs); @@ -1579,8 +1562,8 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) * the IOV BAR according to the PE# allocated to the VFs. * Otherwise, the PE# for the VF will conflict with others. */ - if (pdn->m64_per_iov == 1) { - ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); + if (!pdn->m64_single_mode) { + ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); if (ret) goto m64_failed; } @@ -1592,8 +1575,16 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return 0; m64_failed: - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if (pdn->pe_num_map[i] != IODA_INVALID_PE) + pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + } + } else + bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); + + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); return ret; } @@ -1612,8 +1603,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) /* Allocate PCI data */ add_dev_pci_data(pdev); - pnv_pci_sriov_enable(pdev, num_vfs); - return 0; + return pnv_pci_sriov_enable(pdev, num_vfs); } #endif /* CONFIG_PCI_IOV */ @@ -2851,45 +2841,58 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } #ifdef CONFIG_PCI_IOV static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) { - struct pci_controller *hose; - struct pnv_phb *phb; + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + const resource_size_t gate = phb->ioda.m64_segsize >> 2; struct resource *res; int i; - resource_size_t size; + resource_size_t size, total_vf_bar_sz; struct pci_dn *pdn; int mul, total_vfs; if (!pdev->is_physfn || pdev->is_added) return; - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; + pdn->m64_single_mode = false; total_vfs = pci_sriov_get_totalvfs(pdev); - pdn->m64_per_iov = 1; mul = phb->ioda.total_pe; + total_vf_bar_sz = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; if (!pnv_pci_is_mem_pref_64(res->flags)) { - dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n", + dev_warn(&pdev->dev, "Don't support SR-IOV with" + " non M64 VF BAR%d: %pR. \n", i, res); - continue; + goto truncate_iov; } - size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + total_vf_bar_sz += pci_iov_resource_size(pdev, + i + PCI_IOV_RESOURCES); - /* bigger than 64M */ - if (size > (1 << 26)) { - dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n", - i, res); - pdn->m64_per_iov = M64_PER_IOV; + /* + * If bigger than quarter of M64 segment size, just round up + * power of two. + * + * Generally, one M64 BAR maps one IOV BAR. To avoid conflict + * with other devices, IOV BAR size is expanded to be + * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64 + * segment size , the expanded size would equal to half of the + * whole M64 space size, which will exhaust the M64 Space and + * limit the system flexibility. This is a design decision to + * set the boundary to quarter of the M64 segment size. + */ + if (total_vf_bar_sz > gate) { mul = roundup_pow_of_two(total_vfs); + dev_info(&pdev->dev, + "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n", + total_vf_bar_sz, gate, mul); + pdn->m64_single_mode = true; break; } } @@ -2898,20 +2901,31 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) { - dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n", - i, res); - continue; - } - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + /* + * On PHB3, the minimum size alignment of M64 BAR in single + * mode is 32MB. + */ + if (pdn->m64_single_mode && (size < SZ_32M)) + goto truncate_iov; + dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); res->end = res->start + size * mul - 1; dev_dbg(&pdev->dev, " %pR\n", res); dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", i, res, mul); } pdn->vfs_expanded = mul; + + return; + +truncate_iov: + /* To save MMIO space, IOV BAR is truncated. */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + res->flags = 0; + res->end = res->start - 1; + } } #endif /* CONFIG_PCI_IOV */ @@ -3125,18 +3139,35 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); - resource_size_t align, iov_align; - - iov_align = resource_size(&pdev->resource[resno]); - if (iov_align) - return iov_align; + resource_size_t align; + /* + * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the + * SR-IOV. While from hardware perspective, the range mapped by M64 + * BAR should be size aligned. + * + * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra + * powernv-specific hardware restriction is gone. But if just use the + * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with + * in one segment of M64 #15, which introduces the PE conflict between + * PF and VF. Based on this, the minimum alignment of an IOV BAR is + * m64_segsize. + * + * This function returns the total IOV BAR size if M64 BAR is in + * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. + */ align = pci_iov_resource_size(pdev, resno); - if (pdn->vfs_expanded) - return pdn->vfs_expanded * align; + if (!pdn->vfs_expanded) + return align; + if (pdn->m64_single_mode) + return max(align, (resource_size_t)phb->ioda.m64_segsize); - return align; + return pdn->vfs_expanded * align; } #endif /* CONFIG_PCI_IOV */ @@ -3180,6 +3211,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_bus_setup = pnv_pci_dma_bus_setup, #ifdef CONFIG_PCI_MSI .setup_msi_irqs = pnv_setup_msi_irqs, .teardown_msi_irqs = pnv_teardown_msi_irqs, diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c deleted file mode 100644 index f2bdfea3b68d..000000000000 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Support PCI/PCIe on PowerNV platforms - * - * Currently supports only P5IOC2 - * - * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/irq.h> -#include <linux/io.h> -#include <linux/msi.h> - -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/machdep.h> -#include <asm/msi_bitmap.h> -#include <asm/ppc-pci.h> -#include <asm/opal.h> -#include <asm/iommu.h> -#include <asm/tce.h> - -#include "powernv.h" -#include "pci.h" - -/* For now, use a fixed amount of TCE memory for each p5ioc2 - * hub, 16M will do - */ -#define P5IOC2_TCE_MEMORY 0x01000000 - -#ifdef CONFIG_PCI_MSI -static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg) -{ - if (WARN_ON(!is_64)) - return -ENXIO; - msg->data = hwirq - phb->msi_base; - msg->address_hi = 0x10000000; - msg->address_lo = 0; - - return 0; -} - -static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) -{ - unsigned int count; - const __be32 *prop = of_get_property(phb->hose->dn, - "ibm,opal-msi-ranges", NULL); - if (!prop) - return; - - /* Don't do MSI's on p5ioc2 PCI-X are they are not properly - * verified in HW - */ - if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix")) - return; - phb->msi_base = be32_to_cpup(prop); - count = be32_to_cpup(prop + 1); - if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { - pr_err("PCI %d: Failed to allocate MSI bitmap !\n", - phb->hose->global_number); - return; - } - phb->msi_setup = pnv_pci_p5ioc2_msi_setup; - phb->msi32_support = 0; - pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", - count, phb->msi_base); -} -#else -static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } -#endif /* CONFIG_PCI_MSI */ - -static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { - .set = pnv_tce_build, -#ifdef CONFIG_IOMMU_API - .exchange = pnv_tce_xchg, -#endif - .clear = pnv_tce_free, - .get = pnv_tce_get, -}; - -static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, - struct pci_dev *pdev) -{ - struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0]; - - if (!tbl->it_map) { - tbl->it_ops = &pnv_p5ioc2_iommu_ops; - iommu_init_table(tbl, phb->hose->node); - iommu_register_group(&phb->p5ioc2.table_group, - pci_domain_nr(phb->hose->bus), phb->opal_id); - INIT_LIST_HEAD_RCU(&tbl->it_group_list); - pnv_pci_link_table_and_group(phb->hose->node, 0, - tbl, &phb->p5ioc2.table_group); - } - - set_iommu_table_base(&pdev->dev, tbl); - iommu_add_device(&pdev->dev); -} - -static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, -#endif -}; - -static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, - void *tce_mem, u64 tce_size) -{ - struct pnv_phb *phb; - const __be64 *prop64; - u64 phb_id; - int64_t rc; - static int primary = 1; - struct iommu_table_group *table_group; - struct iommu_table *tbl; - - pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name); - - prop64 = of_get_property(np, "ibm,opal-phbid", NULL); - if (!prop64) { - pr_err(" Missing \"ibm,opal-phbid\" property !\n"); - return; - } - phb_id = be64_to_cpup(prop64); - pr_devel(" PHB-ID : 0x%016llx\n", phb_id); - pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem)); - pr_devel(" TCE SZ : 0x%016llx\n", tce_size); - - rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size); - if (rc != OPAL_SUCCESS) { - pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc); - return; - } - - phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); - phb->hose = pcibios_alloc_controller(np); - if (!phb->hose) { - pr_err(" Failed to allocate PCI controller\n"); - return; - } - - spin_lock_init(&phb->lock); - phb->hose->first_busno = 0; - phb->hose->last_busno = 0xff; - phb->hose->private_data = phb; - phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops; - phb->hub_id = hub_id; - phb->opal_id = phb_id; - phb->type = PNV_PHB_P5IOC2; - phb->model = PNV_PHB_MODEL_P5IOC2; - - phb->regs = of_iomap(np, 0); - - if (phb->regs == NULL) - pr_err(" Failed to map registers !\n"); - else { - pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100)); - pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0)); - pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0)); - pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0)); - pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190)); - pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0)); - pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0)); - pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0)); - pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0)); - pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0)); - pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0)); - } - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(phb->hose, np, primary); - primary = 0; - - phb->hose->ops = &pnv_pci_ops; - - /* Setup MSI support */ - pnv_pci_init_p5ioc2_msis(phb); - - /* Setup TCEs */ - phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup; - pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, - tce_mem, tce_size, 0, - IOMMU_PAGE_SHIFT_4K); - /* - * We do not allocate iommu_table as we do not support - * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() - * should not be called for phb->p5ioc2.table_group.tables[0] ever. - */ - tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; - table_group = &phb->p5ioc2.table_group; - table_group->tce32_start = tbl->it_offset << tbl->it_page_shift; - table_group->tce32_size = tbl->it_size << tbl->it_page_shift; -} - -void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) -{ - struct device_node *phbn; - const __be64 *prop64; - u64 hub_id; - void *tce_mem; - uint64_t tce_per_phb; - int64_t rc; - int phb_count = 0; - - pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name); - - prop64 = of_get_property(np, "ibm,opal-hubid", NULL); - if (!prop64) { - pr_err(" Missing \"ibm,opal-hubid\" property !\n"); - return; - } - hub_id = be64_to_cpup(prop64); - pr_info(" HUB-ID : 0x%016llx\n", hub_id); - - /* Count child PHBs and calculate TCE space per PHB */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) - phb_count++; - } - - if (phb_count <= 0) { - pr_info(" No PHBs for Hub %s\n", np->full_name); - return; - } - - tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); - pr_info(" Allocating %lld MB of TCE memory per PHB\n", - tce_per_phb >> 20); - - /* Currently allocate 16M of TCE memory for every Hub - * - * XXX TODO: Make it chip local if possible - */ - tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); - pr_debug(" TCE : 0x%016lx..0x%016lx\n", - __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); - rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), - P5IOC2_TCE_MEMORY); - if (rc != OPAL_SUCCESS) { - pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc); - return; - } - - /* Initialize PHBs */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { - pnv_pci_init_p5ioc2_phb(phbn, hub_id, - tce_mem, tce_per_phb); - tce_mem += tce_per_phb; - } - } -} diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2f55c86df703..73c8dc2a353f 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -380,10 +380,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) */ pe_no = pdn->pe_number; if (pe_no == IODA_INVALID_PE) { - if (phb->type == PNV_PHB_P5IOC2) - pe_no = 0; - else - pe_no = phb->ioda.reserved_pe; + pe_no = phb->ioda.reserved_pe; } /* @@ -599,6 +596,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, u64 rpn = __pa(uaddr) >> tbl->it_page_shift; long i; + if (proto_tce & TCE_PCI_WRITE) + proto_tce |= TCE_PCI_READ; + for (i = 0; i < npages; i++) { unsigned long newtce = proto_tce | ((rpn + i) << tbl->it_page_shift); @@ -620,6 +620,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + if (newtce & TCE_PCI_WRITE) + newtce |= TCE_PCI_READ; + oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); *direction = iommu_tce_direction(oldtce); @@ -760,6 +763,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) phb->dma_dev_setup(phb, pdev); } +void pnv_pci_dma_bus_setup(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) + continue; + + if (!pe->pbus) + continue; + + if (bus->number == ((pe->rid >> 8) & 0xFF)) { + pe->pbus = bus; + break; + } + } +} + void pnv_pci_shutdown(void) { struct pci_controller *hose; @@ -779,7 +802,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk); void __init pnv_pci_init(void) { struct device_node *np; - bool found_ioda = false; pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN); @@ -787,20 +809,11 @@ void __init pnv_pci_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return; - /* Look for IODA IO-Hubs. We don't support mixing IODA - * and p5ioc2 due to the need to change some global - * probing flags - */ + /* Look for IODA IO-Hubs. */ for_each_compatible_node(np, NULL, "ibm,ioda-hub") { pnv_pci_init_ioda_hub(np); - found_ioda = true; } - /* Look for p5ioc2 IO-Hubs */ - if (!found_ioda) - for_each_compatible_node(np, NULL, "ibm,p5ioc2") - pnv_pci_init_p5ioc2_hub(np); - /* Look for ioda2 built-in PHB3's */ for_each_compatible_node(np, NULL, "ibm,ioda2-phb") pnv_pci_init_ioda2_phb(np); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 7f56313e8d72..3f814f382b2e 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -4,16 +4,14 @@ struct pci_dn; enum pnv_phb_type { - PNV_PHB_P5IOC2 = 0, - PNV_PHB_IODA1 = 1, - PNV_PHB_IODA2 = 2, - PNV_PHB_NPU = 3, + PNV_PHB_IODA1 = 0, + PNV_PHB_IODA2 = 1, + PNV_PHB_NPU = 2, }; /* Precise PHB model for error management */ enum pnv_phb_model { PNV_PHB_MODEL_UNKNOWN, - PNV_PHB_MODEL_P5IOC2, PNV_PHB_MODEL_P7IOC, PNV_PHB_MODEL_PHB3, PNV_PHB_MODEL_NPU, @@ -121,81 +119,74 @@ struct pnv_phb { void (*freeze_pe)(struct pnv_phb *phb, int pe_no); int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); - union { - struct { - struct iommu_table iommu_table; - struct iommu_table_group table_group; - } p5ioc2; - - struct { - /* Global bridge info */ - unsigned int total_pe; - unsigned int reserved_pe; - - /* 32-bit MMIO window */ - unsigned int m32_size; - unsigned int m32_segsize; - unsigned int m32_pci_base; - - /* 64-bit MMIO window */ - unsigned int m64_bar_idx; - unsigned long m64_size; - unsigned long m64_segsize; - unsigned long m64_base; - unsigned long m64_bar_alloc; - - /* IO ports */ - unsigned int io_size; - unsigned int io_segsize; - unsigned int io_pci_base; - - /* PE allocation bitmap */ - unsigned long *pe_alloc; - /* PE allocation mutex */ - struct mutex pe_alloc_mutex; - - /* M32 & IO segment maps */ - unsigned int *m32_segmap; - unsigned int *io_segmap; - struct pnv_ioda_pe *pe_array; - - /* IRQ chip */ - int irq_chip_init; - struct irq_chip irq_chip; - - /* Sorted list of used PE's based - * on the sequence of creation - */ - struct list_head pe_list; - struct mutex pe_list_mutex; - - /* Reverse map of PEs, will have to extend if - * we are to support more than 256 PEs, indexed - * bus { bus, devfn } - */ - unsigned char pe_rmap[0x10000]; - - /* 32-bit TCE tables allocation */ - unsigned long tce32_count; - - /* Total "weight" for the sake of DMA resources - * allocation - */ - unsigned int dma_weight; - unsigned int dma_pe_count; - - /* Sorted list of used PE's, sorted at - * boot for resource allocation purposes - */ - struct list_head pe_dma_list; - - /* TCE cache invalidate registers (physical and - * remapped) - */ - phys_addr_t tce_inval_reg_phys; - __be64 __iomem *tce_inval_reg; - } ioda; - }; + struct { + /* Global bridge info */ + unsigned int total_pe; + unsigned int reserved_pe; + + /* 32-bit MMIO window */ + unsigned int m32_size; + unsigned int m32_segsize; + unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned long m64_segsize; + unsigned long m64_base; + unsigned long m64_bar_alloc; + + /* IO ports */ + unsigned int io_size; + unsigned int io_segsize; + unsigned int io_pci_base; + + /* PE allocation bitmap */ + unsigned long *pe_alloc; + /* PE allocation mutex */ + struct mutex pe_alloc_mutex; + + /* M32 & IO segment maps */ + unsigned int *m32_segmap; + unsigned int *io_segmap; + struct pnv_ioda_pe *pe_array; + + /* IRQ chip */ + int irq_chip_init; + struct irq_chip irq_chip; + + /* Sorted list of used PE's based + * on the sequence of creation + */ + struct list_head pe_list; + struct mutex pe_list_mutex; + + /* Reverse map of PEs, will have to extend if + * we are to support more than 256 PEs, indexed + * bus { bus, devfn } + */ + unsigned char pe_rmap[0x10000]; + + /* 32-bit TCE tables allocation */ + unsigned long tce32_count; + + /* Total "weight" for the sake of DMA resources + * allocation + */ + unsigned int dma_weight; + unsigned int dma_pe_count; + + /* Sorted list of used PE's, sorted at + * boot for resource allocation purposes + */ + struct list_head pe_dma_list; + + /* TCE cache invalidate registers (physical and + * remapped) + */ + phys_addr_t tce_inval_reg_phys; + __be64 __iomem *tce_inval_reg; + } ioda; /* PHB and hub status structure */ union { @@ -232,7 +223,6 @@ extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift); -extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); @@ -242,6 +232,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); +extern void pnv_pci_dma_bus_setup(struct pci_bus *bus); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 503a73f59359..0babef11136f 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -407,7 +407,7 @@ static DEVICE_ATTR(subcores_per_core, 0644, static int subcore_init(void) { - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + if (!cpu_has_feature(CPU_FTR_SUBCORE)) return 0; /* |