diff options
author | Michael Ellerman <mpe@ellerman.id.au> | 2015-04-14 02:29:23 +0300 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2015-04-14 02:29:23 +0300 |
commit | ad30cb9946515f72af5c3e89ad9de18870c1a1e7 (patch) | |
tree | b4c9b385e18ae8128da1522055a17b453f2309fc /drivers/pci | |
parent | b0a478ede669949682b9c698f6146c0065543b91 (diff) | |
parent | d4ed11aa4881246e1e36e0189f30f053f140370c (diff) | |
download | linux-ad30cb9946515f72af5c3e89ad9de18870c1a1e7.tar.xz |
Merge branch 'next-sriov' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc into next
Merge Richard's work to support SR-IOV on PowerNV. All generic PCI
patches acked by Bjorn.
Some minor conflicts with Daniel's pci_controller_ops work.
Conflicts:
arch/powerpc/include/asm/machdep.h
arch/powerpc/platforms/powernv/pci-ioda.c
Diffstat (limited to 'drivers/pci')
-rw-r--r-- | drivers/pci/iov.c | 155 | ||||
-rw-r--r-- | drivers/pci/pci.h | 2 | ||||
-rw-r--r-- | drivers/pci/setup-bus.c | 95 |
3 files changed, 195 insertions, 57 deletions
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 4b3a4eaad996..ee0ebff103a4 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -19,16 +19,59 @@ #define VIRTFN_ID_LEN 16 -static inline u8 virtfn_bus(struct pci_dev *dev, int id) +int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id) { + if (!dev->is_physfn) + return -EINVAL; return dev->bus->number + ((dev->devfn + dev->sriov->offset + - dev->sriov->stride * id) >> 8); + dev->sriov->stride * vf_id) >> 8); } -static inline u8 virtfn_devfn(struct pci_dev *dev, int id) +int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id) { + if (!dev->is_physfn) + return -EINVAL; return (dev->devfn + dev->sriov->offset + - dev->sriov->stride * id) & 0xff; + dev->sriov->stride * vf_id) & 0xff; +} + +/* + * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may + * change when NumVFs changes. + * + * Update iov->offset and iov->stride when NumVFs is written. + */ +static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) +{ + struct pci_sriov *iov = dev->sriov; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); +} + +/* + * The PF consumes one bus number. NumVFs, First VF Offset, and VF Stride + * determine how many additional bus numbers will be consumed by VFs. + * + * Iterate over all valid NumVFs and calculate the maximum number of bus + * numbers that could ever be required. + */ +static inline u8 virtfn_max_buses(struct pci_dev *dev) +{ + struct pci_sriov *iov = dev->sriov; + int nr_virtfn; + u8 max = 0; + int busnr; + + for (nr_virtfn = 1; nr_virtfn <= iov->total_VFs; nr_virtfn++) { + pci_iov_set_numvfs(dev, nr_virtfn); + busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1); + if (busnr > max) + max = busnr; + } + + return max; } static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) @@ -57,6 +100,14 @@ static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus) pci_remove_bus(virtbus); } +resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) +{ + if (!dev->is_physfn) + return 0; + + return dev->sriov->barsz[resno - PCI_IOV_RESOURCES]; +} + static int virtfn_add(struct pci_dev *dev, int id, int reset) { int i; @@ -69,7 +120,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) struct pci_bus *bus; mutex_lock(&iov->dev->sriov->lock); - bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id)); if (!bus) goto failed; @@ -77,7 +128,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) if (!virtfn) goto failed0; - virtfn->devfn = virtfn_devfn(dev, id); + virtfn->devfn = pci_iov_virtfn_devfn(dev, id); virtfn->vendor = dev->vendor; pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); pci_setup_device(virtfn); @@ -87,13 +138,12 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) virtfn->multifunction = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = dev->resource + PCI_IOV_RESOURCES + i; + res = &dev->resource[i + PCI_IOV_RESOURCES]; if (!res->parent) continue; virtfn->resource[i].name = pci_name(virtfn); virtfn->resource[i].flags = res->flags; - size = resource_size(res); - do_div(size, iov->total_VFs); + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); virtfn->resource[i].start = res->start + size * id; virtfn->resource[i].end = virtfn->resource[i].start + size - 1; rc = request_resource(res, &virtfn->resource[i]); @@ -140,8 +190,8 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) struct pci_sriov *iov = dev->sriov; virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus), - virtfn_bus(dev, id), - virtfn_devfn(dev, id)); + pci_iov_virtfn_bus(dev, id), + pci_iov_virtfn_devfn(dev, id)); if (!virtfn) return; @@ -170,6 +220,11 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) pci_dev_put(dev); } +int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + return 0; +} + static int sriov_enable(struct pci_dev *dev, int nr_virtfn) { int rc; @@ -180,6 +235,8 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) struct pci_dev *pdev; struct pci_sriov *iov = dev->sriov; int bars = 0; + int bus; + int retval; if (!nr_virtfn) return 0; @@ -204,7 +261,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { bars |= (1 << (i + PCI_IOV_RESOURCES)); - res = dev->resource + PCI_IOV_RESOURCES + i; + res = &dev->resource[i + PCI_IOV_RESOURCES]; if (res->parent) nres++; } @@ -216,8 +273,10 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) iov->offset = offset; iov->stride = stride; - if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->busn_res.end) { - dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1); + if (bus > dev->bus->busn_res.end) { + dev_err(&dev->dev, "can't enable %d VFs (bus %02x out of range of %pR)\n", + nr_virtfn, bus, &dev->bus->busn_res); return -ENOMEM; } @@ -243,7 +302,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) return rc; } - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_iov_set_numvfs(dev, nr_virtfn); iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); @@ -254,6 +313,12 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) if (nr_virtfn < initial) initial = nr_virtfn; + if ((retval = pcibios_sriov_enable(dev, initial))) { + dev_err(&dev->dev, "failure %d from pcibios_sriov_enable()\n", + retval); + return retval; + } + for (i = 0; i < initial; i++) { rc = virtfn_add(dev, i, 0); if (rc) @@ -272,7 +337,7 @@ failed: iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); + pci_iov_set_numvfs(dev, 0); ssleep(1); pci_cfg_access_unlock(dev); @@ -282,6 +347,11 @@ failed: return rc; } +int __weak pcibios_sriov_disable(struct pci_dev *pdev) +{ + return 0; +} + static void sriov_disable(struct pci_dev *dev) { int i; @@ -293,6 +363,8 @@ static void sriov_disable(struct pci_dev *dev) for (i = 0; i < iov->num_VFs; i++) virtfn_remove(dev, i, 0); + pcibios_sriov_disable(dev); + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); @@ -303,12 +375,12 @@ static void sriov_disable(struct pci_dev *dev) sysfs_remove_link(&dev->dev.kobj, "dep_link"); iov->num_VFs = 0; - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); + pci_iov_set_numvfs(dev, 0); } static int sriov_init(struct pci_dev *dev, int pos) { - int i; + int i, bar64; int rc; int nres; u32 pgsz; @@ -357,27 +429,29 @@ found: pgsz &= ~(pgsz - 1); pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + return -ENOMEM; + nres = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = dev->resource + PCI_IOV_RESOURCES + i; - i += __pci_read_base(dev, pci_bar_unknown, res, - pos + PCI_SRIOV_BAR + i * 4); + res = &dev->resource[i + PCI_IOV_RESOURCES]; + bar64 = __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); if (!res->flags) continue; if (resource_size(res) & (PAGE_SIZE - 1)) { rc = -EIO; goto failed; } + iov->barsz[i] = resource_size(res); res->end = res->start + resource_size(res) * total - 1; + dev_info(&dev->dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n", + i, res, i, total); + i += bar64; nres++; } - iov = kzalloc(sizeof(*iov), GFP_KERNEL); - if (!iov) { - rc = -ENOMEM; - goto failed; - } - iov->pos = pos; iov->nres = nres; iov->ctrl = ctrl; @@ -400,15 +474,17 @@ found: dev->sriov = iov; dev->is_physfn = 1; + iov->max_VF_buses = virtfn_max_buses(dev); return 0; failed: for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = dev->resource + PCI_IOV_RESOURCES + i; + res = &dev->resource[i + PCI_IOV_RESOURCES]; res->flags = 0; } + kfree(iov); return rc; } @@ -439,7 +515,7 @@ static void sriov_restore_state(struct pci_dev *dev) pci_update_resource(dev, i); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs); + pci_iov_set_numvfs(dev, iov->num_VFs); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); if (iov->ctrl & PCI_SRIOV_CTRL_VFE) msleep(100); @@ -493,6 +569,12 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno) 4 * (resno - PCI_IOV_RESOURCES); } +resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev, + int resno) +{ + return pci_iov_resource_size(dev, resno); +} + /** * pci_sriov_resource_alignment - get resource alignment for VF BAR * @dev: the PCI device @@ -505,14 +587,7 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno) */ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) { - struct resource tmp; - int reg = pci_iov_resource_bar(dev, resno); - - if (!reg) - return 0; - - __pci_read_base(dev, pci_bar_unknown, &tmp, reg); - return resource_alignment(&tmp); + return pcibios_iov_resource_alignment(dev, resno); } /** @@ -535,15 +610,13 @@ void pci_restore_iov_state(struct pci_dev *dev) int pci_iov_bus_range(struct pci_bus *bus) { int max = 0; - u8 busnr; struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { if (!dev->is_physfn) continue; - busnr = virtfn_bus(dev, dev->sriov->total_VFs - 1); - if (busnr > max) - max = busnr; + if (dev->sriov->max_VF_buses > max) + max = dev->sriov->max_VF_buses; } return max ? max - bus->number : 0; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 4091f82239cd..bae593c04541 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -243,10 +243,12 @@ struct pci_sriov { u16 stride; /* following VF stride */ u32 pgsz; /* page size for BAR alignment */ u8 link; /* Function Dependency Link */ + u8 max_VF_buses; /* max buses consumed by VFs */ u16 driver_max_VFs; /* max num VFs driver supports */ struct pci_dev *dev; /* lowest numbered PF */ struct pci_dev *self; /* this PF */ struct mutex lock; /* lock for VF bus */ + resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ }; #ifdef CONFIG_PCI_ATS diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index e3e17f3c0f0f..6603d401bb7c 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -99,8 +99,8 @@ static void remove_from_list(struct list_head *head, } } -static resource_size_t get_res_add_size(struct list_head *head, - struct resource *res) +static struct pci_dev_resource *res_to_dev_res(struct list_head *head, + struct resource *res) { struct pci_dev_resource *dev_res; @@ -109,17 +109,37 @@ static resource_size_t get_res_add_size(struct list_head *head, int idx = res - &dev_res->dev->resource[0]; dev_printk(KERN_DEBUG, &dev_res->dev->dev, - "res[%d]=%pR get_res_add_size add_size %llx\n", + "res[%d]=%pR res_to_dev_res add_size %llx min_align %llx\n", idx, dev_res->res, - (unsigned long long)dev_res->add_size); + (unsigned long long)dev_res->add_size, + (unsigned long long)dev_res->min_align); - return dev_res->add_size; + return dev_res; } } - return 0; + return NULL; } +static resource_size_t get_res_add_size(struct list_head *head, + struct resource *res) +{ + struct pci_dev_resource *dev_res; + + dev_res = res_to_dev_res(head, res); + return dev_res ? dev_res->add_size : 0; +} + +static resource_size_t get_res_add_align(struct list_head *head, + struct resource *res) +{ + struct pci_dev_resource *dev_res; + + dev_res = res_to_dev_res(head, res); + return dev_res ? dev_res->min_align : 0; +} + + /* Sort resources by alignment */ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) { @@ -215,7 +235,7 @@ static void reassign_resources_sorted(struct list_head *realloc_head, struct resource *res; struct pci_dev_resource *add_res, *tmp; struct pci_dev_resource *dev_res; - resource_size_t add_size; + resource_size_t add_size, align; int idx; list_for_each_entry_safe(add_res, tmp, realloc_head, list) { @@ -238,13 +258,13 @@ static void reassign_resources_sorted(struct list_head *realloc_head, idx = res - &add_res->dev->resource[0]; add_size = add_res->add_size; + align = add_res->min_align; if (!resource_size(res)) { - res->start = add_res->start; + res->start = align; res->end = res->start + add_size - 1; if (pci_assign_resource(add_res->dev, idx)) reset_resource(res); } else { - resource_size_t align = add_res->min_align; res->flags |= add_res->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); if (pci_reassign_resource(add_res->dev, idx, @@ -368,8 +388,9 @@ static void __assign_resources_sorted(struct list_head *head, LIST_HEAD(save_head); LIST_HEAD(local_fail_head); struct pci_dev_resource *save_res; - struct pci_dev_resource *dev_res, *tmp_res; + struct pci_dev_resource *dev_res, *tmp_res, *dev_res2; unsigned long fail_type; + resource_size_t add_align, align; /* Check if optional add_size is there */ if (!realloc_head || list_empty(realloc_head)) @@ -384,10 +405,44 @@ static void __assign_resources_sorted(struct list_head *head, } /* Update res in head list with add_size in realloc_head list */ - list_for_each_entry(dev_res, head, list) + list_for_each_entry_safe(dev_res, tmp_res, head, list) { dev_res->res->end += get_res_add_size(realloc_head, dev_res->res); + /* + * There are two kinds of additional resources in the list: + * 1. bridge resource -- IORESOURCE_STARTALIGN + * 2. SR-IOV resource -- IORESOURCE_SIZEALIGN + * Here just fix the additional alignment for bridge + */ + if (!(dev_res->res->flags & IORESOURCE_STARTALIGN)) + continue; + + add_align = get_res_add_align(realloc_head, dev_res->res); + + /* + * The "head" list is sorted by the alignment to make sure + * resources with bigger alignment will be assigned first. + * After we change the alignment of a dev_res in "head" list, + * we need to reorder the list by alignment to make it + * consistent. + */ + if (add_align > dev_res->res->start) { + dev_res->res->start = add_align; + dev_res->res->end = add_align + + resource_size(dev_res->res); + + list_for_each_entry(dev_res2, head, list) { + align = pci_resource_alignment(dev_res2->dev, + dev_res2->res); + if (add_align > align) + list_move_tail(&dev_res->list, + &dev_res2->list); + } + } + + } + /* Try updated head list with add_size added */ assign_requested_resources_sorted(head, &local_fail_head); @@ -962,6 +1017,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, struct resource *b_res = find_free_bus_resource(bus, mask | IORESOURCE_PREFETCH, type); resource_size_t children_add_size = 0; + resource_size_t children_add_align = 0; + resource_size_t add_align = 0; if (!b_res) return -ENOSPC; @@ -986,6 +1043,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, /* put SRIOV requested res to the optional list */ if (realloc_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { + add_align = max(pci_resource_alignment(dev, r), add_align); r->end = r->start - 1; add_to_list(realloc_head, dev, r, r_size, 0/* don't care */); children_add_size += r_size; @@ -1016,19 +1074,23 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (order > max_order) max_order = order; - if (realloc_head) + if (realloc_head) { children_add_size += get_res_add_size(realloc_head, r); + children_add_align = get_res_add_align(realloc_head, r); + add_align = max(add_align, children_add_align); + } } } min_align = calculate_mem_align(aligns, max_order); min_align = max(min_align, window_alignment(bus, b_res->flags)); size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); + add_align = max(min_align, add_align); if (children_add_size > add_size) add_size = children_add_size; size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 : calculate_memsize(size, min_size, add_size, - resource_size(b_res), min_align); + resource_size(b_res), add_align); if (!size0 && !size1) { if (b_res->start || b_res->end) dev_info(&bus->self->dev, "disabling bridge window %pR to %pR (unused)\n", @@ -1040,10 +1102,11 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN; if (size1 > size0 && realloc_head) { - add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align); - dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx\n", + add_to_list(realloc_head, bus->self, b_res, size1-size0, add_align); + dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window %pR to %pR add_size %llx add_align %llx\n", b_res, &bus->busn_res, - (unsigned long long)size1-size0); + (unsigned long long) (size1 - size0), + (unsigned long long) add_align); } return 0; } |