From 302b4215daa0a704c843da40fd2529e5757a72da Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:32 +0800 Subject: PCI: support the ATS capability The PCIe ATS capability makes the Endpoint be able to request the DMA address translation from the IOMMU and cache the translation in the device side, thus alleviate IOMMU pressure and improve the hardware performance in the I/O virtualization environment. Signed-off-by: Yu Zhao Acked-by: Jesse Barnes Signed-off-by: David Woodhouse --- drivers/pci/iov.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'drivers/pci/iov.c') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index b497daab3d4a..0a7a1b40286f 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -5,6 +5,7 @@ * * PCI Express I/O Virtualization (IOV) support. * Single Root IOV 1.0 + * Address Translation Service 1.0 */ #include @@ -679,3 +680,107 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev) return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; } EXPORT_SYMBOL_GPL(pci_sriov_migration); + +static int ats_alloc_one(struct pci_dev *dev, int ps) +{ + int pos; + u16 cap; + struct pci_ats *ats; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + ats = kzalloc(sizeof(*ats), GFP_KERNEL); + if (!ats) + return -ENOMEM; + + ats->pos = pos; + ats->stu = ps; + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; + dev->ats = ats; + + return 0; +} + +static void ats_free_one(struct pci_dev *dev) +{ + kfree(dev->ats); + dev->ats = NULL; +} + +/** + * pci_enable_ats - enable the ATS capability + * @dev: the PCI device + * @ps: the IOMMU page shift + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_ats(struct pci_dev *dev, int ps) +{ + int rc; + u16 ctrl; + + BUG_ON(dev->ats); + + if (ps < PCI_ATS_MIN_STU) + return -EINVAL; + + rc = ats_alloc_one(dev, ps); + if (rc) + return rc; + + ctrl = PCI_ATS_CTRL_ENABLE; + ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU); + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + return 0; +} + +/** + * pci_disable_ats - disable the ATS capability + * @dev: the PCI device + */ +void pci_disable_ats(struct pci_dev *dev) +{ + u16 ctrl; + + BUG_ON(!dev->ats); + + pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl); + ctrl &= ~PCI_ATS_CTRL_ENABLE; + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + ats_free_one(dev); +} + +/** + * pci_ats_queue_depth - query the ATS Invalidate Queue Depth + * @dev: the PCI device + * + * Returns the queue depth on success, or negative on failure. + * + * The ATS spec uses 0 in the Invalidate Queue Depth field to + * indicate that the function can accept 32 Invalidate Request. + * But here we use the `real' values (i.e. 1~32) for the Queue + * Depth. + */ +int pci_ats_queue_depth(struct pci_dev *dev) +{ + int pos; + u16 cap; + + if (dev->ats) + return dev->ats->qdep; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + + return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; +} -- cgit v1.2.3 From e277d2fc79d6abb86fafadb58dca0b9c498a9aa7 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 18 May 2009 13:51:33 +0800 Subject: PCI: handle Virtual Function ATS enabling The SR-IOV spec requires that the Smallest Translation Unit and the Invalidate Queue Depth fields in the Virtual Function ATS capability are hardwired to 0. If a function is a Virtual Function, then and set its Physical Function's STU before enabling the ATS. Signed-off-by: Yu Zhao Acked-by: Jesse Barnes Signed-off-by: David Woodhouse --- drivers/pci/iov.c | 66 +++++++++++++++++++++++++++++++++++++++++++------------ drivers/pci/pci.h | 4 +++- 2 files changed, 55 insertions(+), 15 deletions(-) (limited to 'drivers/pci/iov.c') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 0a7a1b40286f..415140499ffb 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -491,10 +491,10 @@ found: if (pdev) iov->dev = pci_dev_get(pdev); - else { + else iov->dev = dev; - mutex_init(&iov->lock); - } + + mutex_init(&iov->lock); dev->sriov = iov; dev->is_physfn = 1; @@ -514,11 +514,11 @@ static void sriov_release(struct pci_dev *dev) { BUG_ON(dev->sriov->nr_virtfn); - if (dev == dev->sriov->dev) - mutex_destroy(&dev->sriov->lock); - else + if (dev != dev->sriov->dev) pci_dev_put(dev->sriov->dev); + mutex_destroy(&dev->sriov->lock); + kfree(dev->sriov); dev->sriov = NULL; } @@ -723,19 +723,40 @@ int pci_enable_ats(struct pci_dev *dev, int ps) int rc; u16 ctrl; - BUG_ON(dev->ats); + BUG_ON(dev->ats && dev->ats->is_enabled); if (ps < PCI_ATS_MIN_STU) return -EINVAL; - rc = ats_alloc_one(dev, ps); - if (rc) - return rc; + if (dev->is_physfn || dev->is_virtfn) { + struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn; + + mutex_lock(&pdev->sriov->lock); + if (pdev->ats) + rc = pdev->ats->stu == ps ? 0 : -EINVAL; + else + rc = ats_alloc_one(pdev, ps); + + if (!rc) + pdev->ats->ref_cnt++; + mutex_unlock(&pdev->sriov->lock); + if (rc) + return rc; + } + + if (!dev->is_physfn) { + rc = ats_alloc_one(dev, ps); + if (rc) + return rc; + } ctrl = PCI_ATS_CTRL_ENABLE; - ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU); + if (!dev->is_virtfn) + ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU); pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + dev->ats->is_enabled = 1; + return 0; } @@ -747,13 +768,26 @@ void pci_disable_ats(struct pci_dev *dev) { u16 ctrl; - BUG_ON(!dev->ats); + BUG_ON(!dev->ats || !dev->ats->is_enabled); pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl); ctrl &= ~PCI_ATS_CTRL_ENABLE; pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); - ats_free_one(dev); + dev->ats->is_enabled = 0; + + if (dev->is_physfn || dev->is_virtfn) { + struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn; + + mutex_lock(&pdev->sriov->lock); + pdev->ats->ref_cnt--; + if (!pdev->ats->ref_cnt) + ats_free_one(pdev); + mutex_unlock(&pdev->sriov->lock); + } + + if (!dev->is_physfn) + ats_free_one(dev); } /** @@ -765,13 +799,17 @@ void pci_disable_ats(struct pci_dev *dev) * The ATS spec uses 0 in the Invalidate Queue Depth field to * indicate that the function can accept 32 Invalidate Request. * But here we use the `real' values (i.e. 1~32) for the Queue - * Depth. + * Depth; and 0 indicates the function shares the Queue with + * other functions (doesn't exclusively own a Queue). */ int pci_ats_queue_depth(struct pci_dev *dev) { int pos; u16 cap; + if (dev->is_virtfn) + return 0; + if (dev->ats) return dev->ats->qdep; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 3c2ec64f78e9..f73bcbedf37c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -234,6 +234,8 @@ struct pci_ats { int pos; /* capability position */ int stu; /* Smallest Translation Unit */ int qdep; /* Invalidate Queue Depth */ + int ref_cnt; /* Physical Function reference count */ + int is_enabled:1; /* Enable bit is set */ }; #ifdef CONFIG_PCI_IOV @@ -255,7 +257,7 @@ extern int pci_ats_queue_depth(struct pci_dev *dev); */ static inline int pci_ats_enabled(struct pci_dev *dev) { - return !!dev->ats; + return dev->ats && dev->ats->is_enabled; } #else static inline int pci_iov_init(struct pci_dev *dev) -- cgit v1.2.3 From 4d135dbee7b0a89e946f7ba284f2b957505a2c3a Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 20 May 2009 17:11:57 +0800 Subject: PCI: fix SR-IOV function dependency link problem PCIe root complex integrated endpoint does not implement ARI, so this kind of endpoint uses 3-bit function number. The function dependency link of the integrated endpoint should be calculated using the device number plus the value from function dependency link register. Normal endpoint always implements ARI and the function dependency link register contains 8-bit function number (i.e. `devfn' from software's perspective). Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/pci/iov.c') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index b497daab3d4a..e87fe95da814 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -487,6 +487,8 @@ found: iov->self = dev; pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); + if (dev->pcie_type == PCI_EXP_TYPE_RC_END) + iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link); if (pdev) iov->dev = pci_dev_get(pdev); -- cgit v1.2.3 From 8c1c699fec9e9021bf6ff0285dee086bb27aec90 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 13 Jun 2009 15:52:13 +0800 Subject: PCI: cleanup Function Level Reset This patch enhances the FLR functions: 1) remove disable_irq() so the shared IRQ won't be disabled. 2) replace the 1s wait with 100, 200 and 400ms wait intervals for the Pending Transaction. 3) replace mdelay() with msleep(). 4) add might_sleep(). 5) lock the device to prevent PM suspend from accessing the CSRs during the reset. 6) coding style fixes. Reviewed-by: Kenji Kaneshige Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/iov.c | 4 +- drivers/pci/pci.c | 166 ++++++++++++++++++++++++++-------------------------- include/linux/pci.h | 2 +- 3 files changed, 87 insertions(+), 85 deletions(-) (limited to 'drivers/pci/iov.c') diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index e87fe95da814..03c7706c0a09 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -110,7 +110,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset) } if (reset) - pci_execute_reset_function(virtfn); + __pci_reset_function(virtfn); pci_device_add(virtfn, virtfn->bus); mutex_unlock(&iov->dev->sriov->lock); @@ -164,7 +164,7 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset) if (reset) { device_release_driver(&virtfn->dev); - pci_execute_reset_function(virtfn); + __pci_reset_function(virtfn); } sprintf(buf, "virtfn%u", id); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8ea911e55722..6a052ada3fe8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2055,111 +2055,112 @@ int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) EXPORT_SYMBOL(pci_set_dma_seg_boundary); #endif -static int __pcie_flr(struct pci_dev *dev, int probe) +static int pcie_flr(struct pci_dev *dev, int probe) { - u16 status; + int i; + int pos; u32 cap; - int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP); + u16 status; - if (!exppos) + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) return -ENOTTY; - pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap); + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP, &cap); if (!(cap & PCI_EXP_DEVCAP_FLR)) return -ENOTTY; if (probe) return 0; - pci_block_user_cfg_access(dev); - /* Wait for Transaction Pending bit clean */ - pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (!(status & PCI_EXP_DEVSTA_TRPND)) - goto transaction_done; + for (i = 0; i < 4; i++) { + if (i) + msleep((1 << (i - 1)) * 100); - msleep(100); - pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (!(status & PCI_EXP_DEVSTA_TRPND)) - goto transaction_done; - - dev_info(&dev->dev, "Busy after 100ms while trying to reset; " - "sleeping for 1 second\n"); - ssleep(1); - pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (status & PCI_EXP_DEVSTA_TRPND) - dev_info(&dev->dev, "Still busy after 1s; " - "proceeding with reset anyway\n"); - -transaction_done: - pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL, + pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, &status); + if (!(status & PCI_EXP_DEVSTA_TRPND)) + goto clear; + } + + dev_err(&dev->dev, "transaction is not cleared; " + "proceeding with reset anyway\n"); + +clear: + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); - mdelay(100); + msleep(100); - pci_unblock_user_cfg_access(dev); return 0; } -static int __pci_af_flr(struct pci_dev *dev, int probe) +static int pci_af_flr(struct pci_dev *dev, int probe) { - int cappos = pci_find_capability(dev, PCI_CAP_ID_AF); - u8 status; + int i; + int pos; u8 cap; + u8 status; - if (!cappos) + pos = pci_find_capability(dev, PCI_CAP_ID_AF); + if (!pos) return -ENOTTY; - pci_read_config_byte(dev, cappos + PCI_AF_CAP, &cap); + + pci_read_config_byte(dev, pos + PCI_AF_CAP, &cap); if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) return -ENOTTY; if (probe) return 0; - pci_block_user_cfg_access(dev); - /* Wait for Transaction Pending bit clean */ - pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); - if (!(status & PCI_AF_STATUS_TP)) - goto transaction_done; + for (i = 0; i < 4; i++) { + if (i) + msleep((1 << (i - 1)) * 100); + + pci_read_config_byte(dev, pos + PCI_AF_STATUS, &status); + if (!(status & PCI_AF_STATUS_TP)) + goto clear; + } + + dev_err(&dev->dev, "transaction is not cleared; " + "proceeding with reset anyway\n"); +clear: + pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); msleep(100); - pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); - if (!(status & PCI_AF_STATUS_TP)) - goto transaction_done; - - dev_info(&dev->dev, "Busy after 100ms while trying to" - " reset; sleeping for 1 second\n"); - ssleep(1); - pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); - if (status & PCI_AF_STATUS_TP) - dev_info(&dev->dev, "Still busy after 1s; " - "proceeding with reset anyway\n"); - -transaction_done: - pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); - mdelay(100); - - pci_unblock_user_cfg_access(dev); + return 0; } -static int __pci_reset_function(struct pci_dev *pdev, int probe) +static int pci_dev_reset(struct pci_dev *dev, int probe) { - int res; + int rc; + + might_sleep(); + + if (!probe) { + pci_block_user_cfg_access(dev); + /* block PM suspend, driver probe, etc. */ + down(&dev->dev.sem); + } - res = __pcie_flr(pdev, probe); - if (res != -ENOTTY) - return res; + rc = pcie_flr(dev, probe); + if (rc != -ENOTTY) + goto done; - res = __pci_af_flr(pdev, probe); - if (res != -ENOTTY) - return res; + rc = pci_af_flr(dev, probe); +done: + if (!probe) { + up(&dev->dev.sem); + pci_unblock_user_cfg_access(dev); + } - return res; + return rc; } /** - * pci_execute_reset_function() - Reset a PCI device function - * @dev: Device function to reset + * __pci_reset_function - reset a PCI device function + * @dev: PCI device to reset * * Some devices allow an individual function to be reset without affecting * other functions in the same device. The PCI device must be responsive @@ -2171,18 +2172,18 @@ static int __pci_reset_function(struct pci_dev *pdev, int probe) * device including MSI, bus mastering, BARs, decoding IO and memory spaces, * etc. * - * Returns 0 if the device function was successfully reset or -ENOTTY if the + * Returns 0 if the device function was successfully reset or negative if the * device doesn't support resetting a single function. */ -int pci_execute_reset_function(struct pci_dev *dev) +int __pci_reset_function(struct pci_dev *dev) { - return __pci_reset_function(dev, 0); + return pci_dev_reset(dev, 0); } -EXPORT_SYMBOL_GPL(pci_execute_reset_function); +EXPORT_SYMBOL_GPL(__pci_reset_function); /** - * pci_reset_function() - quiesce and reset a PCI device function - * @dev: Device function to reset + * pci_reset_function - quiesce and reset a PCI device function + * @dev: PCI device to reset * * Some devices allow an individual function to be reset without affecting * other functions in the same device. The PCI device must be responsive @@ -2190,32 +2191,33 @@ EXPORT_SYMBOL_GPL(pci_execute_reset_function); * * This function does not just reset the PCI portion of a device, but * clears all the state associated with the device. This function differs - * from pci_execute_reset_function in that it saves and restores device state + * from __pci_reset_function in that it saves and restores device state * over the reset. * - * Returns 0 if the device function was successfully reset or -ENOTTY if the + * Returns 0 if the device function was successfully reset or negative if the * device doesn't support resetting a single function. */ int pci_reset_function(struct pci_dev *dev) { - int r = __pci_reset_function(dev, 1); + int rc; - if (r < 0) - return r; + rc = pci_dev_reset(dev, 1); + if (rc) + return rc; - if (!dev->msi_enabled && !dev->msix_enabled && dev->irq != 0) - disable_irq(dev->irq); pci_save_state(dev); + /* + * both INTx and MSI are disabled after the Interrupt Disable bit + * is set and the Bus Master bit is cleared. + */ pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); - r = pci_execute_reset_function(dev); + rc = pci_dev_reset(dev, 0); pci_restore_state(dev); - if (!dev->msi_enabled && !dev->msix_enabled && dev->irq != 0) - enable_irq(dev->irq); - return r; + return rc; } EXPORT_SYMBOL_GPL(pci_reset_function); diff --git a/include/linux/pci.h b/include/linux/pci.h index 61d9b790d21c..91b06be2f01e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -702,8 +702,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); +int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); -int pci_execute_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); -- cgit v1.2.3